diff options
| author | Jiri Kosina <jkosina@suse.cz> | 2011-11-13 14:55:35 -0500 |
|---|---|---|
| committer | Jiri Kosina <jkosina@suse.cz> | 2011-11-13 14:55:53 -0500 |
| commit | 2290c0d06d82faee87b1ab2d9d4f7bf81ef64379 (patch) | |
| tree | e075e4d5534193f28e6059904f61e5ca03958d3c /drivers/edac | |
| parent | 4da669a2e3e5bc70b30a0465f3641528681b5f77 (diff) | |
| parent | 52e4c2a05256cb83cda12f3c2137ab1533344edb (diff) | |
Merge branch 'master' into for-next
Sync with Linus tree to have 157550ff ("mtd: add GPMI-NAND driver
in the config and Makefile") as I have patch depending on that one.
Diffstat (limited to 'drivers/edac')
| -rw-r--r-- | drivers/edac/Kconfig | 16 | ||||
| -rw-r--r-- | drivers/edac/Makefile | 2 | ||||
| -rw-r--r-- | drivers/edac/amd64_edac.c | 37 | ||||
| -rw-r--r-- | drivers/edac/cpc925_edac.c | 67 | ||||
| -rw-r--r-- | drivers/edac/edac_core.h | 350 | ||||
| -rw-r--r-- | drivers/edac/edac_mce.c | 61 | ||||
| -rw-r--r-- | drivers/edac/i7300_edac.c | 51 | ||||
| -rw-r--r-- | drivers/edac/i7core_edac.c | 415 | ||||
| -rw-r--r-- | drivers/edac/mce_amd.c | 46 | ||||
| -rw-r--r-- | drivers/edac/mce_amd.h | 6 | ||||
| -rw-r--r-- | drivers/edac/mce_amd_inj.c | 1 | ||||
| -rw-r--r-- | drivers/edac/ppc4xx_edac.c | 2 | ||||
| -rw-r--r-- | drivers/edac/sb_edac.c | 1893 |
13 files changed, 2418 insertions, 529 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index af1a17d42bd7..5948a2194f50 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
| @@ -41,7 +41,7 @@ config EDAC_DEBUG | |||
| 41 | 41 | ||
| 42 | config EDAC_DECODE_MCE | 42 | config EDAC_DECODE_MCE |
| 43 | tristate "Decode MCEs in human-readable form (only on AMD for now)" | 43 | tristate "Decode MCEs in human-readable form (only on AMD for now)" |
| 44 | depends on CPU_SUP_AMD && X86_MCE | 44 | depends on CPU_SUP_AMD && X86_MCE_AMD |
| 45 | default y | 45 | default y |
| 46 | ---help--- | 46 | ---help--- |
| 47 | Enable this option if you want to decode Machine Check Exceptions | 47 | Enable this option if you want to decode Machine Check Exceptions |
| @@ -71,9 +71,6 @@ config EDAC_MM_EDAC | |||
| 71 | occurred so that a particular failing memory module can be | 71 | occurred so that a particular failing memory module can be |
| 72 | replaced. If unsure, select 'Y'. | 72 | replaced. If unsure, select 'Y'. |
| 73 | 73 | ||
| 74 | config EDAC_MCE | ||
| 75 | bool | ||
| 76 | |||
| 77 | config EDAC_AMD64 | 74 | config EDAC_AMD64 |
| 78 | tristate "AMD64 (Opteron, Athlon64) K8, F10h" | 75 | tristate "AMD64 (Opteron, Athlon64) K8, F10h" |
| 79 | depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE | 76 | depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE |
| @@ -173,8 +170,7 @@ config EDAC_I5400 | |||
| 173 | 170 | ||
| 174 | config EDAC_I7CORE | 171 | config EDAC_I7CORE |
| 175 | tristate "Intel i7 Core (Nehalem) processors" | 172 | tristate "Intel i7 Core (Nehalem) processors" |
| 176 | depends on EDAC_MM_EDAC && PCI && X86 | 173 | depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL |
| 177 | select EDAC_MCE | ||
| 178 | help | 174 | help |
| 179 | Support for error detection and correction the Intel | 175 | Support for error detection and correction the Intel |
| 180 | i7 Core (Nehalem) Integrated Memory Controller that exists on | 176 | i7 Core (Nehalem) Integrated Memory Controller that exists on |
| @@ -216,6 +212,14 @@ config EDAC_I7300 | |||
| 216 | Support for error detection and correction the Intel | 212 | Support for error detection and correction the Intel |
| 217 | Clarksboro MCH (Intel 7300 chipset). | 213 | Clarksboro MCH (Intel 7300 chipset). |
| 218 | 214 | ||
| 215 | config EDAC_SBRIDGE | ||
| 216 | tristate "Intel Sandy-Bridge Integrated MC" | ||
| 217 | depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL | ||
| 218 | depends on EXPERIMENTAL | ||
| 219 | help | ||
| 220 | Support for error detection and correction the Intel | ||
| 221 | Sandy Bridge Integrated Memory Controller. | ||
| 222 | |||
| 219 | config EDAC_MPC85XX | 223 | config EDAC_MPC85XX |
| 220 | tristate "Freescale MPC83xx / MPC85xx" | 224 | tristate "Freescale MPC83xx / MPC85xx" |
| 221 | depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) | 225 | depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) |
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 3e239133e29e..196a63dd37c5 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | obj-$(CONFIG_EDAC) := edac_stub.o | 9 | obj-$(CONFIG_EDAC) := edac_stub.o |
| 10 | obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o | 10 | obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o |
| 11 | obj-$(CONFIG_EDAC_MCE) += edac_mce.o | ||
| 12 | 11 | ||
| 13 | edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o | 12 | edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o |
| 14 | edac_core-y += edac_module.o edac_device_sysfs.o | 13 | edac_core-y += edac_module.o edac_device_sysfs.o |
| @@ -29,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o | |||
| 29 | obj-$(CONFIG_EDAC_I5400) += i5400_edac.o | 28 | obj-$(CONFIG_EDAC_I5400) += i5400_edac.o |
| 30 | obj-$(CONFIG_EDAC_I7300) += i7300_edac.o | 29 | obj-$(CONFIG_EDAC_I7300) += i7300_edac.o |
| 31 | obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o | 30 | obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o |
| 31 | obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o | ||
| 32 | obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o | 32 | obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o |
| 33 | obj-$(CONFIG_EDAC_E752X) += e752x_edac.o | 33 | obj-$(CONFIG_EDAC_E752X) += e752x_edac.o |
| 34 | obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o | 34 | obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o |
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9a8bebcf6b17..c9eee6d33e9a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
| @@ -114,10 +114,22 @@ static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, | |||
| 114 | return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); | 114 | return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | /* | ||
| 118 | * Select DCT to which PCI cfg accesses are routed | ||
| 119 | */ | ||
| 120 | static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct) | ||
| 121 | { | ||
| 122 | u32 reg = 0; | ||
| 123 | |||
| 124 | amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, ®); | ||
| 125 | reg &= 0xfffffffe; | ||
| 126 | reg |= dct; | ||
| 127 | amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg); | ||
| 128 | } | ||
| 129 | |||
| 117 | static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, | 130 | static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, |
| 118 | const char *func) | 131 | const char *func) |
| 119 | { | 132 | { |
| 120 | u32 reg = 0; | ||
| 121 | u8 dct = 0; | 133 | u8 dct = 0; |
| 122 | 134 | ||
| 123 | if (addr >= 0x140 && addr <= 0x1a0) { | 135 | if (addr >= 0x140 && addr <= 0x1a0) { |
| @@ -125,10 +137,7 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, | |||
| 125 | addr -= 0x100; | 137 | addr -= 0x100; |
| 126 | } | 138 | } |
| 127 | 139 | ||
| 128 | amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, ®); | 140 | f15h_select_dct(pvt, dct); |
| 129 | reg &= 0xfffffffe; | ||
| 130 | reg |= dct; | ||
| 131 | amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg); | ||
| 132 | 141 | ||
| 133 | return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); | 142 | return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func); |
| 134 | } | 143 | } |
| @@ -198,6 +207,10 @@ static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw) | |||
| 198 | if (boot_cpu_data.x86 == 0xf) | 207 | if (boot_cpu_data.x86 == 0xf) |
| 199 | min_scrubrate = 0x0; | 208 | min_scrubrate = 0x0; |
| 200 | 209 | ||
| 210 | /* F15h Erratum #505 */ | ||
| 211 | if (boot_cpu_data.x86 == 0x15) | ||
| 212 | f15h_select_dct(pvt, 0); | ||
| 213 | |||
| 201 | return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate); | 214 | return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate); |
| 202 | } | 215 | } |
| 203 | 216 | ||
| @@ -207,6 +220,10 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) | |||
| 207 | u32 scrubval = 0; | 220 | u32 scrubval = 0; |
| 208 | int i, retval = -EINVAL; | 221 | int i, retval = -EINVAL; |
| 209 | 222 | ||
| 223 | /* F15h Erratum #505 */ | ||
| 224 | if (boot_cpu_data.x86 == 0x15) | ||
| 225 | f15h_select_dct(pvt, 0); | ||
| 226 | |||
| 210 | amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); | 227 | amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); |
| 211 | 228 | ||
| 212 | scrubval = scrubval & 0x001F; | 229 | scrubval = scrubval & 0x001F; |
| @@ -751,10 +768,10 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); | |||
| 751 | * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs | 768 | * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs |
| 752 | * are ECC capable. | 769 | * are ECC capable. |
| 753 | */ | 770 | */ |
| 754 | static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt) | 771 | static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) |
| 755 | { | 772 | { |
| 756 | u8 bit; | 773 | u8 bit; |
| 757 | enum dev_type edac_cap = EDAC_FLAG_NONE; | 774 | unsigned long edac_cap = EDAC_FLAG_NONE; |
| 758 | 775 | ||
| 759 | bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) | 776 | bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) |
| 760 | ? 19 | 777 | ? 19 |
| @@ -1953,11 +1970,9 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, | |||
| 1953 | amd64_handle_ue(mci, m); | 1970 | amd64_handle_ue(mci, m); |
| 1954 | } | 1971 | } |
| 1955 | 1972 | ||
| 1956 | void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) | 1973 | void amd64_decode_bus_error(int node_id, struct mce *m) |
| 1957 | { | 1974 | { |
| 1958 | struct mem_ctl_info *mci = mcis[node_id]; | 1975 | __amd64_decode_bus_error(mcis[node_id], m); |
| 1959 | |||
| 1960 | __amd64_decode_bus_error(mci, m); | ||
| 1961 | } | 1976 | } |
| 1962 | 1977 | ||
| 1963 | /* | 1978 | /* |
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index a687a0d16962..a774c0ddaf5b 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c | |||
| @@ -90,6 +90,7 @@ enum apimask_bits { | |||
| 90 | ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H | | 90 | ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H | |
| 91 | APIMASK_ECC_UE_L | APIMASK_ECC_CE_L), | 91 | APIMASK_ECC_UE_L | APIMASK_ECC_CE_L), |
| 92 | }; | 92 | }; |
| 93 | #define APIMASK_ADI(n) CPC925_BIT(((n)+1)) | ||
| 93 | 94 | ||
| 94 | /************************************************************ | 95 | /************************************************************ |
| 95 | * Processor Interface Exception Register (APIEXCP) | 96 | * Processor Interface Exception Register (APIEXCP) |
| @@ -581,16 +582,73 @@ static void cpc925_mc_check(struct mem_ctl_info *mci) | |||
| 581 | } | 582 | } |
| 582 | 583 | ||
| 583 | /******************** CPU err device********************************/ | 584 | /******************** CPU err device********************************/ |
| 585 | static u32 cpc925_cpu_mask_disabled(void) | ||
| 586 | { | ||
| 587 | struct device_node *cpus; | ||
| 588 | struct device_node *cpunode = NULL; | ||
| 589 | static u32 mask = 0; | ||
| 590 | |||
| 591 | /* use cached value if available */ | ||
| 592 | if (mask != 0) | ||
| 593 | return mask; | ||
| 594 | |||
| 595 | mask = APIMASK_ADI0 | APIMASK_ADI1; | ||
| 596 | |||
| 597 | cpus = of_find_node_by_path("/cpus"); | ||
| 598 | if (cpus == NULL) { | ||
| 599 | cpc925_printk(KERN_DEBUG, "No /cpus node !\n"); | ||
| 600 | return 0; | ||
| 601 | } | ||
| 602 | |||
| 603 | while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) { | ||
| 604 | const u32 *reg = of_get_property(cpunode, "reg", NULL); | ||
| 605 | |||
| 606 | if (strcmp(cpunode->type, "cpu")) { | ||
| 607 | cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name); | ||
| 608 | continue; | ||
| 609 | } | ||
| 610 | |||
| 611 | if (reg == NULL || *reg > 2) { | ||
| 612 | cpc925_printk(KERN_ERR, "Bad reg value at %s\n", cpunode->full_name); | ||
| 613 | continue; | ||
| 614 | } | ||
| 615 | |||
| 616 | mask &= ~APIMASK_ADI(*reg); | ||
| 617 | } | ||
| 618 | |||
| 619 | if (mask != (APIMASK_ADI0 | APIMASK_ADI1)) { | ||
| 620 | /* We assume that each CPU sits on it's own PI and that | ||
| 621 | * for present CPUs the reg property equals to the PI | ||
| 622 | * interface id */ | ||
| 623 | cpc925_printk(KERN_WARNING, | ||
| 624 | "Assuming PI id is equal to CPU MPIC id!\n"); | ||
| 625 | } | ||
| 626 | |||
| 627 | of_node_put(cpunode); | ||
| 628 | of_node_put(cpus); | ||
| 629 | |||
| 630 | return mask; | ||
| 631 | } | ||
| 632 | |||
| 584 | /* Enable CPU Errors detection */ | 633 | /* Enable CPU Errors detection */ |
| 585 | static void cpc925_cpu_init(struct cpc925_dev_info *dev_info) | 634 | static void cpc925_cpu_init(struct cpc925_dev_info *dev_info) |
| 586 | { | 635 | { |
| 587 | u32 apimask; | 636 | u32 apimask; |
| 637 | u32 cpumask; | ||
| 588 | 638 | ||
| 589 | apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET); | 639 | apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET); |
| 590 | if ((apimask & CPU_MASK_ENABLE) == 0) { | 640 | |
| 591 | apimask |= CPU_MASK_ENABLE; | 641 | cpumask = cpc925_cpu_mask_disabled(); |
| 592 | __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET); | 642 | if (apimask & cpumask) { |
| 643 | cpc925_printk(KERN_WARNING, "CPU(s) not present, " | ||
| 644 | "but enabled in APIMASK, disabling\n"); | ||
| 645 | apimask &= ~cpumask; | ||
| 593 | } | 646 | } |
| 647 | |||
| 648 | if ((apimask & CPU_MASK_ENABLE) == 0) | ||
| 649 | apimask |= CPU_MASK_ENABLE; | ||
| 650 | |||
| 651 | __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET); | ||
| 594 | } | 652 | } |
| 595 | 653 | ||
| 596 | /* Disable CPU Errors detection */ | 654 | /* Disable CPU Errors detection */ |
| @@ -622,6 +680,9 @@ static void cpc925_cpu_check(struct edac_device_ctl_info *edac_dev) | |||
| 622 | if ((apiexcp & CPU_EXCP_DETECTED) == 0) | 680 | if ((apiexcp & CPU_EXCP_DETECTED) == 0) |
| 623 | return; | 681 | return; |
| 624 | 682 | ||
| 683 | if ((apiexcp & ~cpc925_cpu_mask_disabled()) == 0) | ||
| 684 | return; | ||
| 685 | |||
| 625 | apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET); | 686 | apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET); |
| 626 | cpc925_printk(KERN_INFO, "Processor Interface Fault\n" | 687 | cpc925_printk(KERN_INFO, "Processor Interface Fault\n" |
| 627 | "Processor Interface register dump:\n"); | 688 | "Processor Interface register dump:\n"); |
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 55b8278bb172..fe90cd4a7ebc 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h | |||
| @@ -34,11 +34,10 @@ | |||
| 34 | #include <linux/platform_device.h> | 34 | #include <linux/platform_device.h> |
| 35 | #include <linux/sysdev.h> | 35 | #include <linux/sysdev.h> |
| 36 | #include <linux/workqueue.h> | 36 | #include <linux/workqueue.h> |
| 37 | #include <linux/edac.h> | ||
| 37 | 38 | ||
| 38 | #define EDAC_MC_LABEL_LEN 31 | ||
| 39 | #define EDAC_DEVICE_NAME_LEN 31 | 39 | #define EDAC_DEVICE_NAME_LEN 31 |
| 40 | #define EDAC_ATTRIB_VALUE_LEN 15 | 40 | #define EDAC_ATTRIB_VALUE_LEN 15 |
| 41 | #define MC_PROC_NAME_MAX_LEN 7 | ||
| 42 | 41 | ||
| 43 | #if PAGE_SHIFT < 20 | 42 | #if PAGE_SHIFT < 20 |
| 44 | #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) | 43 | #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) |
| @@ -101,353 +100,6 @@ extern int edac_debug_level; | |||
| 101 | 100 | ||
| 102 | #define edac_dev_name(dev) (dev)->dev_name | 101 | #define edac_dev_name(dev) (dev)->dev_name |
| 103 | 102 | ||
| 104 | /* memory devices */ | ||
| 105 | enum dev_type { | ||
| 106 | DEV_UNKNOWN = 0, | ||
| 107 | DEV_X1, | ||
| 108 | DEV_X2, | ||
| 109 | DEV_X4, | ||
| 110 | DEV_X8, | ||
| 111 | DEV_X16, | ||
| 112 | DEV_X32, /* Do these parts exist? */ | ||
| 113 | DEV_X64 /* Do these parts exist? */ | ||
| 114 | }; | ||
| 115 | |||
| 116 | #define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN) | ||
| 117 | #define DEV_FLAG_X1 BIT(DEV_X1) | ||
| 118 | #define DEV_FLAG_X2 BIT(DEV_X2) | ||
| 119 | #define DEV_FLAG_X4 BIT(DEV_X4) | ||
| 120 | #define DEV_FLAG_X8 BIT(DEV_X8) | ||
| 121 | #define DEV_FLAG_X16 BIT(DEV_X16) | ||
| 122 | #define DEV_FLAG_X32 BIT(DEV_X32) | ||
| 123 | #define DEV_FLAG_X64 BIT(DEV_X64) | ||
| 124 | |||
| 125 | /* memory types */ | ||
| 126 | enum mem_type { | ||
| 127 | MEM_EMPTY = 0, /* Empty csrow */ | ||
| 128 | MEM_RESERVED, /* Reserved csrow type */ | ||
| 129 | MEM_UNKNOWN, /* Unknown csrow type */ | ||
| 130 | MEM_FPM, /* Fast page mode */ | ||
| 131 | MEM_EDO, /* Extended data out */ | ||
| 132 | MEM_BEDO, /* Burst Extended data out */ | ||
| 133 | MEM_SDR, /* Single data rate SDRAM */ | ||
| 134 | MEM_RDR, /* Registered single data rate SDRAM */ | ||
| 135 | MEM_DDR, /* Double data rate SDRAM */ | ||
| 136 | MEM_RDDR, /* Registered Double data rate SDRAM */ | ||
| 137 | MEM_RMBS, /* Rambus DRAM */ | ||
| 138 | MEM_DDR2, /* DDR2 RAM */ | ||
| 139 | MEM_FB_DDR2, /* fully buffered DDR2 */ | ||
| 140 | MEM_RDDR2, /* Registered DDR2 RAM */ | ||
| 141 | MEM_XDR, /* Rambus XDR */ | ||
| 142 | MEM_DDR3, /* DDR3 RAM */ | ||
| 143 | MEM_RDDR3, /* Registered DDR3 RAM */ | ||
| 144 | }; | ||
| 145 | |||
| 146 | #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) | ||
| 147 | #define MEM_FLAG_RESERVED BIT(MEM_RESERVED) | ||
| 148 | #define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN) | ||
| 149 | #define MEM_FLAG_FPM BIT(MEM_FPM) | ||
| 150 | #define MEM_FLAG_EDO BIT(MEM_EDO) | ||
| 151 | #define MEM_FLAG_BEDO BIT(MEM_BEDO) | ||
| 152 | #define MEM_FLAG_SDR BIT(MEM_SDR) | ||
| 153 | #define MEM_FLAG_RDR BIT(MEM_RDR) | ||
| 154 | #define MEM_FLAG_DDR BIT(MEM_DDR) | ||
| 155 | #define MEM_FLAG_RDDR BIT(MEM_RDDR) | ||
| 156 | #define MEM_FLAG_RMBS BIT(MEM_RMBS) | ||
| 157 | #define MEM_FLAG_DDR2 BIT(MEM_DDR2) | ||
| 158 | #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) | ||
| 159 | #define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) | ||
| 160 | #define MEM_FLAG_XDR BIT(MEM_XDR) | ||
| 161 | #define MEM_FLAG_DDR3 BIT(MEM_DDR3) | ||
| 162 | #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) | ||
| 163 | |||
| 164 | /* chipset Error Detection and Correction capabilities and mode */ | ||
| 165 | enum edac_type { | ||
| 166 | EDAC_UNKNOWN = 0, /* Unknown if ECC is available */ | ||
| 167 | EDAC_NONE, /* Doesn't support ECC */ | ||
| 168 | EDAC_RESERVED, /* Reserved ECC type */ | ||
| 169 | EDAC_PARITY, /* Detects parity errors */ | ||
| 170 | EDAC_EC, /* Error Checking - no correction */ | ||
| 171 | EDAC_SECDED, /* Single bit error correction, Double detection */ | ||
| 172 | EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */ | ||
| 173 | EDAC_S4ECD4ED, /* Chipkill x4 devices */ | ||
| 174 | EDAC_S8ECD8ED, /* Chipkill x8 devices */ | ||
| 175 | EDAC_S16ECD16ED, /* Chipkill x16 devices */ | ||
| 176 | }; | ||
| 177 | |||
| 178 | #define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN) | ||
| 179 | #define EDAC_FLAG_NONE BIT(EDAC_NONE) | ||
| 180 | #define EDAC_FLAG_PARITY BIT(EDAC_PARITY) | ||
| 181 | #define EDAC_FLAG_EC BIT(EDAC_EC) | ||
| 182 | #define EDAC_FLAG_SECDED BIT(EDAC_SECDED) | ||
| 183 | #define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED) | ||
| 184 | #define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED) | ||
| 185 | #define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED) | ||
| 186 | #define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED) | ||
| 187 | |||
| 188 | /* scrubbing capabilities */ | ||
| 189 | enum scrub_type { | ||
| 190 | SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */ | ||
| 191 | SCRUB_NONE, /* No scrubber */ | ||
| 192 | SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */ | ||
| 193 | SCRUB_SW_SRC, /* Software scrub only errors */ | ||
| 194 | SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */ | ||
| 195 | SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */ | ||
| 196 | SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */ | ||
| 197 | SCRUB_HW_SRC, /* Hardware scrub only errors */ | ||
| 198 | SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */ | ||
| 199 | SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */ | ||
| 200 | }; | ||
| 201 | |||
| 202 | #define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) | ||
| 203 | #define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) | ||
| 204 | #define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) | ||
| 205 | #define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) | ||
| 206 | #define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) | ||
| 207 | #define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) | ||
| 208 | #define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) | ||
| 209 | #define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) | ||
| 210 | |||
| 211 | /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ | ||
| 212 | |||
| 213 | /* EDAC internal operation states */ | ||
| 214 | #define OP_ALLOC 0x100 | ||
| 215 | #define OP_RUNNING_POLL 0x201 | ||
| 216 | #define OP_RUNNING_INTERRUPT 0x202 | ||
| 217 | #define OP_RUNNING_POLL_INTR 0x203 | ||
| 218 | #define OP_OFFLINE 0x300 | ||
| 219 | |||
| 220 | /* | ||
| 221 | * There are several things to be aware of that aren't at all obvious: | ||
| 222 | * | ||
| 223 | * | ||
| 224 | * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. | ||
| 225 | * | ||
| 226 | * These are some of the many terms that are thrown about that don't always | ||
| 227 | * mean what people think they mean (Inconceivable!). In the interest of | ||
| 228 | * creating a common ground for discussion, terms and their definitions | ||
| 229 | * will be established. | ||
| 230 | * | ||
| 231 | * Memory devices: The individual chip on a memory stick. These devices | ||
| 232 | * commonly output 4 and 8 bits each. Grouping several | ||
| 233 | * of these in parallel provides 64 bits which is common | ||
| 234 | * for a memory stick. | ||
| 235 | * | ||
| 236 | * Memory Stick: A printed circuit board that aggregates multiple | ||
| 237 | * memory devices in parallel. This is the atomic | ||
| 238 | * memory component that is purchaseable by Joe consumer | ||
| 239 | * and loaded into a memory socket. | ||
| 240 | * | ||
| 241 | * Socket: A physical connector on the motherboard that accepts | ||
| 242 | * a single memory stick. | ||
| 243 | * | ||
| 244 | * Channel: Set of memory devices on a memory stick that must be | ||
| 245 | * grouped in parallel with one or more additional | ||
| 246 | * channels from other memory sticks. This parallel | ||
| 247 | * grouping of the output from multiple channels are | ||
| 248 | * necessary for the smallest granularity of memory access. | ||
| 249 | * Some memory controllers are capable of single channel - | ||
| 250 | * which means that memory sticks can be loaded | ||
| 251 | * individually. Other memory controllers are only | ||
| 252 | * capable of dual channel - which means that memory | ||
| 253 | * sticks must be loaded as pairs (see "socket set"). | ||
| 254 | * | ||
| 255 | * Chip-select row: All of the memory devices that are selected together. | ||
| 256 | * for a single, minimum grain of memory access. | ||
| 257 | * This selects all of the parallel memory devices across | ||
| 258 | * all of the parallel channels. Common chip-select rows | ||
| 259 | * for single channel are 64 bits, for dual channel 128 | ||
| 260 | * bits. | ||
| 261 | * | ||
| 262 | * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. | ||
| 263 | * Motherboards commonly drive two chip-select pins to | ||
| 264 | * a memory stick. A single-ranked stick, will occupy | ||
| 265 | * only one of those rows. The other will be unused. | ||
| 266 | * | ||
| 267 | * Double-Ranked stick: A double-ranked stick has two chip-select rows which | ||
| 268 | * access different sets of memory devices. The two | ||
| 269 | * rows cannot be accessed concurrently. | ||
| 270 | * | ||
| 271 | * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. | ||
| 272 | * A double-sided stick has two chip-select rows which | ||
| 273 | * access different sets of memory devices. The two | ||
| 274 | * rows cannot be accessed concurrently. "Double-sided" | ||
| 275 | * is irrespective of the memory devices being mounted | ||
| 276 | * on both sides of the memory stick. | ||
| 277 | * | ||
| 278 | * Socket set: All of the memory sticks that are required for | ||
| 279 | * a single memory access or all of the memory sticks | ||
| 280 | * spanned by a chip-select row. A single socket set | ||
| 281 | * has two chip-select rows and if double-sided sticks | ||
| 282 | * are used these will occupy those chip-select rows. | ||
| 283 | * | ||
| 284 | * Bank: This term is avoided because it is unclear when | ||
| 285 | * needing to distinguish between chip-select rows and | ||
| 286 | * socket sets. | ||
| 287 | * | ||
| 288 | * Controller pages: | ||
| 289 | * | ||
| 290 | * Physical pages: | ||
| 291 | * | ||
| 292 | * Virtual pages: | ||
| 293 | * | ||
| 294 | * | ||
| 295 | * STRUCTURE ORGANIZATION AND CHOICES | ||
| 296 | * | ||
| 297 | * | ||
| 298 | * | ||
| 299 | * PS - I enjoyed writing all that about as much as you enjoyed reading it. | ||
| 300 | */ | ||
| 301 | |||
| 302 | struct channel_info { | ||
| 303 | int chan_idx; /* channel index */ | ||
| 304 | u32 ce_count; /* Correctable Errors for this CHANNEL */ | ||
| 305 | char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ | ||
| 306 | struct csrow_info *csrow; /* the parent */ | ||
| 307 | }; | ||
| 308 | |||
| 309 | struct csrow_info { | ||
| 310 | unsigned long first_page; /* first page number in dimm */ | ||
| 311 | unsigned long last_page; /* last page number in dimm */ | ||
| 312 | unsigned long page_mask; /* used for interleaving - | ||
| 313 | * 0UL for non intlv | ||
| 314 | */ | ||
| 315 | u32 nr_pages; /* number of pages in csrow */ | ||
| 316 | u32 grain; /* granularity of reported error in bytes */ | ||
| 317 | int csrow_idx; /* the chip-select row */ | ||
| 318 | enum dev_type dtype; /* memory device type */ | ||
| 319 | u32 ue_count; /* Uncorrectable Errors for this csrow */ | ||
| 320 | u32 ce_count; /* Correctable Errors for this csrow */ | ||
| 321 | enum mem_type mtype; /* memory csrow type */ | ||
| 322 | enum edac_type edac_mode; /* EDAC mode for this csrow */ | ||
| 323 | struct mem_ctl_info *mci; /* the parent */ | ||
| 324 | |||
| 325 | struct kobject kobj; /* sysfs kobject for this csrow */ | ||
| 326 | |||
| 327 | /* channel information for this csrow */ | ||
| 328 | u32 nr_channels; | ||
| 329 | struct channel_info *channels; | ||
| 330 | }; | ||
| 331 | |||
| 332 | struct mcidev_sysfs_group { | ||
| 333 | const char *name; /* group name */ | ||
| 334 | const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */ | ||
| 335 | }; | ||
| 336 | |||
| 337 | struct mcidev_sysfs_group_kobj { | ||
| 338 | struct list_head list; /* list for all instances within a mc */ | ||
| 339 | |||
| 340 | struct kobject kobj; /* kobj for the group */ | ||
| 341 | |||
| 342 | const struct mcidev_sysfs_group *grp; /* group description table */ | ||
| 343 | struct mem_ctl_info *mci; /* the parent */ | ||
| 344 | }; | ||
| 345 | |||
| 346 | /* mcidev_sysfs_attribute structure | ||
| 347 | * used for driver sysfs attributes and in mem_ctl_info | ||
| 348 | * sysfs top level entries | ||
| 349 | */ | ||
| 350 | struct mcidev_sysfs_attribute { | ||
| 351 | /* It should use either attr or grp */ | ||
| 352 | struct attribute attr; | ||
| 353 | const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */ | ||
| 354 | |||
| 355 | /* Ops for show/store values at the attribute - not used on group */ | ||
| 356 | ssize_t (*show)(struct mem_ctl_info *,char *); | ||
| 357 | ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); | ||
| 358 | }; | ||
| 359 | |||
| 360 | /* MEMORY controller information structure | ||
| 361 | */ | ||
| 362 | struct mem_ctl_info { | ||
| 363 | struct list_head link; /* for global list of mem_ctl_info structs */ | ||
| 364 | |||
| 365 | struct module *owner; /* Module owner of this control struct */ | ||
| 366 | |||
| 367 | unsigned long mtype_cap; /* memory types supported by mc */ | ||
| 368 | unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ | ||
| 369 | unsigned long edac_cap; /* configuration capabilities - this is | ||
| 370 | * closely related to edac_ctl_cap. The | ||
| 371 | * difference is that the controller may be | ||
| 372 | * capable of s4ecd4ed which would be listed | ||
| 373 | * in edac_ctl_cap, but if channels aren't | ||
| 374 | * capable of s4ecd4ed then the edac_cap would | ||
| 375 | * not have that capability. | ||
| 376 | */ | ||
| 377 | unsigned long scrub_cap; /* chipset scrub capabilities */ | ||
| 378 | enum scrub_type scrub_mode; /* current scrub mode */ | ||
| 379 | |||
| 380 | /* Translates sdram memory scrub rate given in bytes/sec to the | ||
| 381 | internal representation and configures whatever else needs | ||
| 382 | to be configured. | ||
| 383 | */ | ||
| 384 | int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); | ||
| 385 | |||
| 386 | /* Get the current sdram memory scrub rate from the internal | ||
| 387 | representation and converts it to the closest matching | ||
| 388 | bandwidth in bytes/sec. | ||
| 389 | */ | ||
| 390 | int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci); | ||
| 391 | |||
| 392 | |||
| 393 | /* pointer to edac checking routine */ | ||
| 394 | void (*edac_check) (struct mem_ctl_info * mci); | ||
| 395 | |||
| 396 | /* | ||
| 397 | * Remaps memory pages: controller pages to physical pages. | ||
| 398 | * For most MC's, this will be NULL. | ||
| 399 | */ | ||
| 400 | /* FIXME - why not send the phys page to begin with? */ | ||
| 401 | unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, | ||
| 402 | unsigned long page); | ||
| 403 | int mc_idx; | ||
| 404 | int nr_csrows; | ||
| 405 | struct csrow_info *csrows; | ||
| 406 | /* | ||
| 407 | * FIXME - what about controllers on other busses? - IDs must be | ||
| 408 | * unique. dev pointer should be sufficiently unique, but | ||
| 409 | * BUS:SLOT.FUNC numbers may not be unique. | ||
| 410 | */ | ||
| 411 | struct device *dev; | ||
| 412 | const char *mod_name; | ||
| 413 | const char *mod_ver; | ||
| 414 | const char *ctl_name; | ||
| 415 | const char *dev_name; | ||
| 416 | char proc_name[MC_PROC_NAME_MAX_LEN + 1]; | ||
| 417 | void *pvt_info; | ||
| 418 | u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ | ||
| 419 | u32 ce_noinfo_count; /* Correctable Errors w/o info */ | ||
| 420 | u32 ue_count; /* Total Uncorrectable Errors for this MC */ | ||
| 421 | u32 ce_count; /* Total Correctable Errors for this MC */ | ||
| 422 | unsigned long start_time; /* mci load start time (in jiffies) */ | ||
| 423 | |||
| 424 | struct completion complete; | ||
| 425 | |||
| 426 | /* edac sysfs device control */ | ||
| 427 | struct kobject edac_mci_kobj; | ||
| 428 | |||
| 429 | /* list for all grp instances within a mc */ | ||
| 430 | struct list_head grp_kobj_list; | ||
| 431 | |||
| 432 | /* Additional top controller level attributes, but specified | ||
| 433 | * by the low level driver. | ||
| 434 | * | ||
| 435 | * Set by the low level driver to provide attributes at the | ||
| 436 | * controller level, same level as 'ue_count' and 'ce_count' above. | ||
| 437 | * An array of structures, NULL terminated | ||
| 438 | * | ||
| 439 | * If attributes are desired, then set to array of attributes | ||
| 440 | * If no attributes are desired, leave NULL | ||
| 441 | */ | ||
| 442 | const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; | ||
| 443 | |||
| 444 | /* work struct for this MC */ | ||
| 445 | struct delayed_work work; | ||
| 446 | |||
| 447 | /* the internal state of this controller instance */ | ||
| 448 | int op_state; | ||
| 449 | }; | ||
| 450 | |||
| 451 | /* | 103 | /* |
| 452 | * The following are the structures to provide for a generic | 104 | * The following are the structures to provide for a generic |
| 453 | * or abstract 'edac_device'. This set of structures and the | 105 | * or abstract 'edac_device'. This set of structures and the |
diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c deleted file mode 100644 index 9ccdc5b140e7..000000000000 --- a/drivers/edac/edac_mce.c +++ /dev/null | |||
| @@ -1,61 +0,0 @@ | |||
| 1 | /* Provides edac interface to mcelog events | ||
| 2 | * | ||
| 3 | * This file may be distributed under the terms of the | ||
| 4 | * GNU General Public License version 2. | ||
| 5 | * | ||
| 6 | * Copyright (c) 2009 by: | ||
| 7 | * Mauro Carvalho Chehab <mchehab@redhat.com> | ||
| 8 | * | ||
| 9 | * Red Hat Inc. http://www.redhat.com | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/module.h> | ||
| 13 | #include <linux/edac_mce.h> | ||
| 14 | #include <asm/mce.h> | ||
| 15 | |||
| 16 | int edac_mce_enabled; | ||
| 17 | EXPORT_SYMBOL_GPL(edac_mce_enabled); | ||
| 18 | |||
| 19 | |||
| 20 | /* | ||
| 21 | * Extension interface | ||
| 22 | */ | ||
| 23 | |||
| 24 | static LIST_HEAD(edac_mce_list); | ||
| 25 | static DEFINE_MUTEX(edac_mce_lock); | ||
| 26 | |||
| 27 | int edac_mce_register(struct edac_mce *edac_mce) | ||
| 28 | { | ||
| 29 | mutex_lock(&edac_mce_lock); | ||
| 30 | list_add_tail(&edac_mce->list, &edac_mce_list); | ||
| 31 | mutex_unlock(&edac_mce_lock); | ||
| 32 | return 0; | ||
| 33 | } | ||
| 34 | EXPORT_SYMBOL(edac_mce_register); | ||
| 35 | |||
| 36 | void edac_mce_unregister(struct edac_mce *edac_mce) | ||
| 37 | { | ||
| 38 | mutex_lock(&edac_mce_lock); | ||
| 39 | list_del(&edac_mce->list); | ||
| 40 | mutex_unlock(&edac_mce_lock); | ||
| 41 | } | ||
| 42 | EXPORT_SYMBOL(edac_mce_unregister); | ||
| 43 | |||
| 44 | int edac_mce_parse(struct mce *mce) | ||
| 45 | { | ||
| 46 | struct edac_mce *edac_mce; | ||
| 47 | |||
| 48 | list_for_each_entry(edac_mce, &edac_mce_list, list) { | ||
| 49 | if (edac_mce->check_error(edac_mce->priv, mce)) | ||
| 50 | return 1; | ||
| 51 | } | ||
| 52 | |||
| 53 | /* Nobody queued the error */ | ||
| 54 | return 0; | ||
| 55 | } | ||
| 56 | EXPORT_SYMBOL_GPL(edac_mce_parse); | ||
| 57 | |||
| 58 | MODULE_LICENSE("GPL"); | ||
| 59 | MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); | ||
| 60 | MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); | ||
| 61 | MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors"); | ||
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a76fe8366b68..6104dba380b6 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c | |||
| @@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos) | |||
| 372 | static void i7300_process_error_global(struct mem_ctl_info *mci) | 372 | static void i7300_process_error_global(struct mem_ctl_info *mci) |
| 373 | { | 373 | { |
| 374 | struct i7300_pvt *pvt; | 374 | struct i7300_pvt *pvt; |
| 375 | u32 errnum, value; | 375 | u32 errnum, error_reg; |
| 376 | unsigned long errors; | 376 | unsigned long errors; |
| 377 | const char *specific; | 377 | const char *specific; |
| 378 | bool is_fatal; | 378 | bool is_fatal; |
| @@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci) | |||
| 381 | 381 | ||
| 382 | /* read in the 1st FATAL error register */ | 382 | /* read in the 1st FATAL error register */ |
| 383 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | 383 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, |
| 384 | FERR_GLOBAL_HI, &value); | 384 | FERR_GLOBAL_HI, &error_reg); |
| 385 | if (unlikely(value)) { | 385 | if (unlikely(error_reg)) { |
| 386 | errors = value; | 386 | errors = error_reg; |
| 387 | errnum = find_first_bit(&errors, | 387 | errnum = find_first_bit(&errors, |
| 388 | ARRAY_SIZE(ferr_global_hi_name)); | 388 | ARRAY_SIZE(ferr_global_hi_name)); |
| 389 | specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); | 389 | specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); |
| @@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci) | |||
| 391 | 391 | ||
| 392 | /* Clear the error bit */ | 392 | /* Clear the error bit */ |
| 393 | pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | 393 | pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, |
| 394 | FERR_GLOBAL_HI, value); | 394 | FERR_GLOBAL_HI, error_reg); |
| 395 | 395 | ||
| 396 | goto error_global; | 396 | goto error_global; |
| 397 | } | 397 | } |
| 398 | 398 | ||
| 399 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | 399 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, |
| 400 | FERR_GLOBAL_LO, &value); | 400 | FERR_GLOBAL_LO, &error_reg); |
| 401 | if (unlikely(value)) { | 401 | if (unlikely(error_reg)) { |
| 402 | errors = value; | 402 | errors = error_reg; |
| 403 | errnum = find_first_bit(&errors, | 403 | errnum = find_first_bit(&errors, |
| 404 | ARRAY_SIZE(ferr_global_lo_name)); | 404 | ARRAY_SIZE(ferr_global_lo_name)); |
| 405 | specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); | 405 | specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); |
| @@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci) | |||
| 407 | 407 | ||
| 408 | /* Clear the error bit */ | 408 | /* Clear the error bit */ |
| 409 | pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | 409 | pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, |
| 410 | FERR_GLOBAL_LO, value); | 410 | FERR_GLOBAL_LO, error_reg); |
| 411 | 411 | ||
| 412 | goto error_global; | 412 | goto error_global; |
| 413 | } | 413 | } |
| @@ -427,7 +427,7 @@ error_global: | |||
| 427 | static void i7300_process_fbd_error(struct mem_ctl_info *mci) | 427 | static void i7300_process_fbd_error(struct mem_ctl_info *mci) |
| 428 | { | 428 | { |
| 429 | struct i7300_pvt *pvt; | 429 | struct i7300_pvt *pvt; |
| 430 | u32 errnum, value; | 430 | u32 errnum, value, error_reg; |
| 431 | u16 val16; | 431 | u16 val16; |
| 432 | unsigned branch, channel, bank, rank, cas, ras; | 432 | unsigned branch, channel, bank, rank, cas, ras; |
| 433 | u32 syndrome; | 433 | u32 syndrome; |
| @@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) | |||
| 440 | 440 | ||
| 441 | /* read in the 1st FATAL error register */ | 441 | /* read in the 1st FATAL error register */ |
| 442 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | 442 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, |
| 443 | FERR_FAT_FBD, &value); | 443 | FERR_FAT_FBD, &error_reg); |
| 444 | if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) { | 444 | if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) { |
| 445 | errors = value & FERR_FAT_FBD_ERR_MASK ; | 445 | errors = error_reg & FERR_FAT_FBD_ERR_MASK ; |
| 446 | errnum = find_first_bit(&errors, | 446 | errnum = find_first_bit(&errors, |
| 447 | ARRAY_SIZE(ferr_fat_fbd_name)); | 447 | ARRAY_SIZE(ferr_fat_fbd_name)); |
| 448 | specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum); | 448 | specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum); |
| 449 | branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; | ||
| 449 | 450 | ||
| 450 | branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; | ||
| 451 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | 451 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, |
| 452 | NRECMEMA, &val16); | 452 | NRECMEMA, &val16); |
| 453 | bank = NRECMEMA_BANK(val16); | 453 | bank = NRECMEMA_BANK(val16); |
| @@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) | |||
| 455 | 455 | ||
| 456 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | 456 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, |
| 457 | NRECMEMB, &value); | 457 | NRECMEMB, &value); |
| 458 | |||
| 459 | is_wr = NRECMEMB_IS_WR(value); | 458 | is_wr = NRECMEMB_IS_WR(value); |
| 460 | cas = NRECMEMB_CAS(value); | 459 | cas = NRECMEMB_CAS(value); |
| 461 | ras = NRECMEMB_RAS(value); | 460 | ras = NRECMEMB_RAS(value); |
| 462 | 461 | ||
| 462 | /* Clean the error register */ | ||
| 463 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
| 464 | FERR_FAT_FBD, error_reg); | ||
| 465 | |||
| 463 | snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, | 466 | snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, |
| 464 | "FATAL (Branch=%d DRAM-Bank=%d %s " | 467 | "FATAL (Branch=%d DRAM-Bank=%d %s " |
| 465 | "RAS=%d CAS=%d Err=0x%lx (%s))", | 468 | "RAS=%d CAS=%d Err=0x%lx (%s))", |
| @@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) | |||
| 476 | 479 | ||
| 477 | /* read in the 1st NON-FATAL error register */ | 480 | /* read in the 1st NON-FATAL error register */ |
| 478 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | 481 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, |
| 479 | FERR_NF_FBD, &value); | 482 | FERR_NF_FBD, &error_reg); |
| 480 | if (unlikely(value & FERR_NF_FBD_ERR_MASK)) { | 483 | if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) { |
| 481 | errors = value & FERR_NF_FBD_ERR_MASK; | 484 | errors = error_reg & FERR_NF_FBD_ERR_MASK; |
| 482 | errnum = find_first_bit(&errors, | 485 | errnum = find_first_bit(&errors, |
| 483 | ARRAY_SIZE(ferr_nf_fbd_name)); | 486 | ARRAY_SIZE(ferr_nf_fbd_name)); |
| 484 | specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); | 487 | specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); |
| 485 | 488 | branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; | |
| 486 | /* Clear the error bit */ | ||
| 487 | pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | ||
| 488 | FERR_GLOBAL_LO, value); | ||
| 489 | 489 | ||
| 490 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | 490 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, |
| 491 | REDMEMA, &syndrome); | 491 | REDMEMA, &syndrome); |
| 492 | 492 | ||
| 493 | branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0; | ||
| 494 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | 493 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, |
| 495 | RECMEMA, &val16); | 494 | RECMEMA, &val16); |
| 496 | bank = RECMEMA_BANK(val16); | 495 | bank = RECMEMA_BANK(val16); |
| @@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) | |||
| 498 | 497 | ||
| 499 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | 498 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, |
| 500 | RECMEMB, &value); | 499 | RECMEMB, &value); |
| 501 | |||
| 502 | is_wr = RECMEMB_IS_WR(value); | 500 | is_wr = RECMEMB_IS_WR(value); |
| 503 | cas = RECMEMB_CAS(value); | 501 | cas = RECMEMB_CAS(value); |
| 504 | ras = RECMEMB_RAS(value); | 502 | ras = RECMEMB_RAS(value); |
| 505 | 503 | ||
| 506 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | 504 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, |
| 507 | REDMEMB, &value); | 505 | REDMEMB, &value); |
| 508 | |||
| 509 | channel = (branch << 1); | 506 | channel = (branch << 1); |
| 510 | if (IS_SECOND_CH(value)) | 507 | if (IS_SECOND_CH(value)) |
| 511 | channel++; | 508 | channel++; |
| 512 | 509 | ||
| 510 | /* Clear the error bit */ | ||
| 511 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
| 512 | FERR_NF_FBD, error_reg); | ||
| 513 | |||
| 513 | /* Form out message */ | 514 | /* Form out message */ |
| 514 | snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, | 515 | snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, |
| 515 | "Corrected error (Branch=%d, Channel %d), " | 516 | "Corrected error (Branch=%d, Channel %d), " |
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index f6cf448d69b4..70ad8923f1d7 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
| @@ -31,11 +31,13 @@ | |||
| 31 | #include <linux/pci_ids.h> | 31 | #include <linux/pci_ids.h> |
| 32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
| 33 | #include <linux/delay.h> | 33 | #include <linux/delay.h> |
| 34 | #include <linux/dmi.h> | ||
| 34 | #include <linux/edac.h> | 35 | #include <linux/edac.h> |
| 35 | #include <linux/mmzone.h> | 36 | #include <linux/mmzone.h> |
| 36 | #include <linux/edac_mce.h> | ||
| 37 | #include <linux/smp.h> | 37 | #include <linux/smp.h> |
| 38 | #include <asm/mce.h> | ||
| 38 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
| 40 | #include <asm/div64.h> | ||
| 39 | 41 | ||
| 40 | #include "edac_core.h" | 42 | #include "edac_core.h" |
| 41 | 43 | ||
| @@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); | |||
| 78 | /* OFFSETS for Device 0 Function 0 */ | 80 | /* OFFSETS for Device 0 Function 0 */ |
| 79 | 81 | ||
| 80 | #define MC_CFG_CONTROL 0x90 | 82 | #define MC_CFG_CONTROL 0x90 |
| 83 | #define MC_CFG_UNLOCK 0x02 | ||
| 84 | #define MC_CFG_LOCK 0x00 | ||
| 81 | 85 | ||
| 82 | /* OFFSETS for Device 3 Function 0 */ | 86 | /* OFFSETS for Device 3 Function 0 */ |
| 83 | 87 | ||
| @@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); | |||
| 98 | #define DIMM0_COR_ERR(r) ((r) & 0x7fff) | 102 | #define DIMM0_COR_ERR(r) ((r) & 0x7fff) |
| 99 | 103 | ||
| 100 | /* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ | 104 | /* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ |
| 105 | #define MC_SSRCONTROL 0x48 | ||
| 106 | #define SSR_MODE_DISABLE 0x00 | ||
| 107 | #define SSR_MODE_ENABLE 0x01 | ||
| 108 | #define SSR_MODE_MASK 0x03 | ||
| 109 | |||
| 110 | #define MC_SCRUB_CONTROL 0x4c | ||
| 111 | #define STARTSCRUB (1 << 24) | ||
| 112 | #define SCRUBINTERVAL_MASK 0xffffff | ||
| 113 | |||
| 101 | #define MC_COR_ECC_CNT_0 0x80 | 114 | #define MC_COR_ECC_CNT_0 0x80 |
| 102 | #define MC_COR_ECC_CNT_1 0x84 | 115 | #define MC_COR_ECC_CNT_1 0x84 |
| 103 | #define MC_COR_ECC_CNT_2 0x88 | 116 | #define MC_COR_ECC_CNT_2 0x88 |
| @@ -253,10 +266,7 @@ struct i7core_pvt { | |||
| 253 | unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; | 266 | unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; |
| 254 | int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; | 267 | int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; |
| 255 | 268 | ||
| 256 | unsigned int is_registered; | 269 | bool is_registered, enable_scrub; |
| 257 | |||
| 258 | /* mcelog glue */ | ||
| 259 | struct edac_mce edac_mce; | ||
| 260 | 270 | ||
| 261 | /* Fifo double buffers */ | 271 | /* Fifo double buffers */ |
| 262 | struct mce mce_entry[MCE_LOG_LEN]; | 272 | struct mce mce_entry[MCE_LOG_LEN]; |
| @@ -268,6 +278,9 @@ struct i7core_pvt { | |||
| 268 | /* Count indicator to show errors not got */ | 278 | /* Count indicator to show errors not got */ |
| 269 | unsigned mce_overrun; | 279 | unsigned mce_overrun; |
| 270 | 280 | ||
| 281 | /* DCLK Frequency used for computing scrub rate */ | ||
| 282 | int dclk_freq; | ||
| 283 | |||
| 271 | /* Struct to control EDAC polling */ | 284 | /* Struct to control EDAC polling */ |
| 272 | struct edac_pci_ctl_info *i7core_pci; | 285 | struct edac_pci_ctl_info *i7core_pci; |
| 273 | }; | 286 | }; |
| @@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { | |||
| 281 | /* Memory controller */ | 294 | /* Memory controller */ |
| 282 | { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, | 295 | { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, |
| 283 | { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, | 296 | { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, |
| 284 | 297 | /* Exists only for RDIMM */ | |
| 285 | /* Exists only for RDIMM */ | ||
| 286 | { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, | 298 | { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, |
| 287 | { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, | 299 | { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, |
| 288 | 300 | ||
| @@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { | |||
| 303 | { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, | 315 | { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, |
| 304 | { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, | 316 | { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, |
| 305 | { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, | 317 | { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, |
| 318 | |||
| 319 | /* Generic Non-core registers */ | ||
| 320 | /* | ||
| 321 | * This is the PCI device on i7core and on Xeon 35xx (8086:2c41) | ||
| 322 | * On Xeon 55xx, however, it has a different id (8086:2c40). So, | ||
| 323 | * the probing code needs to test for the other address in case of | ||
| 324 | * failure of this one | ||
| 325 | */ | ||
| 326 | { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) }, | ||
| 327 | |||
| 306 | }; | 328 | }; |
| 307 | 329 | ||
| 308 | static const struct pci_id_descr pci_dev_descr_lynnfield[] = { | 330 | static const struct pci_id_descr pci_dev_descr_lynnfield[] = { |
| @@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = { | |||
| 319 | { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) }, | 341 | { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) }, |
| 320 | { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) }, | 342 | { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) }, |
| 321 | { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) }, | 343 | { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) }, |
| 344 | |||
| 345 | /* | ||
| 346 | * This is the PCI device has an alternate address on some | ||
| 347 | * processors like Core i7 860 | ||
| 348 | */ | ||
| 349 | { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) }, | ||
| 322 | }; | 350 | }; |
| 323 | 351 | ||
| 324 | static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { | 352 | static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { |
| @@ -346,6 +374,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { | |||
| 346 | { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) }, | 374 | { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) }, |
| 347 | { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) }, | 375 | { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) }, |
| 348 | { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) }, | 376 | { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) }, |
| 377 | |||
| 378 | /* Generic Non-core registers */ | ||
| 379 | { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) }, | ||
| 380 | |||
| 349 | }; | 381 | }; |
| 350 | 382 | ||
| 351 | #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } | 383 | #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } |
| @@ -714,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci) | |||
| 714 | 746 | ||
| 715 | csr->edac_mode = mode; | 747 | csr->edac_mode = mode; |
| 716 | csr->mtype = mtype; | 748 | csr->mtype = mtype; |
| 749 | snprintf(csr->channels[0].label, | ||
| 750 | sizeof(csr->channels[0].label), | ||
| 751 | "CPU#%uChannel#%u_DIMM#%u", | ||
| 752 | pvt->i7core_dev->socket, i, j); | ||
| 717 | 753 | ||
| 718 | csrow++; | 754 | csrow++; |
| 719 | } | 755 | } |
| @@ -731,7 +767,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) | |||
| 731 | debugf1("\t\t%#x\t%#x\t%#x\n", | 767 | debugf1("\t\t%#x\t%#x\t%#x\n", |
| 732 | (value[j] >> 27) & 0x1, | 768 | (value[j] >> 27) & 0x1, |
| 733 | (value[j] >> 24) & 0x7, | 769 | (value[j] >> 24) & 0x7, |
| 734 | (value[j] && ((1 << 24) - 1))); | 770 | (value[j] & ((1 << 24) - 1))); |
| 735 | } | 771 | } |
| 736 | 772 | ||
| 737 | return 0; | 773 | return 0; |
| @@ -1324,6 +1360,20 @@ static int i7core_get_onedevice(struct pci_dev **prev, | |||
| 1324 | pdev = pci_get_device(PCI_VENDOR_ID_INTEL, | 1360 | pdev = pci_get_device(PCI_VENDOR_ID_INTEL, |
| 1325 | dev_descr->dev_id, *prev); | 1361 | dev_descr->dev_id, *prev); |
| 1326 | 1362 | ||
| 1363 | /* | ||
| 1364 | * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs | ||
| 1365 | * is at addr 8086:2c40, instead of 8086:2c41. So, we need | ||
| 1366 | * to probe for the alternate address in case of failure | ||
| 1367 | */ | ||
| 1368 | if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) | ||
| 1369 | pdev = pci_get_device(PCI_VENDOR_ID_INTEL, | ||
| 1370 | PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); | ||
| 1371 | |||
| 1372 | if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) | ||
| 1373 | pdev = pci_get_device(PCI_VENDOR_ID_INTEL, | ||
| 1374 | PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, | ||
| 1375 | *prev); | ||
| 1376 | |||
| 1327 | if (!pdev) { | 1377 | if (!pdev) { |
| 1328 | if (*prev) { | 1378 | if (*prev) { |
| 1329 | *prev = pdev; | 1379 | *prev = pdev; |
| @@ -1444,8 +1494,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci, | |||
| 1444 | struct i7core_pvt *pvt = mci->pvt_info; | 1494 | struct i7core_pvt *pvt = mci->pvt_info; |
| 1445 | struct pci_dev *pdev; | 1495 | struct pci_dev *pdev; |
| 1446 | int i, func, slot; | 1496 | int i, func, slot; |
| 1497 | char *family; | ||
| 1447 | 1498 | ||
| 1448 | pvt->is_registered = 0; | 1499 | pvt->is_registered = false; |
| 1500 | pvt->enable_scrub = false; | ||
| 1449 | for (i = 0; i < i7core_dev->n_devs; i++) { | 1501 | for (i = 0; i < i7core_dev->n_devs; i++) { |
| 1450 | pdev = i7core_dev->pdev[i]; | 1502 | pdev = i7core_dev->pdev[i]; |
| 1451 | if (!pdev) | 1503 | if (!pdev) |
| @@ -1461,9 +1513,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci, | |||
| 1461 | if (unlikely(func > MAX_CHAN_FUNC)) | 1513 | if (unlikely(func > MAX_CHAN_FUNC)) |
| 1462 | goto error; | 1514 | goto error; |
| 1463 | pvt->pci_ch[slot - 4][func] = pdev; | 1515 | pvt->pci_ch[slot - 4][func] = pdev; |
| 1464 | } else if (!slot && !func) | 1516 | } else if (!slot && !func) { |
| 1465 | pvt->pci_noncore = pdev; | 1517 | pvt->pci_noncore = pdev; |
| 1466 | else | 1518 | |
| 1519 | /* Detect the processor family */ | ||
| 1520 | switch (pdev->device) { | ||
| 1521 | case PCI_DEVICE_ID_INTEL_I7_NONCORE: | ||
| 1522 | family = "Xeon 35xx/ i7core"; | ||
| 1523 | pvt->enable_scrub = false; | ||
| 1524 | break; | ||
| 1525 | case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT: | ||
| 1526 | family = "i7-800/i5-700"; | ||
| 1527 | pvt->enable_scrub = false; | ||
| 1528 | break; | ||
| 1529 | case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE: | ||
| 1530 | family = "Xeon 34xx"; | ||
| 1531 | pvt->enable_scrub = false; | ||
| 1532 | break; | ||
| 1533 | case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT: | ||
| 1534 | family = "Xeon 55xx"; | ||
| 1535 | pvt->enable_scrub = true; | ||
| 1536 | break; | ||
| 1537 | case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2: | ||
| 1538 | family = "Xeon 56xx / i7-900"; | ||
| 1539 | pvt->enable_scrub = true; | ||
| 1540 | break; | ||
| 1541 | default: | ||
| 1542 | family = "unknown"; | ||
| 1543 | pvt->enable_scrub = false; | ||
| 1544 | } | ||
| 1545 | debugf0("Detected a processor type %s\n", family); | ||
| 1546 | } else | ||
| 1467 | goto error; | 1547 | goto error; |
| 1468 | 1548 | ||
| 1469 | debugf0("Associated fn %d.%d, dev = %p, socket %d\n", | 1549 | debugf0("Associated fn %d.%d, dev = %p, socket %d\n", |
| @@ -1472,7 +1552,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci, | |||
| 1472 | 1552 | ||
| 1473 | if (PCI_SLOT(pdev->devfn) == 3 && | 1553 | if (PCI_SLOT(pdev->devfn) == 3 && |
| 1474 | PCI_FUNC(pdev->devfn) == 2) | 1554 | PCI_FUNC(pdev->devfn) == 2) |
| 1475 | pvt->is_registered = 1; | 1555 | pvt->is_registered = true; |
| 1476 | } | 1556 | } |
| 1477 | 1557 | ||
| 1478 | return 0; | 1558 | return 0; |
| @@ -1826,33 +1906,43 @@ check_ce_error: | |||
| 1826 | * WARNING: As this routine should be called at NMI time, extra care should | 1906 | * WARNING: As this routine should be called at NMI time, extra care should |
| 1827 | * be taken to avoid deadlocks, and to be as fast as possible. | 1907 | * be taken to avoid deadlocks, and to be as fast as possible. |
| 1828 | */ | 1908 | */ |
| 1829 | static int i7core_mce_check_error(void *priv, struct mce *mce) | 1909 | static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, |
| 1910 | void *data) | ||
| 1830 | { | 1911 | { |
| 1831 | struct mem_ctl_info *mci = priv; | 1912 | struct mce *mce = (struct mce *)data; |
| 1832 | struct i7core_pvt *pvt = mci->pvt_info; | 1913 | struct i7core_dev *i7_dev; |
| 1914 | struct mem_ctl_info *mci; | ||
| 1915 | struct i7core_pvt *pvt; | ||
| 1916 | |||
| 1917 | i7_dev = get_i7core_dev(mce->socketid); | ||
| 1918 | if (!i7_dev) | ||
| 1919 | return NOTIFY_BAD; | ||
| 1920 | |||
| 1921 | mci = i7_dev->mci; | ||
| 1922 | pvt = mci->pvt_info; | ||
| 1833 | 1923 | ||
| 1834 | /* | 1924 | /* |
| 1835 | * Just let mcelog handle it if the error is | 1925 | * Just let mcelog handle it if the error is |
| 1836 | * outside the memory controller | 1926 | * outside the memory controller |
| 1837 | */ | 1927 | */ |
| 1838 | if (((mce->status & 0xffff) >> 7) != 1) | 1928 | if (((mce->status & 0xffff) >> 7) != 1) |
| 1839 | return 0; | 1929 | return NOTIFY_DONE; |
| 1840 | 1930 | ||
| 1841 | /* Bank 8 registers are the only ones that we know how to handle */ | 1931 | /* Bank 8 registers are the only ones that we know how to handle */ |
| 1842 | if (mce->bank != 8) | 1932 | if (mce->bank != 8) |
| 1843 | return 0; | 1933 | return NOTIFY_DONE; |
| 1844 | 1934 | ||
| 1845 | #ifdef CONFIG_SMP | 1935 | #ifdef CONFIG_SMP |
| 1846 | /* Only handle if it is the right mc controller */ | 1936 | /* Only handle if it is the right mc controller */ |
| 1847 | if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) | 1937 | if (mce->socketid != pvt->i7core_dev->socket) |
| 1848 | return 0; | 1938 | return NOTIFY_DONE; |
| 1849 | #endif | 1939 | #endif |
| 1850 | 1940 | ||
| 1851 | smp_rmb(); | 1941 | smp_rmb(); |
| 1852 | if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { | 1942 | if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { |
| 1853 | smp_wmb(); | 1943 | smp_wmb(); |
| 1854 | pvt->mce_overrun++; | 1944 | pvt->mce_overrun++; |
| 1855 | return 0; | 1945 | return NOTIFY_DONE; |
| 1856 | } | 1946 | } |
| 1857 | 1947 | ||
| 1858 | /* Copy memory error at the ringbuffer */ | 1948 | /* Copy memory error at the ringbuffer */ |
| @@ -1865,7 +1955,240 @@ static int i7core_mce_check_error(void *priv, struct mce *mce) | |||
| 1865 | i7core_check_error(mci); | 1955 | i7core_check_error(mci); |
| 1866 | 1956 | ||
| 1867 | /* Advise mcelog that the errors were handled */ | 1957 | /* Advise mcelog that the errors were handled */ |
| 1868 | return 1; | 1958 | return NOTIFY_STOP; |
| 1959 | } | ||
| 1960 | |||
| 1961 | static struct notifier_block i7_mce_dec = { | ||
| 1962 | .notifier_call = i7core_mce_check_error, | ||
| 1963 | }; | ||
| 1964 | |||
| 1965 | struct memdev_dmi_entry { | ||
| 1966 | u8 type; | ||
| 1967 | u8 length; | ||
| 1968 | u16 handle; | ||
| 1969 | u16 phys_mem_array_handle; | ||
| 1970 | u16 mem_err_info_handle; | ||
| 1971 | u16 total_width; | ||
| 1972 | u16 data_width; | ||
| 1973 | u16 size; | ||
| 1974 | u8 form; | ||
| 1975 | u8 device_set; | ||
| 1976 | u8 device_locator; | ||
| 1977 | u8 bank_locator; | ||
| 1978 | u8 memory_type; | ||
| 1979 | u16 type_detail; | ||
| 1980 | u16 speed; | ||
| 1981 | u8 manufacturer; | ||
| 1982 | u8 serial_number; | ||
| 1983 | u8 asset_tag; | ||
| 1984 | u8 part_number; | ||
| 1985 | u8 attributes; | ||
| 1986 | u32 extended_size; | ||
| 1987 | u16 conf_mem_clk_speed; | ||
| 1988 | } __attribute__((__packed__)); | ||
| 1989 | |||
| 1990 | |||
| 1991 | /* | ||
| 1992 | * Decode the DRAM Clock Frequency, be paranoid, make sure that all | ||
| 1993 | * memory devices show the same speed, and if they don't then consider | ||
| 1994 | * all speeds to be invalid. | ||
| 1995 | */ | ||
| 1996 | static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq) | ||
| 1997 | { | ||
| 1998 | int *dclk_freq = _dclk_freq; | ||
| 1999 | u16 dmi_mem_clk_speed; | ||
| 2000 | |||
| 2001 | if (*dclk_freq == -1) | ||
| 2002 | return; | ||
| 2003 | |||
| 2004 | if (dh->type == DMI_ENTRY_MEM_DEVICE) { | ||
| 2005 | struct memdev_dmi_entry *memdev_dmi_entry = | ||
| 2006 | (struct memdev_dmi_entry *)dh; | ||
| 2007 | unsigned long conf_mem_clk_speed_offset = | ||
| 2008 | (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed - | ||
| 2009 | (unsigned long)&memdev_dmi_entry->type; | ||
| 2010 | unsigned long speed_offset = | ||
| 2011 | (unsigned long)&memdev_dmi_entry->speed - | ||
| 2012 | (unsigned long)&memdev_dmi_entry->type; | ||
| 2013 | |||
| 2014 | /* Check that a DIMM is present */ | ||
| 2015 | if (memdev_dmi_entry->size == 0) | ||
| 2016 | return; | ||
| 2017 | |||
| 2018 | /* | ||
| 2019 | * Pick the configured speed if it's available, otherwise | ||
| 2020 | * pick the DIMM speed, or we don't have a speed. | ||
| 2021 | */ | ||
| 2022 | if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) { | ||
| 2023 | dmi_mem_clk_speed = | ||
| 2024 | memdev_dmi_entry->conf_mem_clk_speed; | ||
| 2025 | } else if (memdev_dmi_entry->length > speed_offset) { | ||
| 2026 | dmi_mem_clk_speed = memdev_dmi_entry->speed; | ||
| 2027 | } else { | ||
| 2028 | *dclk_freq = -1; | ||
| 2029 | return; | ||
| 2030 | } | ||
| 2031 | |||
| 2032 | if (*dclk_freq == 0) { | ||
| 2033 | /* First pass, speed was 0 */ | ||
| 2034 | if (dmi_mem_clk_speed > 0) { | ||
| 2035 | /* Set speed if a valid speed is read */ | ||
| 2036 | *dclk_freq = dmi_mem_clk_speed; | ||
| 2037 | } else { | ||
| 2038 | /* Otherwise we don't have a valid speed */ | ||
| 2039 | *dclk_freq = -1; | ||
| 2040 | } | ||
| 2041 | } else if (*dclk_freq > 0 && | ||
| 2042 | *dclk_freq != dmi_mem_clk_speed) { | ||
| 2043 | /* | ||
| 2044 | * If we have a speed, check that all DIMMS are the same | ||
| 2045 | * speed, otherwise set the speed as invalid. | ||
| 2046 | */ | ||
| 2047 | *dclk_freq = -1; | ||
| 2048 | } | ||
| 2049 | } | ||
| 2050 | } | ||
| 2051 | |||
| 2052 | /* | ||
| 2053 | * The default DCLK frequency is used as a fallback if we | ||
| 2054 | * fail to find anything reliable in the DMI. The value | ||
| 2055 | * is taken straight from the datasheet. | ||
| 2056 | */ | ||
| 2057 | #define DEFAULT_DCLK_FREQ 800 | ||
| 2058 | |||
| 2059 | static int get_dclk_freq(void) | ||
| 2060 | { | ||
| 2061 | int dclk_freq = 0; | ||
| 2062 | |||
| 2063 | dmi_walk(decode_dclk, (void *)&dclk_freq); | ||
| 2064 | |||
| 2065 | if (dclk_freq < 1) | ||
| 2066 | return DEFAULT_DCLK_FREQ; | ||
| 2067 | |||
| 2068 | return dclk_freq; | ||
| 2069 | } | ||
| 2070 | |||
| 2071 | /* | ||
| 2072 | * set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate | ||
| 2073 | * to hardware according to SCRUBINTERVAL formula | ||
| 2074 | * found in datasheet. | ||
| 2075 | */ | ||
| 2076 | static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) | ||
| 2077 | { | ||
| 2078 | struct i7core_pvt *pvt = mci->pvt_info; | ||
| 2079 | struct pci_dev *pdev; | ||
| 2080 | u32 dw_scrub; | ||
| 2081 | u32 dw_ssr; | ||
| 2082 | |||
| 2083 | /* Get data from the MC register, function 2 */ | ||
| 2084 | pdev = pvt->pci_mcr[2]; | ||
| 2085 | if (!pdev) | ||
| 2086 | return -ENODEV; | ||
| 2087 | |||
| 2088 | pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub); | ||
| 2089 | |||
| 2090 | if (new_bw == 0) { | ||
| 2091 | /* Prepare to disable petrol scrub */ | ||
| 2092 | dw_scrub &= ~STARTSCRUB; | ||
| 2093 | /* Stop the patrol scrub engine */ | ||
| 2094 | write_and_test(pdev, MC_SCRUB_CONTROL, | ||
| 2095 | dw_scrub & ~SCRUBINTERVAL_MASK); | ||
| 2096 | |||
| 2097 | /* Get current status of scrub rate and set bit to disable */ | ||
| 2098 | pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); | ||
| 2099 | dw_ssr &= ~SSR_MODE_MASK; | ||
| 2100 | dw_ssr |= SSR_MODE_DISABLE; | ||
| 2101 | } else { | ||
| 2102 | const int cache_line_size = 64; | ||
| 2103 | const u32 freq_dclk_mhz = pvt->dclk_freq; | ||
| 2104 | unsigned long long scrub_interval; | ||
| 2105 | /* | ||
| 2106 | * Translate the desired scrub rate to a register value and | ||
| 2107 | * program the corresponding register value. | ||
| 2108 | */ | ||
| 2109 | scrub_interval = (unsigned long long)freq_dclk_mhz * | ||
| 2110 | cache_line_size * 1000000; | ||
| 2111 | do_div(scrub_interval, new_bw); | ||
| 2112 | |||
| 2113 | if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK) | ||
| 2114 | return -EINVAL; | ||
| 2115 | |||
| 2116 | dw_scrub = SCRUBINTERVAL_MASK & scrub_interval; | ||
| 2117 | |||
| 2118 | /* Start the patrol scrub engine */ | ||
| 2119 | pci_write_config_dword(pdev, MC_SCRUB_CONTROL, | ||
| 2120 | STARTSCRUB | dw_scrub); | ||
| 2121 | |||
| 2122 | /* Get current status of scrub rate and set bit to enable */ | ||
| 2123 | pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); | ||
| 2124 | dw_ssr &= ~SSR_MODE_MASK; | ||
| 2125 | dw_ssr |= SSR_MODE_ENABLE; | ||
| 2126 | } | ||
| 2127 | /* Disable or enable scrubbing */ | ||
| 2128 | pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr); | ||
| 2129 | |||
| 2130 | return new_bw; | ||
| 2131 | } | ||
| 2132 | |||
| 2133 | /* | ||
| 2134 | * get_sdram_scrub_rate This routine convert current scrub rate value | ||
| 2135 | * into byte/sec bandwidth accourding to | ||
| 2136 | * SCRUBINTERVAL formula found in datasheet. | ||
| 2137 | */ | ||
| 2138 | static int get_sdram_scrub_rate(struct mem_ctl_info *mci) | ||
| 2139 | { | ||
| 2140 | struct i7core_pvt *pvt = mci->pvt_info; | ||
| 2141 | struct pci_dev *pdev; | ||
| 2142 | const u32 cache_line_size = 64; | ||
| 2143 | const u32 freq_dclk_mhz = pvt->dclk_freq; | ||
| 2144 | unsigned long long scrub_rate; | ||
| 2145 | u32 scrubval; | ||
| 2146 | |||
| 2147 | /* Get data from the MC register, function 2 */ | ||
| 2148 | pdev = pvt->pci_mcr[2]; | ||
| 2149 | if (!pdev) | ||
| 2150 | return -ENODEV; | ||
| 2151 | |||
| 2152 | /* Get current scrub control data */ | ||
| 2153 | pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval); | ||
| 2154 | |||
| 2155 | /* Mask highest 8-bits to 0 */ | ||
| 2156 | scrubval &= SCRUBINTERVAL_MASK; | ||
| 2157 | if (!scrubval) | ||
| 2158 | return 0; | ||
| 2159 | |||
| 2160 | /* Calculate scrub rate value into byte/sec bandwidth */ | ||
| 2161 | scrub_rate = (unsigned long long)freq_dclk_mhz * | ||
| 2162 | 1000000 * cache_line_size; | ||
| 2163 | do_div(scrub_rate, scrubval); | ||
| 2164 | return (int)scrub_rate; | ||
| 2165 | } | ||
| 2166 | |||
| 2167 | static void enable_sdram_scrub_setting(struct mem_ctl_info *mci) | ||
| 2168 | { | ||
| 2169 | struct i7core_pvt *pvt = mci->pvt_info; | ||
| 2170 | u32 pci_lock; | ||
| 2171 | |||
| 2172 | /* Unlock writes to pci registers */ | ||
| 2173 | pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock); | ||
| 2174 | pci_lock &= ~0x3; | ||
| 2175 | pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, | ||
| 2176 | pci_lock | MC_CFG_UNLOCK); | ||
| 2177 | |||
| 2178 | mci->set_sdram_scrub_rate = set_sdram_scrub_rate; | ||
| 2179 | mci->get_sdram_scrub_rate = get_sdram_scrub_rate; | ||
| 2180 | } | ||
| 2181 | |||
| 2182 | static void disable_sdram_scrub_setting(struct mem_ctl_info *mci) | ||
| 2183 | { | ||
| 2184 | struct i7core_pvt *pvt = mci->pvt_info; | ||
| 2185 | u32 pci_lock; | ||
| 2186 | |||
| 2187 | /* Lock writes to pci registers */ | ||
| 2188 | pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock); | ||
| 2189 | pci_lock &= ~0x3; | ||
| 2190 | pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, | ||
| 2191 | pci_lock | MC_CFG_LOCK); | ||
| 1869 | } | 2192 | } |
| 1870 | 2193 | ||
| 1871 | static void i7core_pci_ctl_create(struct i7core_pvt *pvt) | 2194 | static void i7core_pci_ctl_create(struct i7core_pvt *pvt) |
| @@ -1874,7 +2197,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt) | |||
| 1874 | &pvt->i7core_dev->pdev[0]->dev, | 2197 | &pvt->i7core_dev->pdev[0]->dev, |
| 1875 | EDAC_MOD_STR); | 2198 | EDAC_MOD_STR); |
| 1876 | if (unlikely(!pvt->i7core_pci)) | 2199 | if (unlikely(!pvt->i7core_pci)) |
| 1877 | pr_warn("Unable to setup PCI error report via EDAC\n"); | 2200 | i7core_printk(KERN_WARNING, |
| 2201 | "Unable to setup PCI error report via EDAC\n"); | ||
| 1878 | } | 2202 | } |
| 1879 | 2203 | ||
| 1880 | static void i7core_pci_ctl_release(struct i7core_pvt *pvt) | 2204 | static void i7core_pci_ctl_release(struct i7core_pvt *pvt) |
| @@ -1906,8 +2230,11 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) | |||
| 1906 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", | 2230 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", |
| 1907 | __func__, mci, &i7core_dev->pdev[0]->dev); | 2231 | __func__, mci, &i7core_dev->pdev[0]->dev); |
| 1908 | 2232 | ||
| 1909 | /* Disable MCE NMI handler */ | 2233 | /* Disable scrubrate setting */ |
| 1910 | edac_mce_unregister(&pvt->edac_mce); | 2234 | if (pvt->enable_scrub) |
| 2235 | disable_sdram_scrub_setting(mci); | ||
| 2236 | |||
| 2237 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); | ||
| 1911 | 2238 | ||
| 1912 | /* Disable EDAC polling */ | 2239 | /* Disable EDAC polling */ |
| 1913 | i7core_pci_ctl_release(pvt); | 2240 | i7core_pci_ctl_release(pvt); |
| @@ -1979,6 +2306,10 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) | |||
| 1979 | /* Set the function pointer to an actual operation function */ | 2306 | /* Set the function pointer to an actual operation function */ |
| 1980 | mci->edac_check = i7core_check_error; | 2307 | mci->edac_check = i7core_check_error; |
| 1981 | 2308 | ||
| 2309 | /* Enable scrubrate setting */ | ||
| 2310 | if (pvt->enable_scrub) | ||
| 2311 | enable_sdram_scrub_setting(mci); | ||
| 2312 | |||
| 1982 | /* add this new MC control structure to EDAC's list of MCs */ | 2313 | /* add this new MC control structure to EDAC's list of MCs */ |
| 1983 | if (unlikely(edac_mc_add_mc(mci))) { | 2314 | if (unlikely(edac_mc_add_mc(mci))) { |
| 1984 | debugf0("MC: " __FILE__ | 2315 | debugf0("MC: " __FILE__ |
| @@ -2002,21 +2333,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) | |||
| 2002 | /* allocating generic PCI control info */ | 2333 | /* allocating generic PCI control info */ |
| 2003 | i7core_pci_ctl_create(pvt); | 2334 | i7core_pci_ctl_create(pvt); |
| 2004 | 2335 | ||
| 2005 | /* Registers on edac_mce in order to receive memory errors */ | 2336 | /* DCLK for scrub rate setting */ |
| 2006 | pvt->edac_mce.priv = mci; | 2337 | pvt->dclk_freq = get_dclk_freq(); |
| 2007 | pvt->edac_mce.check_error = i7core_mce_check_error; | 2338 | |
| 2008 | rc = edac_mce_register(&pvt->edac_mce); | 2339 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); |
| 2009 | if (unlikely(rc < 0)) { | ||
| 2010 | debugf0("MC: " __FILE__ | ||
| 2011 | ": %s(): failed edac_mce_register()\n", __func__); | ||
| 2012 | goto fail1; | ||
| 2013 | } | ||
| 2014 | 2340 | ||
| 2015 | return 0; | 2341 | return 0; |
| 2016 | 2342 | ||
| 2017 | fail1: | ||
| 2018 | i7core_pci_ctl_release(pvt); | ||
| 2019 | edac_mc_del_mc(mci->dev); | ||
| 2020 | fail0: | 2343 | fail0: |
| 2021 | kfree(mci->ctl_name); | 2344 | kfree(mci->ctl_name); |
| 2022 | edac_mc_free(mci); | 2345 | edac_mc_free(mci); |
| @@ -2035,7 +2358,7 @@ fail0: | |||
| 2035 | static int __devinit i7core_probe(struct pci_dev *pdev, | 2358 | static int __devinit i7core_probe(struct pci_dev *pdev, |
| 2036 | const struct pci_device_id *id) | 2359 | const struct pci_device_id *id) |
| 2037 | { | 2360 | { |
| 2038 | int rc; | 2361 | int rc, count = 0; |
| 2039 | struct i7core_dev *i7core_dev; | 2362 | struct i7core_dev *i7core_dev; |
| 2040 | 2363 | ||
| 2041 | /* get the pci devices we want to reserve for our use */ | 2364 | /* get the pci devices we want to reserve for our use */ |
| @@ -2055,12 +2378,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev, | |||
| 2055 | goto fail0; | 2378 | goto fail0; |
| 2056 | 2379 | ||
| 2057 | list_for_each_entry(i7core_dev, &i7core_edac_list, list) { | 2380 | list_for_each_entry(i7core_dev, &i7core_edac_list, list) { |
| 2381 | count++; | ||
| 2058 | rc = i7core_register_mci(i7core_dev); | 2382 | rc = i7core_register_mci(i7core_dev); |
| 2059 | if (unlikely(rc < 0)) | 2383 | if (unlikely(rc < 0)) |
| 2060 | goto fail1; | 2384 | goto fail1; |
| 2061 | } | 2385 | } |
| 2062 | 2386 | ||
| 2063 | i7core_printk(KERN_INFO, "Driver loaded.\n"); | 2387 | /* |
| 2388 | * Nehalem-EX uses a different memory controller. However, as the | ||
| 2389 | * memory controller is not visible on some Nehalem/Nehalem-EP, we | ||
| 2390 | * need to indirectly probe via a X58 PCI device. The same devices | ||
| 2391 | * are found on (some) Nehalem-EX. So, on those machines, the | ||
| 2392 | * probe routine needs to return -ENODEV, as the actual Memory | ||
| 2393 | * Controller registers won't be detected. | ||
| 2394 | */ | ||
| 2395 | if (!count) { | ||
| 2396 | rc = -ENODEV; | ||
| 2397 | goto fail1; | ||
| 2398 | } | ||
| 2399 | |||
| 2400 | i7core_printk(KERN_INFO, | ||
| 2401 | "Driver loaded, %d memory controller(s) found.\n", | ||
| 2402 | count); | ||
| 2064 | 2403 | ||
| 2065 | mutex_unlock(&i7core_edac_lock); | 2404 | mutex_unlock(&i7core_edac_lock); |
| 2066 | return 0; | 2405 | return 0; |
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 795cfbc0bf50..d0864d9c38ad 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c | |||
| @@ -9,7 +9,7 @@ static u8 xec_mask = 0xf; | |||
| 9 | static u8 nb_err_cpumask = 0xf; | 9 | static u8 nb_err_cpumask = 0xf; |
| 10 | 10 | ||
| 11 | static bool report_gart_errors; | 11 | static bool report_gart_errors; |
| 12 | static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); | 12 | static void (*nb_bus_decoder)(int node_id, struct mce *m); |
| 13 | 13 | ||
| 14 | void amd_report_gart_errors(bool v) | 14 | void amd_report_gart_errors(bool v) |
| 15 | { | 15 | { |
| @@ -17,13 +17,13 @@ void amd_report_gart_errors(bool v) | |||
| 17 | } | 17 | } |
| 18 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); | 18 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); |
| 19 | 19 | ||
| 20 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)) | 20 | void amd_register_ecc_decoder(void (*f)(int, struct mce *)) |
| 21 | { | 21 | { |
| 22 | nb_bus_decoder = f; | 22 | nb_bus_decoder = f; |
| 23 | } | 23 | } |
| 24 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); | 24 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); |
| 25 | 25 | ||
| 26 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)) | 26 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)) |
| 27 | { | 27 | { |
| 28 | if (nb_bus_decoder) { | 28 | if (nb_bus_decoder) { |
| 29 | WARN_ON(nb_bus_decoder != f); | 29 | WARN_ON(nb_bus_decoder != f); |
| @@ -592,31 +592,14 @@ static bool nb_noop_mce(u16 ec, u8 xec) | |||
| 592 | return false; | 592 | return false; |
| 593 | } | 593 | } |
| 594 | 594 | ||
| 595 | void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) | 595 | void amd_decode_nb_mce(struct mce *m) |
| 596 | { | 596 | { |
| 597 | struct cpuinfo_x86 *c = &boot_cpu_data; | 597 | struct cpuinfo_x86 *c = &boot_cpu_data; |
| 598 | u16 ec = EC(m->status); | 598 | int node_id = amd_get_nb_id(m->extcpu); |
| 599 | u8 xec = XEC(m->status, 0x1f); | 599 | u16 ec = EC(m->status); |
| 600 | u32 nbsh = (u32)(m->status >> 32); | 600 | u8 xec = XEC(m->status, 0x1f); |
| 601 | int core = -1; | ||
| 602 | |||
| 603 | pr_emerg(HW_ERR "Northbridge Error (node %d", node_id); | ||
| 604 | |||
| 605 | /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */ | ||
| 606 | if (c->x86 == 0x10 && c->x86_model > 7) { | ||
| 607 | if (nbsh & NBSH_ERR_CPU_VAL) | ||
| 608 | core = nbsh & nb_err_cpumask; | ||
| 609 | } else { | ||
| 610 | u8 assoc_cpus = nbsh & nb_err_cpumask; | ||
| 611 | |||
| 612 | if (assoc_cpus > 0) | ||
| 613 | core = fls(assoc_cpus) - 1; | ||
| 614 | } | ||
| 615 | 601 | ||
| 616 | if (core >= 0) | 602 | pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); |
| 617 | pr_cont(", core %d): ", core); | ||
| 618 | else | ||
| 619 | pr_cont("): "); | ||
| 620 | 603 | ||
| 621 | switch (xec) { | 604 | switch (xec) { |
| 622 | case 0x2: | 605 | case 0x2: |
| @@ -648,7 +631,7 @@ void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) | |||
| 648 | 631 | ||
| 649 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x15) | 632 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x15) |
| 650 | if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) | 633 | if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) |
| 651 | nb_bus_decoder(node_id, m, nbcfg); | 634 | nb_bus_decoder(node_id, m); |
| 652 | 635 | ||
| 653 | return; | 636 | return; |
| 654 | 637 | ||
| @@ -764,13 +747,13 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) | |||
| 764 | { | 747 | { |
| 765 | struct mce *m = (struct mce *)data; | 748 | struct mce *m = (struct mce *)data; |
| 766 | struct cpuinfo_x86 *c = &boot_cpu_data; | 749 | struct cpuinfo_x86 *c = &boot_cpu_data; |
| 767 | int node, ecc; | 750 | int ecc; |
| 768 | 751 | ||
| 769 | if (amd_filter_mce(m)) | 752 | if (amd_filter_mce(m)) |
| 770 | return NOTIFY_STOP; | 753 | return NOTIFY_STOP; |
| 771 | 754 | ||
| 772 | pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s", | 755 | pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s", |
| 773 | m->bank, | 756 | m->extcpu, m->bank, |
| 774 | ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), | 757 | ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), |
| 775 | ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), | 758 | ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), |
| 776 | ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), | 759 | ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), |
| @@ -789,6 +772,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) | |||
| 789 | 772 | ||
| 790 | pr_cont("]: 0x%016llx\n", m->status); | 773 | pr_cont("]: 0x%016llx\n", m->status); |
| 791 | 774 | ||
| 775 | if (m->status & MCI_STATUS_ADDRV) | ||
| 776 | pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr); | ||
| 792 | 777 | ||
| 793 | switch (m->bank) { | 778 | switch (m->bank) { |
| 794 | case 0: | 779 | case 0: |
| @@ -811,8 +796,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) | |||
| 811 | break; | 796 | break; |
| 812 | 797 | ||
| 813 | case 4: | 798 | case 4: |
| 814 | node = amd_get_nb_id(m->extcpu); | 799 | amd_decode_nb_mce(m); |
| 815 | amd_decode_nb_mce(node, m, 0); | ||
| 816 | break; | 800 | break; |
| 817 | 801 | ||
| 818 | case 5: | 802 | case 5: |
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 795a3206acf5..0106747e240c 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h | |||
| @@ -86,9 +86,9 @@ struct amd_decoder_ops { | |||
| 86 | }; | 86 | }; |
| 87 | 87 | ||
| 88 | void amd_report_gart_errors(bool); | 88 | void amd_report_gart_errors(bool); |
| 89 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)); | 89 | void amd_register_ecc_decoder(void (*f)(int, struct mce *)); |
| 90 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)); | 90 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); |
| 91 | void amd_decode_nb_mce(int, struct mce *, u32); | 91 | void amd_decode_nb_mce(struct mce *); |
| 92 | int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); | 92 | int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); |
| 93 | 93 | ||
| 94 | #endif /* _EDAC_MCE_AMD_H */ | 94 | #endif /* _EDAC_MCE_AMD_H */ |
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index a4987e03f59e..73c3e26a0bce 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/kobject.h> | 13 | #include <linux/kobject.h> |
| 14 | #include <linux/sysdev.h> | 14 | #include <linux/sysdev.h> |
| 15 | #include <linux/edac.h> | 15 | #include <linux/edac.h> |
| 16 | #include <linux/module.h> | ||
| 16 | #include <asm/mce.h> | 17 | #include <asm/mce.h> |
| 17 | 18 | ||
| 18 | #include "mce_amd.h" | 19 | #include "mce_amd.h" |
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 0de7d8770891..38400963e245 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c | |||
| @@ -205,7 +205,7 @@ static struct platform_driver ppc4xx_edac_driver = { | |||
| 205 | .remove = ppc4xx_edac_remove, | 205 | .remove = ppc4xx_edac_remove, |
| 206 | .driver = { | 206 | .driver = { |
| 207 | .owner = THIS_MODULE, | 207 | .owner = THIS_MODULE, |
| 208 | .name = PPC4XX_EDAC_MODULE_NAME | 208 | .name = PPC4XX_EDAC_MODULE_NAME, |
| 209 | .of_match_table = ppc4xx_edac_match, | 209 | .of_match_table = ppc4xx_edac_match, |
| 210 | }, | 210 | }, |
| 211 | }; | 211 | }; |
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c new file mode 100644 index 000000000000..7a402bfbee7d --- /dev/null +++ b/drivers/edac/sb_edac.c | |||
| @@ -0,0 +1,1893 @@ | |||
| 1 | /* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module | ||
| 2 | * | ||
| 3 | * This driver supports the memory controllers found on the Intel | ||
| 4 | * processor family Sandy Bridge. | ||
| 5 | * | ||
| 6 | * This file may be distributed under the terms of the | ||
| 7 | * GNU General Public License version 2 only. | ||
| 8 | * | ||
| 9 | * Copyright (c) 2011 by: | ||
| 10 | * Mauro Carvalho Chehab <mchehab@redhat.com> | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/init.h> | ||
| 15 | #include <linux/pci.h> | ||
| 16 | #include <linux/pci_ids.h> | ||
| 17 | #include <linux/slab.h> | ||
| 18 | #include <linux/delay.h> | ||
| 19 | #include <linux/edac.h> | ||
| 20 | #include <linux/mmzone.h> | ||
| 21 | #include <linux/smp.h> | ||
| 22 | #include <linux/bitmap.h> | ||
| 23 | #include <asm/processor.h> | ||
| 24 | #include <asm/mce.h> | ||
| 25 | |||
| 26 | #include "edac_core.h" | ||
| 27 | |||
| 28 | /* Static vars */ | ||
| 29 | static LIST_HEAD(sbridge_edac_list); | ||
| 30 | static DEFINE_MUTEX(sbridge_edac_lock); | ||
| 31 | static int probed; | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Alter this version for the module when modifications are made | ||
| 35 | */ | ||
| 36 | #define SBRIDGE_REVISION " Ver: 1.0.0 " | ||
| 37 | #define EDAC_MOD_STR "sbridge_edac" | ||
| 38 | |||
| 39 | /* | ||
| 40 | * Debug macros | ||
| 41 | */ | ||
| 42 | #define sbridge_printk(level, fmt, arg...) \ | ||
| 43 | edac_printk(level, "sbridge", fmt, ##arg) | ||
| 44 | |||
| 45 | #define sbridge_mc_printk(mci, level, fmt, arg...) \ | ||
| 46 | edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg) | ||
| 47 | |||
| 48 | /* | ||
| 49 | * Get a bit field at register value <v>, from bit <lo> to bit <hi> | ||
| 50 | */ | ||
| 51 | #define GET_BITFIELD(v, lo, hi) \ | ||
| 52 | (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo)) | ||
| 53 | |||
| 54 | /* | ||
| 55 | * sbridge Memory Controller Registers | ||
| 56 | */ | ||
| 57 | |||
| 58 | /* | ||
| 59 | * FIXME: For now, let's order by device function, as it makes | ||
| 60 | * easier for driver's development proccess. This table should be | ||
| 61 | * moved to pci_id.h when submitted upstream | ||
| 62 | */ | ||
| 63 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ | ||
| 64 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ | ||
| 65 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ | ||
| 66 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */ | ||
| 67 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */ | ||
| 68 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */ | ||
| 69 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */ | ||
| 70 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */ | ||
| 71 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */ | ||
| 72 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */ | ||
| 73 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */ | ||
| 74 | |||
| 75 | /* | ||
| 76 | * Currently, unused, but will be needed in the future | ||
| 77 | * implementations, as they hold the error counters | ||
| 78 | */ | ||
| 79 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */ | ||
| 80 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */ | ||
| 81 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */ | ||
| 82 | #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */ | ||
| 83 | |||
| 84 | /* Devices 12 Function 6, Offsets 0x80 to 0xcc */ | ||
| 85 | static const u32 dram_rule[] = { | ||
| 86 | 0x80, 0x88, 0x90, 0x98, 0xa0, | ||
| 87 | 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, | ||
| 88 | }; | ||
| 89 | #define MAX_SAD ARRAY_SIZE(dram_rule) | ||
| 90 | |||
| 91 | #define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) | ||
| 92 | #define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) | ||
| 93 | #define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1) | ||
| 94 | #define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0) | ||
| 95 | |||
| 96 | static char *get_dram_attr(u32 reg) | ||
| 97 | { | ||
| 98 | switch(DRAM_ATTR(reg)) { | ||
| 99 | case 0: | ||
| 100 | return "DRAM"; | ||
| 101 | case 1: | ||
| 102 | return "MMCFG"; | ||
| 103 | case 2: | ||
| 104 | return "NXM"; | ||
| 105 | default: | ||
| 106 | return "unknown"; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | static const u32 interleave_list[] = { | ||
| 111 | 0x84, 0x8c, 0x94, 0x9c, 0xa4, | ||
| 112 | 0xac, 0xb4, 0xbc, 0xc4, 0xcc, | ||
| 113 | }; | ||
| 114 | #define MAX_INTERLEAVE ARRAY_SIZE(interleave_list) | ||
| 115 | |||
| 116 | #define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2) | ||
| 117 | #define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5) | ||
| 118 | #define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10) | ||
| 119 | #define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13) | ||
| 120 | #define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18) | ||
| 121 | #define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21) | ||
| 122 | #define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26) | ||
| 123 | #define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29) | ||
| 124 | |||
| 125 | static inline int sad_pkg(u32 reg, int interleave) | ||
| 126 | { | ||
| 127 | switch (interleave) { | ||
| 128 | case 0: | ||
| 129 | return SAD_PKG0(reg); | ||
| 130 | case 1: | ||
| 131 | return SAD_PKG1(reg); | ||
| 132 | case 2: | ||
| 133 | return SAD_PKG2(reg); | ||
| 134 | case 3: | ||
| 135 | return SAD_PKG3(reg); | ||
| 136 | case 4: | ||
| 137 | return SAD_PKG4(reg); | ||
| 138 | case 5: | ||
| 139 | return SAD_PKG5(reg); | ||
| 140 | case 6: | ||
| 141 | return SAD_PKG6(reg); | ||
| 142 | case 7: | ||
| 143 | return SAD_PKG7(reg); | ||
| 144 | default: | ||
| 145 | return -EINVAL; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | /* Devices 12 Function 7 */ | ||
| 150 | |||
| 151 | #define TOLM 0x80 | ||
| 152 | #define TOHM 0x84 | ||
| 153 | |||
| 154 | #define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff) | ||
| 155 | #define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff) | ||
| 156 | |||
| 157 | /* Device 13 Function 6 */ | ||
| 158 | |||
| 159 | #define SAD_TARGET 0xf0 | ||
| 160 | |||
| 161 | #define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11) | ||
| 162 | |||
| 163 | #define SAD_CONTROL 0xf4 | ||
| 164 | |||
| 165 | #define NODE_ID(reg) GET_BITFIELD(reg, 0, 2) | ||
| 166 | |||
| 167 | /* Device 14 function 0 */ | ||
| 168 | |||
| 169 | static const u32 tad_dram_rule[] = { | ||
| 170 | 0x40, 0x44, 0x48, 0x4c, | ||
| 171 | 0x50, 0x54, 0x58, 0x5c, | ||
| 172 | 0x60, 0x64, 0x68, 0x6c, | ||
| 173 | }; | ||
| 174 | #define MAX_TAD ARRAY_SIZE(tad_dram_rule) | ||
| 175 | |||
| 176 | #define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff) | ||
| 177 | #define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11) | ||
| 178 | #define TAD_CH(reg) GET_BITFIELD(reg, 8, 9) | ||
| 179 | #define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7) | ||
| 180 | #define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5) | ||
| 181 | #define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3) | ||
| 182 | #define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1) | ||
| 183 | |||
| 184 | /* Device 15, function 0 */ | ||
| 185 | |||
| 186 | #define MCMTR 0x7c | ||
| 187 | |||
| 188 | #define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2) | ||
| 189 | #define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1) | ||
| 190 | #define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0) | ||
| 191 | |||
| 192 | /* Device 15, function 1 */ | ||
| 193 | |||
| 194 | #define RASENABLES 0xac | ||
| 195 | #define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0) | ||
| 196 | |||
| 197 | /* Device 15, functions 2-5 */ | ||
| 198 | |||
| 199 | static const int mtr_regs[] = { | ||
| 200 | 0x80, 0x84, 0x88, | ||
| 201 | }; | ||
| 202 | |||
| 203 | #define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19) | ||
| 204 | #define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14) | ||
| 205 | #define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13) | ||
| 206 | #define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4) | ||
| 207 | #define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1) | ||
| 208 | |||
| 209 | static const u32 tad_ch_nilv_offset[] = { | ||
| 210 | 0x90, 0x94, 0x98, 0x9c, | ||
| 211 | 0xa0, 0xa4, 0xa8, 0xac, | ||
| 212 | 0xb0, 0xb4, 0xb8, 0xbc, | ||
| 213 | }; | ||
| 214 | #define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29) | ||
| 215 | #define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26) | ||
| 216 | |||
| 217 | static const u32 rir_way_limit[] = { | ||
| 218 | 0x108, 0x10c, 0x110, 0x114, 0x118, | ||
| 219 | }; | ||
| 220 | #define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit) | ||
| 221 | |||
| 222 | #define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31) | ||
| 223 | #define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29) | ||
| 224 | #define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff) | ||
| 225 | |||
| 226 | #define MAX_RIR_WAY 8 | ||
| 227 | |||
| 228 | static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { | ||
| 229 | { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c }, | ||
| 230 | { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c }, | ||
| 231 | { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c }, | ||
| 232 | { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c }, | ||
| 233 | { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, | ||
| 234 | }; | ||
| 235 | |||
| 236 | #define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) | ||
| 237 | #define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) | ||
| 238 | |||
| 239 | /* Device 16, functions 2-7 */ | ||
| 240 | |||
| 241 | /* | ||
| 242 | * FIXME: Implement the error count reads directly | ||
| 243 | */ | ||
| 244 | |||
| 245 | static const u32 correrrcnt[] = { | ||
| 246 | 0x104, 0x108, 0x10c, 0x110, | ||
| 247 | }; | ||
| 248 | |||
| 249 | #define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31) | ||
| 250 | #define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30) | ||
| 251 | #define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15) | ||
| 252 | #define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14) | ||
| 253 | |||
| 254 | static const u32 correrrthrsld[] = { | ||
| 255 | 0x11c, 0x120, 0x124, 0x128, | ||
| 256 | }; | ||
| 257 | |||
| 258 | #define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30) | ||
| 259 | #define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14) | ||
| 260 | |||
| 261 | |||
| 262 | /* Device 17, function 0 */ | ||
| 263 | |||
| 264 | #define RANK_CFG_A 0x0328 | ||
| 265 | |||
| 266 | #define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11) | ||
| 267 | |||
| 268 | /* | ||
| 269 | * sbridge structs | ||
| 270 | */ | ||
| 271 | |||
| 272 | #define NUM_CHANNELS 4 | ||
| 273 | #define MAX_DIMMS 3 /* Max DIMMS per channel */ | ||
| 274 | |||
| 275 | struct sbridge_info { | ||
| 276 | u32 mcmtr; | ||
| 277 | }; | ||
| 278 | |||
| 279 | struct sbridge_channel { | ||
| 280 | u32 ranks; | ||
| 281 | u32 dimms; | ||
| 282 | }; | ||
| 283 | |||
| 284 | struct pci_id_descr { | ||
| 285 | int dev; | ||
| 286 | int func; | ||
| 287 | int dev_id; | ||
| 288 | int optional; | ||
| 289 | }; | ||
| 290 | |||
| 291 | struct pci_id_table { | ||
| 292 | const struct pci_id_descr *descr; | ||
| 293 | int n_devs; | ||
| 294 | }; | ||
| 295 | |||
| 296 | struct sbridge_dev { | ||
| 297 | struct list_head list; | ||
| 298 | u8 bus, mc; | ||
| 299 | u8 node_id, source_id; | ||
| 300 | struct pci_dev **pdev; | ||
| 301 | int n_devs; | ||
| 302 | struct mem_ctl_info *mci; | ||
| 303 | }; | ||
| 304 | |||
| 305 | struct sbridge_pvt { | ||
| 306 | struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; | ||
| 307 | struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0; | ||
| 308 | struct pci_dev *pci_br; | ||
| 309 | struct pci_dev *pci_tad[NUM_CHANNELS]; | ||
| 310 | |||
| 311 | struct sbridge_dev *sbridge_dev; | ||
| 312 | |||
| 313 | struct sbridge_info info; | ||
| 314 | struct sbridge_channel channel[NUM_CHANNELS]; | ||
| 315 | |||
| 316 | int csrow_map[NUM_CHANNELS][MAX_DIMMS]; | ||
| 317 | |||
| 318 | /* Memory type detection */ | ||
| 319 | bool is_mirrored, is_lockstep, is_close_pg; | ||
| 320 | |||
| 321 | /* Fifo double buffers */ | ||
| 322 | struct mce mce_entry[MCE_LOG_LEN]; | ||
| 323 | struct mce mce_outentry[MCE_LOG_LEN]; | ||
| 324 | |||
| 325 | /* Fifo in/out counters */ | ||
| 326 | unsigned mce_in, mce_out; | ||
| 327 | |||
| 328 | /* Count indicator to show errors not got */ | ||
| 329 | unsigned mce_overrun; | ||
| 330 | |||
| 331 | /* Memory description */ | ||
| 332 | u64 tolm, tohm; | ||
| 333 | }; | ||
| 334 | |||
| 335 | #define PCI_DESCR(device, function, device_id) \ | ||
| 336 | .dev = (device), \ | ||
| 337 | .func = (function), \ | ||
| 338 | .dev_id = (device_id) | ||
| 339 | |||
| 340 | static const struct pci_id_descr pci_dev_descr_sbridge[] = { | ||
| 341 | /* Processor Home Agent */ | ||
| 342 | { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0) }, | ||
| 343 | |||
| 344 | /* Memory controller */ | ||
| 345 | { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) }, | ||
| 346 | { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS) }, | ||
| 347 | { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0) }, | ||
| 348 | { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1) }, | ||
| 349 | { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2) }, | ||
| 350 | { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3) }, | ||
| 351 | { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO) }, | ||
| 352 | |||
| 353 | /* System Address Decoder */ | ||
| 354 | { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0) }, | ||
| 355 | { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1) }, | ||
| 356 | |||
| 357 | /* Broadcast Registers */ | ||
| 358 | { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR) }, | ||
| 359 | }; | ||
| 360 | |||
| 361 | #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } | ||
| 362 | static const struct pci_id_table pci_dev_descr_sbridge_table[] = { | ||
| 363 | PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), | ||
| 364 | {0,} /* 0 terminated list. */ | ||
| 365 | }; | ||
| 366 | |||
| 367 | /* | ||
| 368 | * pci_device_id table for which devices we are looking for | ||
| 369 | */ | ||
| 370 | static const struct pci_device_id sbridge_pci_tbl[] __devinitdata = { | ||
| 371 | {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, | ||
| 372 | {0,} /* 0 terminated list. */ | ||
| 373 | }; | ||
| 374 | |||
| 375 | |||
| 376 | /**************************************************************************** | ||
| 377 | Anciliary status routines | ||
| 378 | ****************************************************************************/ | ||
| 379 | |||
| 380 | static inline int numrank(u32 mtr) | ||
| 381 | { | ||
| 382 | int ranks = (1 << RANK_CNT_BITS(mtr)); | ||
| 383 | |||
| 384 | if (ranks > 4) { | ||
| 385 | debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)", | ||
| 386 | ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); | ||
| 387 | return -EINVAL; | ||
| 388 | } | ||
| 389 | |||
| 390 | return ranks; | ||
| 391 | } | ||
| 392 | |||
| 393 | static inline int numrow(u32 mtr) | ||
| 394 | { | ||
| 395 | int rows = (RANK_WIDTH_BITS(mtr) + 12); | ||
| 396 | |||
| 397 | if (rows < 13 || rows > 18) { | ||
| 398 | debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)", | ||
| 399 | rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); | ||
| 400 | return -EINVAL; | ||
| 401 | } | ||
| 402 | |||
| 403 | return 1 << rows; | ||
| 404 | } | ||
| 405 | |||
| 406 | static inline int numcol(u32 mtr) | ||
| 407 | { | ||
| 408 | int cols = (COL_WIDTH_BITS(mtr) + 10); | ||
| 409 | |||
| 410 | if (cols > 12) { | ||
| 411 | debugf0("Invalid number of cols: %d (max = 4) raw value = %x (%04x)", | ||
| 412 | cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); | ||
| 413 | return -EINVAL; | ||
| 414 | } | ||
| 415 | |||
| 416 | return 1 << cols; | ||
| 417 | } | ||
| 418 | |||
| 419 | static struct sbridge_dev *get_sbridge_dev(u8 bus) | ||
| 420 | { | ||
| 421 | struct sbridge_dev *sbridge_dev; | ||
| 422 | |||
| 423 | list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { | ||
| 424 | if (sbridge_dev->bus == bus) | ||
| 425 | return sbridge_dev; | ||
| 426 | } | ||
| 427 | |||
| 428 | return NULL; | ||
| 429 | } | ||
| 430 | |||
| 431 | static struct sbridge_dev *alloc_sbridge_dev(u8 bus, | ||
| 432 | const struct pci_id_table *table) | ||
| 433 | { | ||
| 434 | struct sbridge_dev *sbridge_dev; | ||
| 435 | |||
| 436 | sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL); | ||
| 437 | if (!sbridge_dev) | ||
| 438 | return NULL; | ||
| 439 | |||
| 440 | sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs, | ||
| 441 | GFP_KERNEL); | ||
| 442 | if (!sbridge_dev->pdev) { | ||
| 443 | kfree(sbridge_dev); | ||
| 444 | return NULL; | ||
| 445 | } | ||
| 446 | |||
| 447 | sbridge_dev->bus = bus; | ||
| 448 | sbridge_dev->n_devs = table->n_devs; | ||
| 449 | list_add_tail(&sbridge_dev->list, &sbridge_edac_list); | ||
| 450 | |||
| 451 | return sbridge_dev; | ||
| 452 | } | ||
| 453 | |||
/*
 * Tear down a per-socket descriptor: unlink it from the global list,
 * then free the pdev pointer array and the descriptor itself.
 * Callers must already have dropped the pci_dev references stored in
 * sbridge_dev->pdev[] (see sbridge_put_devices()).
 */
static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
{
	list_del(&sbridge_dev->list);
	kfree(sbridge_dev->pdev);
	kfree(sbridge_dev);
}
| 460 | |||
| 461 | /**************************************************************************** | ||
| 462 | Memory check routines | ||
| 463 | ****************************************************************************/ | ||
| 464 | static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, | ||
| 465 | unsigned func) | ||
| 466 | { | ||
| 467 | struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus); | ||
| 468 | int i; | ||
| 469 | |||
| 470 | if (!sbridge_dev) | ||
| 471 | return NULL; | ||
| 472 | |||
| 473 | for (i = 0; i < sbridge_dev->n_devs; i++) { | ||
| 474 | if (!sbridge_dev->pdev[i]) | ||
| 475 | continue; | ||
| 476 | |||
| 477 | if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot && | ||
| 478 | PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) { | ||
| 479 | debugf1("Associated %02x.%02x.%d with %p\n", | ||
| 480 | bus, slot, func, sbridge_dev->pdev[i]); | ||
| 481 | return sbridge_dev->pdev[i]; | ||
| 482 | } | ||
| 483 | } | ||
| 484 | |||
| 485 | return NULL; | ||
| 486 | } | ||
| 487 | |||
/**
 * sbridge_get_active_channels() - gets the number of channels and csrows
 * @bus:	Device bus
 * @channels:	Number of channels that will be returned
 * @csrows:	Number of csrows found
 *
 * Since EDAC core needs to know in advance the number of available channels
 * and csrows, in order to allocate memory for csrows/channels, it is needed
 * to run two similar steps. At the first step, implemented on this function,
 * it checks the number of csrows/channels present at one socket, identified
 * by the associated PCI bus.
 * this is used in order to properly allocate the size of mci components.
 * Note: one csrow is one dimm.
 */
static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 mcmtr;

	*channels = 0;
	*csrows = 0;

	/* Device 15 function 0 holds the controller-wide MCMTR register. */
	pdev = get_pdev_slot_func(bus, 15, 0);
	if (!pdev) {
		sbridge_printk(KERN_ERR, "Couldn't find PCI device "
					"%2x.%02d.%d!!!\n",
					bus, 15, 0);
		return -ENODEV;
	}

	/* Refuse to drive a controller with ECC disabled. */
	pci_read_config_dword(pdev, MCMTR, &mcmtr);
	if (!IS_ECC_ENABLED(mcmtr)) {
		sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
		return -ENODEV;
	}

	for (i = 0; i < NUM_CHANNELS; i++) {
		u32 mtr;

		/* Device 15 functions 2 - 5 */
		pdev = get_pdev_slot_func(bus, 15, 2 + i);
		if (!pdev) {
			sbridge_printk(KERN_ERR, "Couldn't find PCI device "
						"%2x.%02d.%d!!!\n",
						bus, 15, 2 + i);
			return -ENODEV;
		}
		(*channels)++;

		/* One csrow per populated DIMM slot on this channel. */
		for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
			pci_read_config_dword(pdev, mtr_regs[j], &mtr);
			debugf1("Bus#%02x channel #%d  MTR%d = %x\n", bus, i, j, mtr);
			if (IS_DIMM_PRESENT(mtr))
				(*csrows)++;
		}
	}

	debugf0("Number of active channels: %d, number of active dimms: %d\n",
		*channels, *csrows);

	return 0;
}
| 552 | |||
/*
 * Read the per-socket configuration registers (source/node id, mirror,
 * lockstep, page mode, DIMM type) and populate one EDAC csrow per
 * populated DIMM.  Returns 0 (no failure paths implemented here).
 */
static int get_dimm_config(const struct mem_ctl_info *mci)
{
	struct sbridge_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	int i, j, banks, ranks, rows, cols, size, npages;
	int csrow = 0;
	unsigned long last_page = 0;	/* running first-page cursor */
	u32 reg;
	enum edac_type mode;
	enum mem_type mtype;

	/* Socket identity, read from the broadcast-register device. */
	pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
	pvt->sbridge_dev->source_id = SOURCE_ID(reg);

	pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg);
	pvt->sbridge_dev->node_id = NODE_ID(reg);
	debugf0("mc#%d: Node ID: %d, source ID: %d\n",
		pvt->sbridge_dev->mc,
		pvt->sbridge_dev->node_id,
		pvt->sbridge_dev->source_id);

	pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
	if (IS_MIRROR_ENABLED(reg)) {
		debugf0("Memory mirror is enabled\n");
		pvt->is_mirrored = true;
	} else {
		debugf0("Memory mirror is disabled\n");
		pvt->is_mirrored = false;
	}

	/* Lockstep mode doubles the ECC symbol size. */
	pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
	if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
		debugf0("Lockstep is enabled\n");
		mode = EDAC_S8ECD8ED;
		pvt->is_lockstep = true;
	} else {
		debugf0("Lockstep is disabled\n");
		mode = EDAC_S4ECD4ED;
		pvt->is_lockstep = false;
	}
	if (IS_CLOSE_PG(pvt->info.mcmtr)) {
		debugf0("address map is on closed page mode\n");
		pvt->is_close_pg = true;
	} else {
		debugf0("address map is on open page mode\n");
		pvt->is_close_pg = false;
	}

	pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, &reg);
	if (IS_RDIMM_ENABLED(reg)) {
		/* FIXME: Can also be LRDIMM */
		debugf0("Memory is registered\n");
		mtype = MEM_RDDR3;
	} else {
		debugf0("Memory is unregistered\n");
		mtype = MEM_DDR3;
	}

	/* On all supported DDR3 DIMM types, there are 8 banks available */
	banks = 8;

	for (i = 0; i < NUM_CHANNELS; i++) {
		u32 mtr;

		for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
			pci_read_config_dword(pvt->pci_tad[i],
					      mtr_regs[j], &mtr);
			debugf4("Channel #%d  MTR%d = %x\n", i, j, mtr);
			if (IS_DIMM_PRESENT(mtr)) {
				pvt->channel[i].dimms++;

				/*
				 * NOTE(review): numrank/numrow/numcol can
				 * return -EINVAL for out-of-spec MTR values;
				 * that would poison the size math below.
				 * Confirm whether such values can reach here.
				 */
				ranks = numrank(mtr);
				rows = numrow(mtr);
				cols = numcol(mtr);

				/* DDR3 has 8 I/O banks */
				/* size in MiB: cells * 8 bytes >> 20 */
				size = (rows * cols * banks * ranks) >> (20 - 3);
				npages = MiB_TO_PAGES(size);

				debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
					pvt->sbridge_dev->mc, i, j,
					size, npages,
					banks, ranks, rows, cols);
				csr = &mci->csrows[csrow];

				csr->first_page = last_page;
				csr->last_page = last_page + npages - 1;
				csr->page_mask = 0UL;	/* Unused */
				csr->nr_pages = npages;
				csr->grain = 32;
				csr->csrow_idx = csrow;
				csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
				csr->ce_count = 0;
				csr->ue_count = 0;
				csr->mtype = mtype;
				csr->edac_mode = mode;
				csr->nr_channels = 1;
				csr->channels[0].chan_idx = i;
				csr->channels[0].ce_count = 0;
				/* Remember (channel, slot) -> csrow mapping. */
				pvt->csrow_map[i][j] = csrow;
				snprintf(csr->channels[0].label,
					 sizeof(csr->channels[0].label),
					 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
					 pvt->sbridge_dev->source_id, i, j);
				last_page += npages;
				csrow++;
			}
		}
	}

	return 0;
}
| 665 | |||
/*
 * Walk the address-decoding registers (TOLM/TOHM, SAD rules, TAD rules,
 * per-channel TAD offsets, RIR ranges) and dump them to the debug log.
 * Side effects: caches pvt->tolm and pvt->tohm for later error decoding;
 * everything else is informational output only.
 */
static void get_memory_layout(const struct mem_ctl_info *mci)
{
	struct sbridge_pvt *pvt = mci->pvt_info;
	int i, j, k, n_sads, n_tads, sad_interl;
	u32 reg;
	u64 limit, prv = 0;
	u64 tmp_mb;
	u32 rir_way;

	/*
	 * Step 1) Get TOLM/TOHM ranges
	 */

	/* Address range is 32:28 */
	pci_read_config_dword(pvt->pci_sad1, TOLM,
			      &reg);
	pvt->tolm = GET_TOLM(reg);
	tmp_mb = (1 + pvt->tolm) >> 20;

	debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n",
		tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm);

	/* Address range is already 45:25 */
	pci_read_config_dword(pvt->pci_sad1, TOHM,
			      &reg);
	pvt->tohm = GET_TOHM(reg);
	tmp_mb = (1 + pvt->tohm) >> 20;

	debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)",
		tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm);

	/*
	 * Step 2) Get SAD range and SAD Interleave list
	 * TAD registers contain the interleave wayness. However, it
	 * seems simpler to just discover it indirectly, with the
	 * algorithm bellow.
	 */
	prv = 0;
	for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
		/* SAD_LIMIT Address range is 45:26 */
		pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
				      &reg);
		limit = SAD_LIMIT(reg);

		if (!DRAM_RULE_ENABLE(reg))
			continue;

		/* Rules must be strictly increasing; a repeat ends the list. */
		if (limit <= prv)
			break;

		tmp_mb = (limit + 1) >> 20;
		debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n",
			n_sads,
			get_dram_attr(reg),
			tmp_mb / 1000, tmp_mb % 1000,
			((u64)tmp_mb) << 20L,
			INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]",
			reg);
		prv = limit;

		pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
				      &reg);
		sad_interl = sad_pkg(reg, 0);
		/* Interleave list ends when a package repeats entry 0. */
		for (j = 0; j < 8; j++) {
			if (j > 0 && sad_interl == sad_pkg(reg, j))
				break;

			debugf0("SAD#%d, interleave #%d: %d\n",
				n_sads, j, sad_pkg(reg, j));
		}
	}

	/*
	 * Step 3) Get TAD range
	 */
	prv = 0;
	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
		pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
				      &reg);
		limit = TAD_LIMIT(reg);
		if (limit <= prv)
			break;
		tmp_mb = (limit + 1) >> 20;

		debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
			n_tads, tmp_mb / 1000, tmp_mb % 1000,
			((u64)tmp_mb) << 20L,
			(u32)TAD_SOCK(reg),
			(u32)TAD_CH(reg),
			(u32)TAD_TGT0(reg),
			(u32)TAD_TGT1(reg),
			(u32)TAD_TGT2(reg),
			(u32)TAD_TGT3(reg),
			reg);
		/*
		 * NOTE(review): prv is set to tmp_mb (MiB) but compared
		 * against limit (bytes) above -- presumably this should
		 * be "prv = limit". Confirm before relying on the
		 * early-break behavior of this loop.
		 */
		prv = tmp_mb;
	}

	/*
	 * Step 4) Get TAD offsets, per each channel
	 */
	for (i = 0; i < NUM_CHANNELS; i++) {
		if (!pvt->channel[i].dimms)
			continue;
		for (j = 0; j < n_tads; j++) {
			pci_read_config_dword(pvt->pci_tad[i],
					      tad_ch_nilv_offset[j],
					      &reg);
			tmp_mb = TAD_OFFSET(reg) >> 20;
			debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n",
				i, j,
				tmp_mb / 1000, tmp_mb % 1000,
				((u64)tmp_mb) << 20L,
				reg);
		}
	}

	/*
	 * Step 5) Get RIR Wayness/Limit, per each channel
	 */
	for (i = 0; i < NUM_CHANNELS; i++) {
		if (!pvt->channel[i].dimms)
			continue;
		for (j = 0; j < MAX_RIR_RANGES; j++) {
			pci_read_config_dword(pvt->pci_tad[i],
					      rir_way_limit[j],
					      &reg);

			if (!IS_RIR_VALID(reg))
				continue;

			tmp_mb = RIR_LIMIT(reg) >> 20;
			rir_way = 1 << RIR_WAY(reg);
			debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n",
				i, j,
				tmp_mb / 1000, tmp_mb % 1000,
				((u64)tmp_mb) << 20L,
				rir_way,
				reg);

			/* Dump each interleave target's offset. */
			for (k = 0; k < rir_way; k++) {
				pci_read_config_dword(pvt->pci_tad[i],
						      rir_offset[j][k],
						      &reg);
				tmp_mb = RIR_OFFSET(reg) << 6;

				debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
					i, j, k,
					tmp_mb / 1000, tmp_mb % 1000,
					((u64)tmp_mb) << 20L,
					(u32)RIR_RNK_TGT(reg),
					reg);
			}
		}
	}
}
| 821 | |||
| 822 | struct mem_ctl_info *get_mci_for_node_id(u8 node_id) | ||
| 823 | { | ||
| 824 | struct sbridge_dev *sbridge_dev; | ||
| 825 | |||
| 826 | list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { | ||
| 827 | if (sbridge_dev->node_id == node_id) | ||
| 828 | return sbridge_dev->mci; | ||
| 829 | } | ||
| 830 | return NULL; | ||
| 831 | } | ||
| 832 | |||
/*
 * get_memory_error_data - decode a system address down to socket,
 * channel mask and rank, following the SAD -> TAD -> RIR chain.
 *
 * @mci:          MC that reported the error; re-pointed below to the MC
 *                that actually owns the address (per the SAD decode).
 * @addr:         system address to decode.
 * @socket:       output, CPU socket owning the address.
 * @channel_mask: output, bitmask of the channel(s) involved (mirrors
 *                and lockstep partners included).
 * @rank:         output, RIR rank target.
 * @area_type:    see NOTE(review) at the assignment below.
 *
 * Returns 0 on success; -EINVAL (after logging a no-info CE) when the
 * address cannot be decoded.
 */
static int get_memory_error_data(struct mem_ctl_info *mci,
				 u64 addr,
				 u8 *socket,
				 long *channel_mask,
				 u8 *rank,
				 char *area_type)
{
	struct mem_ctl_info *new_mci;
	struct sbridge_pvt *pvt = mci->pvt_info;
	char msg[256];
	int n_rir, n_sads, n_tads, sad_way, sck_xch;
	int sad_interl, idx, base_ch;
	int interleave_mode;
	unsigned sad_interleave[MAX_INTERLEAVE];
	u32 reg;
	u8 ch_way,sck_way;
	u32 tad_offset;
	u32 rir_way;
	u64 ch_addr, offset, limit, prv = 0;


	/*
	 * Step 0) Check if the address is at special memory ranges
	 * The check bellow is probably enough to fill all cases where
	 * the error is not inside a memory, except for the legacy
	 * range (e. g. VGA addresses). It is unlikely, however, that the
	 * memory controller would generate an error on that range.
	 */
	if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) {
		sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	if (addr >= (u64)pvt->tohm) {
		sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}

	/*
	 * Step 1) Get socket
	 */
	for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
		pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
				      &reg);

		if (!DRAM_RULE_ENABLE(reg))
			continue;

		limit = SAD_LIMIT(reg);
		/* SAD limits must be strictly increasing. */
		if (limit <= prv) {
			sprintf(msg, "Can't discover the memory socket");
			edac_mc_handle_ce_no_info(mci, msg);
			return -EINVAL;
		}
		if (addr <= limit)
			break;
		prv = limit;
	}
	if (n_sads == MAX_SAD) {
		sprintf(msg, "Can't discover the memory socket");
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	/*
	 * NOTE(review): this only assigns the function's local copy of the
	 * area_type pointer parameter -- the caller never sees the value.
	 * Propagating it would need a char ** parameter (or a strcpy into a
	 * caller-supplied buffer), i.e. a signature change; flagged rather
	 * than fixed here.
	 */
	area_type = get_dram_attr(reg);
	interleave_mode = INTERLEAVE_MODE(reg);

	pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
			      &reg);
	sad_interl = sad_pkg(reg, 0);
	/* Wayness is discovered by where the interleave list repeats. */
	for (sad_way = 0; sad_way < 8; sad_way++) {
		if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
			break;
		sad_interleave[sad_way] = sad_pkg(reg, sad_way);
		debugf0("SAD interleave #%d: %d\n",
			sad_way, sad_interleave[sad_way]);
	}
	debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
		pvt->sbridge_dev->mc,
		n_sads,
		addr,
		limit,
		sad_way + 7,
		INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]");
	if (interleave_mode)
		idx = ((addr >> 6) ^ (addr >> 16)) & 7;
	else
		idx = (addr >> 6) & 7;
	/* Reduce the 3-bit interleave index to the actual wayness. */
	switch (sad_way) {
	case 1:
		idx = 0;
		break;
	case 2:
		idx = idx & 1;
		break;
	case 4:
		idx = idx & 3;
		break;
	case 8:
		break;
	default:
		sprintf(msg, "Can't discover socket interleave");
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	*socket = sad_interleave[idx];
	debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n",
		idx, sad_way, *socket);

	/*
	 * Move to the proper node structure, in order to access the
	 * right PCI registers
	 */
	new_mci = get_mci_for_node_id(*socket);
	if (!new_mci) {
		sprintf(msg, "Struct for socket #%u wasn't initialized",
			*socket);
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	mci = new_mci;
	pvt = mci->pvt_info;

	/*
	 * Step 2) Get memory channel
	 */
	prv = 0;
	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
		pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
				      &reg);
		limit = TAD_LIMIT(reg);
		if (limit <= prv) {
			sprintf(msg, "Can't discover the memory channel");
			edac_mc_handle_ce_no_info(mci, msg);
			return -EINVAL;
		}
		if (addr <= limit)
			break;
		prv = limit;
	}
	ch_way = TAD_CH(reg) + 1;
	sck_way = TAD_SOCK(reg) + 1;
	/*
	 * FIXME: Is it right to always use channel 0 for offsets?
	 */
	pci_read_config_dword(pvt->pci_tad[0],
			      tad_ch_nilv_offset[n_tads],
			      &tad_offset);

	if (ch_way == 3)
		idx = addr >> 6;
	else
		idx = addr >> (6 + sck_way);
	idx = idx % ch_way;

	/*
	 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
	 */
	switch (idx) {
	case 0:
		base_ch = TAD_TGT0(reg);
		break;
	case 1:
		base_ch = TAD_TGT1(reg);
		break;
	case 2:
		base_ch = TAD_TGT2(reg);
		break;
	case 3:
		base_ch = TAD_TGT3(reg);
		break;
	default:
		sprintf(msg, "Can't discover the TAD target");
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	*channel_mask = 1 << base_ch;

	if (pvt->is_mirrored) {
		/* The mirror partner is the channel two positions away. */
		*channel_mask |= 1 << ((base_ch + 2) % 4);
		switch(ch_way) {
		case 2:
		case 4:
			sck_xch = 1 << sck_way * (ch_way >> 1);
			break;
		default:
			sprintf(msg, "Invalid mirror set. Can't decode addr");
			edac_mc_handle_ce_no_info(mci, msg);
			return -EINVAL;
		}
	} else
		sck_xch = (1 << sck_way) * ch_way;

	if (pvt->is_lockstep)
		/* Lockstep pairs the adjacent channel. */
		*channel_mask |= 1 << ((base_ch + 1) % 4);

	offset = TAD_OFFSET(tad_offset);

	debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
		n_tads,
		addr,
		limit,
		(u32)TAD_SOCK(reg),
		ch_way,
		offset,
		idx,
		base_ch,
		*channel_mask);

	/* Calculate channel address */
	/* Remove the TAD offset */

	if (offset > addr) {
		sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
			offset, addr);
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	addr -= offset;
	/* Store the low bits [0:6] of the addr */
	ch_addr = addr & 0x7f;
	/* Remove socket wayness and remove 6 bits */
	addr >>= 6;
	addr /= sck_xch;
#if 0
	/* Divide by channel way */
	addr = addr / ch_way;
#endif
	/* Recover the last 6 bits */
	ch_addr |= addr << 6;

	/*
	 * Step 3) Decode rank
	 */
	for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
		pci_read_config_dword(pvt->pci_tad[base_ch],
				      rir_way_limit[n_rir],
				      &reg);

		if (!IS_RIR_VALID(reg))
			continue;

		limit = RIR_LIMIT(reg);

		debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n",
			n_rir,
			(limit >> 20) / 1000, (limit >> 20) % 1000,
			limit,
			1 << RIR_WAY(reg));
		if (ch_addr <= limit)
			break;
	}
	if (n_rir == MAX_RIR_RANGES) {
		sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
			ch_addr);
		edac_mc_handle_ce_no_info(mci, msg);
		return -EINVAL;
	}
	rir_way = RIR_WAY(reg);
	if (pvt->is_close_pg)
		idx = (ch_addr >> 6);
	else
		idx = (ch_addr >> 13);	/* FIXME: Datasheet says to shift by 15 */
	idx %= 1 << rir_way;

	pci_read_config_dword(pvt->pci_tad[base_ch],
			      rir_offset[n_rir][idx],
			      &reg);
	*rank = RIR_RNK_TGT(reg);

	debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
		n_rir,
		ch_addr,
		limit,
		rir_way,
		idx);

	return 0;
}
| 1112 | |||
| 1113 | /**************************************************************************** | ||
| 1114 | Device initialization routines: put/get, init/exit | ||
| 1115 | ****************************************************************************/ | ||
| 1116 | |||
| 1117 | /* | ||
| 1118 | * sbridge_put_all_devices 'put' all the devices that we have | ||
| 1119 | * reserved via 'get' | ||
| 1120 | */ | ||
| 1121 | static void sbridge_put_devices(struct sbridge_dev *sbridge_dev) | ||
| 1122 | { | ||
| 1123 | int i; | ||
| 1124 | |||
| 1125 | debugf0(__FILE__ ": %s()\n", __func__); | ||
| 1126 | for (i = 0; i < sbridge_dev->n_devs; i++) { | ||
| 1127 | struct pci_dev *pdev = sbridge_dev->pdev[i]; | ||
| 1128 | if (!pdev) | ||
| 1129 | continue; | ||
| 1130 | debugf0("Removing dev %02x:%02x.%d\n", | ||
| 1131 | pdev->bus->number, | ||
| 1132 | PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); | ||
| 1133 | pci_dev_put(pdev); | ||
| 1134 | } | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | static void sbridge_put_all_devices(void) | ||
| 1138 | { | ||
| 1139 | struct sbridge_dev *sbridge_dev, *tmp; | ||
| 1140 | |||
| 1141 | list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) { | ||
| 1142 | sbridge_put_devices(sbridge_dev); | ||
| 1143 | free_sbridge_dev(sbridge_dev); | ||
| 1144 | } | ||
| 1145 | } | ||
| 1146 | |||
/*
 * sbridge_get_onedevice - 'get' the next PCI device matching one entry
 * of a pci_id_table and file it under its socket's sbridge_dev.
 *
 * @prev:   in/out cursor for pci_get_device(); NULL starts a fresh scan
 *          and it is updated to the device found (NULL at end of scan).
 * @num_mc: incremented when a new socket (bus) is seen for the first time.
 * @table:  descriptor table being scanned.
 * @devno:  index into @table of the entry to look for.
 *
 * Returns 0 on success or for an optional/end-of-scan miss;
 * -ENODEV / -ENOMEM on hard failures.
 */
static int sbridge_get_onedevice(struct pci_dev **prev,
				 u8 *num_mc,
				 const struct pci_id_table *table,
				 const unsigned devno)
{
	struct sbridge_dev *sbridge_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;

	sbridge_printk(KERN_INFO,
		"Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
		dev_descr->dev, dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	if (!pdev) {
		/* NULL after at least one hit is just end-of-scan. */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* Missing entry 0 means this socket doesn't exist at all. */
		if (devno == 0)
			return -ENODEV;

		sbridge_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	/* First device seen on a bus allocates that socket's descriptor. */
	sbridge_dev = get_sbridge_dev(bus);
	if (!sbridge_dev) {
		sbridge_dev = alloc_sbridge_dev(bus, table);
		if (!sbridge_dev) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
		(*num_mc)++;
	}

	if (sbridge_dev->pdev[devno]) {
		sbridge_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%d.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	sbridge_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
		     PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		sbridge_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		/* pdev stays in ->pdev[devno]; the caller's teardown path
		 * (sbridge_put_all_devices) drops the reference. */
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		sbridge_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%d.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
		bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	/*
	 * As stated on drivers/pci/search.c, the reference count for
	 * @from is always decremented if it is not %NULL. So, as we need
	 * to get all devices up to null, we need to do a get for the device
	 */
	pci_dev_get(pdev);

	*prev = pdev;

	return 0;
}
| 1254 | |||
/*
 * sbridge_get_all_devices - find and 'get' every PCI device the driver
 * needs, grouped per socket (per bus).
 *
 * Walks every pci_id_table and, for each entry, repeatedly calls
 * sbridge_get_onedevice() until the scan cursor returns NULL, so one
 * entry can match on several sockets.  Returns 0 on success; on a hard
 * failure everything grabbed so far is released and -ENODEV returned.
 */
static int sbridge_get_all_devices(u8 *num_mc)
{
	int i, rc;
	struct pci_dev *pdev = NULL;
	const struct pci_id_table *table = pci_dev_descr_sbridge_table;

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			do {
				rc = sbridge_get_onedevice(&pdev, num_mc,
							   table, i);
				if (rc < 0) {
					/*
					 * A miss on the very first entry means
					 * no such socket exists: skip the rest
					 * of this table instead of failing.
					 */
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					sbridge_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}
| 1282 | |||
| 1283 | static int mci_bind_devs(struct mem_ctl_info *mci, | ||
| 1284 | struct sbridge_dev *sbridge_dev) | ||
| 1285 | { | ||
| 1286 | struct sbridge_pvt *pvt = mci->pvt_info; | ||
| 1287 | struct pci_dev *pdev; | ||
| 1288 | int i, func, slot; | ||
| 1289 | |||
| 1290 | for (i = 0; i < sbridge_dev->n_devs; i++) { | ||
| 1291 | pdev = sbridge_dev->pdev[i]; | ||
| 1292 | if (!pdev) | ||
| 1293 | continue; | ||
| 1294 | slot = PCI_SLOT(pdev->devfn); | ||
| 1295 | func = PCI_FUNC(pdev->devfn); | ||
| 1296 | switch (slot) { | ||
| 1297 | case 12: | ||
| 1298 | switch (func) { | ||
| 1299 | case 6: | ||
| 1300 | pvt->pci_sad0 = pdev; | ||
| 1301 | break; | ||
| 1302 | case 7: | ||
| 1303 | pvt->pci_sad1 = pdev; | ||
| 1304 | break; | ||
| 1305 | default: | ||
| 1306 | goto error; | ||
| 1307 | } | ||
| 1308 | break; | ||
| 1309 | case 13: | ||
| 1310 | switch (func) { | ||
| 1311 | case 6: | ||
| 1312 | pvt->pci_br = pdev; | ||
| 1313 | break; | ||
| 1314 | default: | ||
| 1315 | goto error; | ||
| 1316 | } | ||
| 1317 | break; | ||
| 1318 | case 14: | ||
| 1319 | switch (func) { | ||
| 1320 | case 0: | ||
| 1321 | pvt->pci_ha0 = pdev; | ||
| 1322 | break; | ||
| 1323 | default: | ||
| 1324 | goto error; | ||
| 1325 | } | ||
| 1326 | break; | ||
| 1327 | case 15: | ||
| 1328 | switch (func) { | ||
| 1329 | case 0: | ||
| 1330 | pvt->pci_ta = pdev; | ||
| 1331 | break; | ||
| 1332 | case 1: | ||
| 1333 | pvt->pci_ras = pdev; | ||
| 1334 | break; | ||
| 1335 | case 2: | ||
| 1336 | case 3: | ||
| 1337 | case 4: | ||
| 1338 | case 5: | ||
| 1339 | pvt->pci_tad[func - 2] = pdev; | ||
| 1340 | break; | ||
| 1341 | default: | ||
| 1342 | goto error; | ||
| 1343 | } | ||
| 1344 | break; | ||
| 1345 | case 17: | ||
| 1346 | switch (func) { | ||
| 1347 | case 0: | ||
| 1348 | pvt->pci_ddrio = pdev; | ||
| 1349 | break; | ||
| 1350 | default: | ||
| 1351 | goto error; | ||
| 1352 | } | ||
| 1353 | break; | ||
| 1354 | default: | ||
| 1355 | goto error; | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | debugf0("Associated PCI %02x.%02d.%d with dev = %p\n", | ||
| 1359 | sbridge_dev->bus, | ||
| 1360 | PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), | ||
| 1361 | pdev); | ||
| 1362 | } | ||
| 1363 | |||
| 1364 | /* Check if everything were registered */ | ||
| 1365 | if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || | ||
| 1366 | !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta || | ||
| 1367 | !pvt->pci_ddrio) | ||
| 1368 | goto enodev; | ||
| 1369 | |||
| 1370 | for (i = 0; i < NUM_CHANNELS; i++) { | ||
| 1371 | if (!pvt->pci_tad[i]) | ||
| 1372 | goto enodev; | ||
| 1373 | } | ||
| 1374 | return 0; | ||
| 1375 | |||
| 1376 | enodev: | ||
| 1377 | sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); | ||
| 1378 | return -ENODEV; | ||
| 1379 | |||
| 1380 | error: | ||
| 1381 | sbridge_printk(KERN_ERR, "Device %d, function %d " | ||
| 1382 | "is out of the expected range\n", | ||
| 1383 | slot, func); | ||
| 1384 | return -EINVAL; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | /**************************************************************************** | ||
| 1388 | Error check routines | ||
| 1389 | ****************************************************************************/ | ||
| 1390 | |||
| 1391 | /* | ||
| 1392 | * While Sandy Bridge has error count registers, SMI BIOS read values from | ||
| 1393 | * and resets the counters. So, they are not reliable for the OS to read | ||
| 1394 | * from them. So, we have no option but to just trust on whatever MCE is | ||
| 1395 | * telling us about the errors. | ||
| 1396 | */ | ||
| 1397 | static void sbridge_mce_output_error(struct mem_ctl_info *mci, | ||
| 1398 | const struct mce *m) | ||
| 1399 | { | ||
| 1400 | struct mem_ctl_info *new_mci; | ||
| 1401 | struct sbridge_pvt *pvt = mci->pvt_info; | ||
| 1402 | char *type, *optype, *msg, *recoverable_msg; | ||
| 1403 | bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); | ||
| 1404 | bool overflow = GET_BITFIELD(m->status, 62, 62); | ||
| 1405 | bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); | ||
| 1406 | bool recoverable = GET_BITFIELD(m->status, 56, 56); | ||
| 1407 | u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); | ||
| 1408 | u32 mscod = GET_BITFIELD(m->status, 16, 31); | ||
| 1409 | u32 errcode = GET_BITFIELD(m->status, 0, 15); | ||
| 1410 | u32 channel = GET_BITFIELD(m->status, 0, 3); | ||
| 1411 | u32 optypenum = GET_BITFIELD(m->status, 4, 6); | ||
| 1412 | long channel_mask, first_channel; | ||
| 1413 | u8 rank, socket; | ||
| 1414 | int csrow, rc, dimm; | ||
| 1415 | char *area_type = "Unknown"; | ||
| 1416 | |||
| 1417 | if (ripv) | ||
| 1418 | type = "NON_FATAL"; | ||
| 1419 | else | ||
| 1420 | type = "FATAL"; | ||
| 1421 | |||
| 1422 | /* | ||
| 1423 | * According with Table 15-9 of the Intel Archictecture spec vol 3A, | ||
| 1424 | * memory errors should fit in this mask: | ||
| 1425 | * 000f 0000 1mmm cccc (binary) | ||
| 1426 | * where: | ||
| 1427 | * f = Correction Report Filtering Bit. If 1, subsequent errors | ||
| 1428 | * won't be shown | ||
| 1429 | * mmm = error type | ||
| 1430 | * cccc = channel | ||
| 1431 | * If the mask doesn't match, report an error to the parsing logic | ||
| 1432 | */ | ||
| 1433 | if (! ((errcode & 0xef80) == 0x80)) { | ||
| 1434 | optype = "Can't parse: it is not a mem"; | ||
| 1435 | } else { | ||
| 1436 | switch (optypenum) { | ||
| 1437 | case 0: | ||
| 1438 | optype = "generic undef request"; | ||
| 1439 | break; | ||
| 1440 | case 1: | ||
| 1441 | optype = "memory read"; | ||
| 1442 | break; | ||
| 1443 | case 2: | ||
| 1444 | optype = "memory write"; | ||
| 1445 | break; | ||
| 1446 | case 3: | ||
| 1447 | optype = "addr/cmd"; | ||
| 1448 | break; | ||
| 1449 | case 4: | ||
| 1450 | optype = "memory scrubbing"; | ||
| 1451 | break; | ||
| 1452 | default: | ||
| 1453 | optype = "reserved"; | ||
| 1454 | break; | ||
| 1455 | } | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | rc = get_memory_error_data(mci, m->addr, &socket, | ||
| 1459 | &channel_mask, &rank, area_type); | ||
| 1460 | if (rc < 0) | ||
| 1461 | return; | ||
| 1462 | new_mci = get_mci_for_node_id(socket); | ||
| 1463 | if (!new_mci) { | ||
| 1464 | edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!"); | ||
| 1465 | return; | ||
| 1466 | } | ||
| 1467 | mci = new_mci; | ||
| 1468 | pvt = mci->pvt_info; | ||
| 1469 | |||
| 1470 | first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); | ||
| 1471 | |||
| 1472 | if (rank < 4) | ||
| 1473 | dimm = 0; | ||
| 1474 | else if (rank < 8) | ||
| 1475 | dimm = 1; | ||
| 1476 | else | ||
| 1477 | dimm = 2; | ||
| 1478 | |||
| 1479 | csrow = pvt->csrow_map[first_channel][dimm]; | ||
| 1480 | |||
| 1481 | if (uncorrected_error && recoverable) | ||
| 1482 | recoverable_msg = " recoverable"; | ||
| 1483 | else | ||
| 1484 | recoverable_msg = ""; | ||
| 1485 | |||
| 1486 | /* | ||
| 1487 | * FIXME: What should we do with "channel" information on mcelog? | ||
| 1488 | * Probably, we can just discard it, as the channel information | ||
| 1489 | * comes from the get_memory_error_data() address decoding | ||
| 1490 | */ | ||
| 1491 | msg = kasprintf(GFP_ATOMIC, | ||
| 1492 | "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " | ||
| 1493 | "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n", | ||
| 1494 | core_err_cnt, | ||
| 1495 | area_type, | ||
| 1496 | optype, | ||
| 1497 | type, | ||
| 1498 | recoverable_msg, | ||
| 1499 | overflow ? "OVERFLOW" : "", | ||
| 1500 | m->cpu, | ||
| 1501 | mscod, errcode, | ||
| 1502 | channel, /* 1111b means not specified */ | ||
| 1503 | (long long) m->addr, | ||
| 1504 | socket, | ||
| 1505 | first_channel, /* This is the real channel on SB */ | ||
| 1506 | channel_mask, | ||
| 1507 | rank); | ||
| 1508 | |||
| 1509 | debugf0("%s", msg); | ||
| 1510 | |||
| 1511 | /* Call the helper to output message */ | ||
| 1512 | if (uncorrected_error) | ||
| 1513 | edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); | ||
| 1514 | else | ||
| 1515 | edac_mc_handle_fbd_ce(mci, csrow, 0, msg); | ||
| 1516 | |||
| 1517 | kfree(msg); | ||
| 1518 | } | ||
| 1519 | |||
/*
 * sbridge_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 *
 * Consumer side of the per-MC mce ring buffer: events queued by
 * sbridge_mce_check_error() (possibly at NMI time) are copied out into
 * pvt->mce_outentry and then decoded outside NMI context.
 */
static void sbridge_check_error(struct mem_ctl_info *mci)
{
	struct sbridge_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	/* Number of queued entries between the in/out ring indices */
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		return;

	m = pvt->mce_outentry;
	/* The pending entries may wrap around the ring: copy in two chunks */
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	/* Report and reset the overrun counter bumped by the producer */
	if (pvt->mce_overrun) {
		sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
			       pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
}
| 1570 | |||
/*
 * sbridge_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by sbridge_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
				   void *data)
{
	struct mce *mce = (struct mce *)data;
	struct mem_ctl_info *mci;
	struct sbridge_pvt *pvt;

	/* Map the reporting socket to its EDAC MC instance */
	mci = get_mci_for_node_id(mce->socketid);
	if (!mci)
		return NOTIFY_BAD;
	pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller. A memory error
	 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0.
	 * bit 12 has an special meaning.
	 */
	if ((mce->status & 0xefff) >> 7 != 1)
		return NOTIFY_DONE;

	printk("sbridge: HANDLING MCE MEMORY ERROR\n");

	printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
	       mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
	printk("TSC %llx ", mce->tsc);
	printk("ADDR %llx ", mce->addr);
	printk("MISC %llx ", mce->misc);

	printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
	       mce->cpuvendor, mce->cpuid, mce->time,
	       mce->socketid, mce->apicid);

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
		return NOTIFY_DONE;
#endif

	/*
	 * Producer side of the mce ring buffer: if it is full, just count
	 * the lost event; sbridge_check_error() drains the ring later.
	 */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		sbridge_check_error(mci);

	/* Advice mcelog that the error were handled */
	return NOTIFY_STOP;
}
| 1637 | |||
/* Hook into the x86 MCE decoder chain; (un)registered in register/unregister_mci */
static struct notifier_block sbridge_mce_dec = {
	.notifier_call      = sbridge_mce_check_error,
};
| 1641 | |||
| 1642 | /**************************************************************************** | ||
| 1643 | EDAC register/unregister logic | ||
| 1644 | ****************************************************************************/ | ||
| 1645 | |||
| 1646 | static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) | ||
| 1647 | { | ||
| 1648 | struct mem_ctl_info *mci = sbridge_dev->mci; | ||
| 1649 | struct sbridge_pvt *pvt; | ||
| 1650 | |||
| 1651 | if (unlikely(!mci || !mci->pvt_info)) { | ||
| 1652 | debugf0("MC: " __FILE__ ": %s(): dev = %p\n", | ||
| 1653 | __func__, &sbridge_dev->pdev[0]->dev); | ||
| 1654 | |||
| 1655 | sbridge_printk(KERN_ERR, "Couldn't find mci handler\n"); | ||
| 1656 | return; | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | pvt = mci->pvt_info; | ||
| 1660 | |||
| 1661 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", | ||
| 1662 | __func__, mci, &sbridge_dev->pdev[0]->dev); | ||
| 1663 | |||
| 1664 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, | ||
| 1665 | &sbridge_mce_dec); | ||
| 1666 | |||
| 1667 | /* Remove MC sysfs nodes */ | ||
| 1668 | edac_mc_del_mc(mci->dev); | ||
| 1669 | |||
| 1670 | debugf1("%s: free mci struct\n", mci->ctl_name); | ||
| 1671 | kfree(mci->ctl_name); | ||
| 1672 | edac_mc_free(mci); | ||
| 1673 | sbridge_dev->mci = NULL; | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) | ||
| 1677 | { | ||
| 1678 | struct mem_ctl_info *mci; | ||
| 1679 | struct sbridge_pvt *pvt; | ||
| 1680 | int rc, channels, csrows; | ||
| 1681 | |||
| 1682 | /* Check the number of active and not disabled channels */ | ||
| 1683 | rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); | ||
| 1684 | if (unlikely(rc < 0)) | ||
| 1685 | return rc; | ||
| 1686 | |||
| 1687 | /* allocate a new MC control structure */ | ||
| 1688 | mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); | ||
| 1689 | if (unlikely(!mci)) | ||
| 1690 | return -ENOMEM; | ||
| 1691 | |||
| 1692 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", | ||
| 1693 | __func__, mci, &sbridge_dev->pdev[0]->dev); | ||
| 1694 | |||
| 1695 | pvt = mci->pvt_info; | ||
| 1696 | memset(pvt, 0, sizeof(*pvt)); | ||
| 1697 | |||
| 1698 | /* Associate sbridge_dev and mci for future usage */ | ||
| 1699 | pvt->sbridge_dev = sbridge_dev; | ||
| 1700 | sbridge_dev->mci = mci; | ||
| 1701 | |||
| 1702 | mci->mtype_cap = MEM_FLAG_DDR3; | ||
| 1703 | mci->edac_ctl_cap = EDAC_FLAG_NONE; | ||
| 1704 | mci->edac_cap = EDAC_FLAG_NONE; | ||
| 1705 | mci->mod_name = "sbridge_edac.c"; | ||
| 1706 | mci->mod_ver = SBRIDGE_REVISION; | ||
| 1707 | mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); | ||
| 1708 | mci->dev_name = pci_name(sbridge_dev->pdev[0]); | ||
| 1709 | mci->ctl_page_to_phys = NULL; | ||
| 1710 | |||
| 1711 | /* Set the function pointer to an actual operation function */ | ||
| 1712 | mci->edac_check = sbridge_check_error; | ||
| 1713 | |||
| 1714 | /* Store pci devices at mci for faster access */ | ||
| 1715 | rc = mci_bind_devs(mci, sbridge_dev); | ||
| 1716 | if (unlikely(rc < 0)) | ||
| 1717 | goto fail0; | ||
| 1718 | |||
| 1719 | /* Get dimm basic config and the memory layout */ | ||
| 1720 | get_dimm_config(mci); | ||
| 1721 | get_memory_layout(mci); | ||
| 1722 | |||
| 1723 | /* record ptr to the generic device */ | ||
| 1724 | mci->dev = &sbridge_dev->pdev[0]->dev; | ||
| 1725 | |||
| 1726 | /* add this new MC control structure to EDAC's list of MCs */ | ||
| 1727 | if (unlikely(edac_mc_add_mc(mci))) { | ||
| 1728 | debugf0("MC: " __FILE__ | ||
| 1729 | ": %s(): failed edac_mc_add_mc()\n", __func__); | ||
| 1730 | rc = -EINVAL; | ||
| 1731 | goto fail0; | ||
| 1732 | } | ||
| 1733 | |||
| 1734 | atomic_notifier_chain_register(&x86_mce_decoder_chain, | ||
| 1735 | &sbridge_mce_dec); | ||
| 1736 | return 0; | ||
| 1737 | |||
| 1738 | fail0: | ||
| 1739 | kfree(mci->ctl_name); | ||
| 1740 | edac_mc_free(mci); | ||
| 1741 | sbridge_dev->mci = NULL; | ||
| 1742 | return rc; | ||
| 1743 | } | ||
| 1744 | |||
/*
 * sbridge_probe	Probe for ONE instance of device to see if it is
 *			present.
 * return:
 *	0 for FOUND a device
 *	< 0 for error code
 *
 * All MC instances are created on the first successful probe call;
 * subsequent calls (one per matching PCI device) return -ENODEV.
 */

static int __devinit sbridge_probe(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	int rc;
	u8 mc, num_mc = 0;
	struct sbridge_dev *sbridge_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&sbridge_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&sbridge_edac_lock);
		return -ENODEV;
	}
	probed++;

	rc = sbridge_get_all_devices(&num_mc);
	if (unlikely(rc < 0))
		goto fail0;
	mc = 0;

	/* Register one EDAC MC instance per detected memory controller */
	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
		debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc);
		sbridge_dev->mc = mc++;
		rc = sbridge_register_mci(sbridge_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	sbridge_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&sbridge_edac_lock);
	return 0;

fail1:
	/*
	 * Unwind: unregister every instance registered so far (the helper
	 * tolerates never-registered ones), then drop the PCI references.
	 */
	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
		sbridge_unregister_mci(sbridge_dev);

	sbridge_put_all_devices();
fail0:
	mutex_unlock(&sbridge_edac_lock);
	return rc;
}
| 1799 | |||
/*
 * sbridge_remove	destructor for one instance of device
 *
 */
static void __devexit sbridge_remove(struct pci_dev *pdev)
{
	struct sbridge_dev *sbridge_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * The pdev argument is not usable for the teardown: several PCI
	 * devices are grouped together to provide the MC functionality, so
	 * all instances are released by walking sbridge_edac_list instead.
	 * (NOTE(review): the previous comment here referred to the X58
	 * register and Nehalem — text inherited from i7core_edac, not
	 * applicable to Sandy Bridge.)
	 */

	mutex_lock(&sbridge_edac_lock);

	/* Nothing to do if probe never completed */
	if (unlikely(!probed)) {
		mutex_unlock(&sbridge_edac_lock);
		return;
	}

	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
		sbridge_unregister_mci(sbridge_dev);

	/* Release PCI resources */
	sbridge_put_all_devices();

	probed--;

	mutex_unlock(&sbridge_edac_lock);
}
| 1835 | |||
/* Export the PCI ID table for module autoloading */
MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);

/*
 * sbridge_driver	pci_driver structure for this module
 *
 */
static struct pci_driver sbridge_driver = {
	.name     = "sbridge_edac",
	.probe    = sbridge_probe,
	.remove   = __devexit_p(sbridge_remove),
	.id_table = sbridge_pci_tbl,
};
| 1848 | |||
| 1849 | /* | ||
| 1850 | * sbridge_init Module entry function | ||
| 1851 | * Try to initialize this module for its devices | ||
| 1852 | */ | ||
| 1853 | static int __init sbridge_init(void) | ||
| 1854 | { | ||
| 1855 | int pci_rc; | ||
| 1856 | |||
| 1857 | debugf2("MC: " __FILE__ ": %s()\n", __func__); | ||
| 1858 | |||
| 1859 | /* Ensure that the OPSTATE is set correctly for POLL or NMI */ | ||
| 1860 | opstate_init(); | ||
| 1861 | |||
| 1862 | pci_rc = pci_register_driver(&sbridge_driver); | ||
| 1863 | |||
| 1864 | if (pci_rc >= 0) | ||
| 1865 | return 0; | ||
| 1866 | |||
| 1867 | sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", | ||
| 1868 | pci_rc); | ||
| 1869 | |||
| 1870 | return pci_rc; | ||
| 1871 | } | ||
| 1872 | |||
/*
 * sbridge_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit sbridge_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Triggers sbridge_remove() for the bound device */
	pci_unregister_driver(&sbridge_driver);
}
| 1882 | |||
module_init(sbridge_init);
module_exit(sbridge_exit);

/* edac_op_state is settable at insmod time only (mode 0444: read-only sysfs) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
		   SBRIDGE_REVISION);
