aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig23
-rw-r--r--drivers/edac/Makefile4
-rw-r--r--drivers/edac/amd64_edac.c17
-rw-r--r--drivers/edac/amd64_edac.h5
-rw-r--r--drivers/edac/amd64_edac_dbg.c207
-rw-r--r--drivers/edac/edac_device_sysfs.c18
-rw-r--r--drivers/edac/edac_mc_sysfs.c11
-rw-r--r--drivers/edac/edac_mce_amd.c452
-rw-r--r--drivers/edac/edac_module.c79
-rw-r--r--drivers/edac/edac_module.h1
-rw-r--r--drivers/edac/edac_pci_sysfs.c10
-rw-r--r--drivers/edac/edac_stub.c51
-rw-r--r--drivers/edac/i7300_edac.c1247
-rw-r--r--drivers/edac/i82443bxgx_edac.c2
-rw-r--r--drivers/edac/mce_amd.c680
-rw-r--r--drivers/edac/mce_amd.h (renamed from drivers/edac/edac_mce_amd.h)59
-rw-r--r--drivers/edac/mce_amd_inj.c171
17 files changed, 2274 insertions, 763 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 70bb350de996..f436a2fa9f38 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -39,7 +39,7 @@ config EDAC_DEBUG
39 there're four debug levels (x=0,1,2,3 from low to high). 39 there're four debug levels (x=0,1,2,3 from low to high).
40 Usually you should select 'N'. 40 Usually you should select 'N'.
41 41
42 config EDAC_DECODE_MCE 42config EDAC_DECODE_MCE
43 tristate "Decode MCEs in human-readable form (only on AMD for now)" 43 tristate "Decode MCEs in human-readable form (only on AMD for now)"
44 depends on CPU_SUP_AMD && X86_MCE 44 depends on CPU_SUP_AMD && X86_MCE
45 default y 45 default y
@@ -51,6 +51,16 @@ config EDAC_DEBUG
51 which occur really early upon boot, before the module infrastructure 51 which occur really early upon boot, before the module infrastructure
52 has been initialized. 52 has been initialized.
53 53
54config EDAC_MCE_INJ
55 tristate "Simple MCE injection interface over /sysfs"
56 depends on EDAC_DECODE_MCE
57 default n
58 help
59 This is a simple interface to inject MCEs over /sysfs and test
60 the MCE decoding code in EDAC.
61
62 This is currently AMD-only.
63
54config EDAC_MM_EDAC 64config EDAC_MM_EDAC
55 tristate "Main Memory EDAC (Error Detection And Correction) reporting" 65 tristate "Main Memory EDAC (Error Detection And Correction) reporting"
56 help 66 help
@@ -66,13 +76,13 @@ config EDAC_MCE
66 76
67config EDAC_AMD64 77config EDAC_AMD64
68 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" 78 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
69 depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE 79 depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE
70 help 80 help
71 Support for error detection and correction on the AMD 64 81 Support for error detection and correction on the AMD 64
72 Families of Memory Controllers (K8, F10h and F11h) 82 Families of Memory Controllers (K8, F10h and F11h)
73 83
74config EDAC_AMD64_ERROR_INJECTION 84config EDAC_AMD64_ERROR_INJECTION
75 bool "Sysfs Error Injection facilities" 85 bool "Sysfs HW Error injection facilities"
76 depends on EDAC_AMD64 86 depends on EDAC_AMD64
77 help 87 help
78 Recent Opterons (Family 10h and later) provide for Memory Error 88 Recent Opterons (Family 10h and later) provide for Memory Error
@@ -199,6 +209,13 @@ config EDAC_I5100
199 Support for error detection and correction the Intel 209 Support for error detection and correction the Intel
200 San Clemente MCH. 210 San Clemente MCH.
201 211
212config EDAC_I7300
213 tristate "Intel Clarksboro MCH"
214 depends on EDAC_MM_EDAC && X86 && PCI
215 help
216 Support for error detection and correction the Intel
217 Clarksboro MCH (Intel 7300 chipset).
218
202config EDAC_MPC85XX 219config EDAC_MPC85XX
203 tristate "Freescale MPC83xx / MPC85xx" 220 tristate "Freescale MPC83xx / MPC85xx"
204 depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) 221 depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index ca6b1bb24ccc..b3781399b38a 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,9 @@ ifdef CONFIG_PCI
17edac_core-objs += edac_pci.o edac_pci_sysfs.o 17edac_core-objs += edac_pci.o edac_pci_sysfs.o
18endif 18endif
19 19
20obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
21
22edac_mce_amd-objs := mce_amd.o
20obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o 23obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
21 24
22obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 25obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
@@ -24,6 +27,7 @@ obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o
24obj-$(CONFIG_EDAC_I5000) += i5000_edac.o 27obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
25obj-$(CONFIG_EDAC_I5100) += i5100_edac.o 28obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
26obj-$(CONFIG_EDAC_I5400) += i5400_edac.o 29obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
30obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
27obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o 31obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
28obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o 32obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
29obj-$(CONFIG_EDAC_E752X) += e752x_edac.o 33obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e7d5d6b5dcf6..8521401bbd75 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,5 +1,5 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2#include <asm/k8.h> 2#include <asm/amd_nb.h>
3 3
4static struct edac_pci_ctl_info *amd64_ctl_pci; 4static struct edac_pci_ctl_info *amd64_ctl_pci;
5 5
@@ -2073,11 +2073,18 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
2073 amd64_handle_ue(mci, info); 2073 amd64_handle_ue(mci, info);
2074} 2074}
2075 2075
2076void amd64_decode_bus_error(int node_id, struct err_regs *regs) 2076void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
2077{ 2077{
2078 struct mem_ctl_info *mci = mci_lookup[node_id]; 2078 struct mem_ctl_info *mci = mci_lookup[node_id];
2079 struct err_regs regs;
2079 2080
2080 __amd64_decode_bus_error(mci, regs); 2081 regs.nbsl = (u32) m->status;
2082 regs.nbsh = (u32)(m->status >> 32);
2083 regs.nbeal = (u32) m->addr;
2084 regs.nbeah = (u32)(m->addr >> 32);
2085 regs.nbcfg = nbcfg;
2086
2087 __amd64_decode_bus_error(mci, &regs);
2081 2088
2082 /* 2089 /*
2083 * Check the UE bit of the NB status high register, if set generate some 2090 * Check the UE bit of the NB status high register, if set generate some
@@ -2086,7 +2093,7 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs)
2086 * 2093 *
2087 * FIXME: this should go somewhere else, if at all. 2094 * FIXME: this should go somewhere else, if at all.
2088 */ 2095 */
2089 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2096 if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
2090 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2097 edac_mc_handle_ue_no_info(mci, "UE bit is set");
2091 2098
2092} 2099}
@@ -2927,7 +2934,7 @@ static int __init amd64_edac_init(void)
2927 * to finish initialization of the MC instances. 2934 * to finish initialization of the MC instances.
2928 */ 2935 */
2929 err = -ENODEV; 2936 err = -ENODEV;
2930 for (nb = 0; nb < num_k8_northbridges; nb++) { 2937 for (nb = 0; nb < k8_northbridges.num; nb++) {
2931 if (!pvt_lookup[nb]) 2938 if (!pvt_lookup[nb])
2932 continue; 2939 continue;
2933 2940
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 613b9381e71a..044aee4f944d 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -72,7 +72,7 @@
72#include <linux/edac.h> 72#include <linux/edac.h>
73#include <asm/msr.h> 73#include <asm/msr.h>
74#include "edac_core.h" 74#include "edac_core.h"
75#include "edac_mce_amd.h" 75#include "mce_amd.h"
76 76
77#define amd64_printk(level, fmt, arg...) \ 77#define amd64_printk(level, fmt, arg...) \
78 edac_printk(level, "amd64", fmt, ##arg) 78 edac_printk(level, "amd64", fmt, ##arg)
@@ -482,11 +482,10 @@ extern const char *rrrr_msgs[16];
482extern const char *to_msgs[2]; 482extern const char *to_msgs[2];
483extern const char *pp_msgs[4]; 483extern const char *pp_msgs[4];
484extern const char *ii_msgs[4]; 484extern const char *ii_msgs[4];
485extern const char *ext_msgs[32];
486extern const char *htlink_msgs[8]; 485extern const char *htlink_msgs[8];
487 486
488#ifdef CONFIG_EDAC_DEBUG 487#ifdef CONFIG_EDAC_DEBUG
489#define NUM_DBG_ATTRS 9 488#define NUM_DBG_ATTRS 5
490#else 489#else
491#define NUM_DBG_ATTRS 0 490#define NUM_DBG_ATTRS 0
492#endif 491#endif
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index 59cf2cf6e11e..e3562288f4ce 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -1,167 +1,16 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2 2
3/* 3#define EDAC_DCT_ATTR_SHOW(reg) \
4 * accept a hex value and store it into the virtual error register file, field: 4static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
5 * nbeal and nbeah. Assume virtual error values have already been set for: NBSL, 5{ \
6 * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and 6 struct amd64_pvt *pvt = mci->pvt_info; \
7 * CHANNEL 7 return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
8 */
9static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
10 size_t count)
11{
12 struct amd64_pvt *pvt = mci->pvt_info;
13 unsigned long long value;
14 int ret = 0;
15
16 ret = strict_strtoull(data, 16, &value);
17 if (ret != -EINVAL) {
18 debugf0("received NBEA= 0x%llx\n", value);
19
20 /* place the value into the virtual error packet */
21 pvt->ctl_error_info.nbeal = (u32) value;
22 value >>= 32;
23 pvt->ctl_error_info.nbeah = (u32) value;
24
25 /* Process the Mapping request */
26 /* TODO: Add race prevention */
27 amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1);
28
29 return count;
30 }
31 return ret;
32}
33
34/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
35static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
36{
37 struct amd64_pvt *pvt = mci->pvt_info;
38 u64 value;
39
40 value = pvt->ctl_error_info.nbeah;
41 value <<= 32;
42 value |= pvt->ctl_error_info.nbeal;
43
44 return sprintf(data, "%llx\n", value);
45}
46
47/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
48static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
49 size_t count)
50{
51 struct amd64_pvt *pvt = mci->pvt_info;
52 unsigned long value;
53 int ret = 0;
54
55 ret = strict_strtoul(data, 16, &value);
56 if (ret != -EINVAL) {
57 debugf0("received NBSL= 0x%lx\n", value);
58
59 pvt->ctl_error_info.nbsl = (u32) value;
60
61 return count;
62 }
63 return ret;
64}
65
66/* display back what the last NBSL value written */
67static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
68{
69 struct amd64_pvt *pvt = mci->pvt_info;
70 u32 value;
71
72 value = pvt->ctl_error_info.nbsl;
73
74 return sprintf(data, "%x\n", value);
75}
76
77/* store the NBSH (MCA NB Status High) value user desires */
78static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
79 size_t count)
80{
81 struct amd64_pvt *pvt = mci->pvt_info;
82 unsigned long value;
83 int ret = 0;
84
85 ret = strict_strtoul(data, 16, &value);
86 if (ret != -EINVAL) {
87 debugf0("received NBSH= 0x%lx\n", value);
88
89 pvt->ctl_error_info.nbsh = (u32) value;
90
91 return count;
92 }
93 return ret;
94}
95
96/* display back what the last NBSH value written */
97static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
98{
99 struct amd64_pvt *pvt = mci->pvt_info;
100 u32 value;
101
102 value = pvt->ctl_error_info.nbsh;
103
104 return sprintf(data, "%x\n", value);
105} 8}
106 9
107/* accept and store the NBCFG (MCA NB Configuration) value user desires */ 10EDAC_DCT_ATTR_SHOW(dhar);
108static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci, 11EDAC_DCT_ATTR_SHOW(dbam0);
109 const char *data, size_t count) 12EDAC_DCT_ATTR_SHOW(top_mem);
110{ 13EDAC_DCT_ATTR_SHOW(top_mem2);
111 struct amd64_pvt *pvt = mci->pvt_info;
112 unsigned long value;
113 int ret = 0;
114
115 ret = strict_strtoul(data, 16, &value);
116 if (ret != -EINVAL) {
117 debugf0("received NBCFG= 0x%lx\n", value);
118
119 pvt->ctl_error_info.nbcfg = (u32) value;
120
121 return count;
122 }
123 return ret;
124}
125
126/* various show routines for the controls of a MCI */
127static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
128{
129 struct amd64_pvt *pvt = mci->pvt_info;
130
131 return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
132}
133
134
135static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
136{
137 struct amd64_pvt *pvt = mci->pvt_info;
138
139 return sprintf(data, "%x\n", pvt->dhar);
140}
141
142
143static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
144{
145 struct amd64_pvt *pvt = mci->pvt_info;
146
147 return sprintf(data, "%x\n", pvt->dbam0);
148}
149
150
151static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
152{
153 struct amd64_pvt *pvt = mci->pvt_info;
154
155 return sprintf(data, "%llx\n", pvt->top_mem);
156}
157
158
159static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
160{
161 struct amd64_pvt *pvt = mci->pvt_info;
162
163 return sprintf(data, "%llx\n", pvt->top_mem2);
164}
165 14
166static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) 15static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
167{ 16{
@@ -182,38 +31,6 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
182 31
183 { 32 {
184 .attr = { 33 .attr = {
185 .name = "nbea_ctl",
186 .mode = (S_IRUGO | S_IWUSR)
187 },
188 .show = amd64_nbea_show,
189 .store = amd64_nbea_store,
190 },
191 {
192 .attr = {
193 .name = "nbsl_ctl",
194 .mode = (S_IRUGO | S_IWUSR)
195 },
196 .show = amd64_nbsl_show,
197 .store = amd64_nbsl_store,
198 },
199 {
200 .attr = {
201 .name = "nbsh_ctl",
202 .mode = (S_IRUGO | S_IWUSR)
203 },
204 .show = amd64_nbsh_show,
205 .store = amd64_nbsh_store,
206 },
207 {
208 .attr = {
209 .name = "nbcfg_ctl",
210 .mode = (S_IRUGO | S_IWUSR)
211 },
212 .show = amd64_nbcfg_show,
213 .store = amd64_nbcfg_store,
214 },
215 {
216 .attr = {
217 .name = "dhar", 34 .name = "dhar",
218 .mode = (S_IRUGO) 35 .mode = (S_IRUGO)
219 }, 36 },
@@ -225,7 +42,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
225 .name = "dbam", 42 .name = "dbam",
226 .mode = (S_IRUGO) 43 .mode = (S_IRUGO)
227 }, 44 },
228 .show = amd64_dbam_show, 45 .show = amd64_dbam0_show,
229 .store = NULL, 46 .store = NULL,
230 }, 47 },
231 { 48 {
@@ -233,7 +50,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
233 .name = "topmem", 50 .name = "topmem",
234 .mode = (S_IRUGO) 51 .mode = (S_IRUGO)
235 }, 52 },
236 .show = amd64_topmem_show, 53 .show = amd64_top_mem_show,
237 .store = NULL, 54 .store = NULL,
238 }, 55 },
239 { 56 {
@@ -241,7 +58,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
241 .name = "topmem2", 58 .name = "topmem2",
242 .mode = (S_IRUGO) 59 .mode = (S_IRUGO)
243 }, 60 },
244 .show = amd64_topmem2_show, 61 .show = amd64_top_mem2_show,
245 .store = NULL, 62 .store = NULL,
246 }, 63 },
247 { 64 {
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 070968178a24..400de071cabc 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file for managing the edac_device class of devices for EDAC 2 * file for managing the edac_device class of devices for EDAC
3 * 3 *
4 * (C) 2007 SoftwareBitMaker (http://www.softwarebitmaker.com) 4 * (C) 2007 SoftwareBitMaker
5 * 5 *
6 * This file may be distributed under the terms of the 6 * This file may be distributed under the terms of the
7 * GNU General Public License. 7 * GNU General Public License.
@@ -13,6 +13,7 @@
13#include <linux/ctype.h> 13#include <linux/ctype.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/edac.h>
16 17
17#include "edac_core.h" 18#include "edac_core.h"
18#include "edac_module.h" 19#include "edac_module.h"
@@ -235,7 +236,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
235 debugf1("%s()\n", __func__); 236 debugf1("%s()\n", __func__);
236 237
237 /* get the /sys/devices/system/edac reference */ 238 /* get the /sys/devices/system/edac reference */
238 edac_class = edac_get_edac_class(); 239 edac_class = edac_get_sysfs_class();
239 if (edac_class == NULL) { 240 if (edac_class == NULL) {
240 debugf1("%s() no edac_class error\n", __func__); 241 debugf1("%s() no edac_class error\n", __func__);
241 err = -ENODEV; 242 err = -ENODEV;
@@ -255,7 +256,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
255 256
256 if (!try_module_get(edac_dev->owner)) { 257 if (!try_module_get(edac_dev->owner)) {
257 err = -ENODEV; 258 err = -ENODEV;
258 goto err_out; 259 goto err_mod_get;
259 } 260 }
260 261
261 /* register */ 262 /* register */
@@ -282,6 +283,9 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
282err_kobj_reg: 283err_kobj_reg:
283 module_put(edac_dev->owner); 284 module_put(edac_dev->owner);
284 285
286err_mod_get:
287 edac_put_sysfs_class();
288
285err_out: 289err_out:
286 return err; 290 return err;
287} 291}
@@ -290,12 +294,11 @@ err_out:
290 * edac_device_unregister_sysfs_main_kobj: 294 * edac_device_unregister_sysfs_main_kobj:
291 * the '..../edac/<name>' kobject 295 * the '..../edac/<name>' kobject
292 */ 296 */
293void edac_device_unregister_sysfs_main_kobj( 297void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
294 struct edac_device_ctl_info *edac_dev)
295{ 298{
296 debugf0("%s()\n", __func__); 299 debugf0("%s()\n", __func__);
297 debugf4("%s() name of kobject is: %s\n", 300 debugf4("%s() name of kobject is: %s\n",
298 __func__, kobject_name(&edac_dev->kobj)); 301 __func__, kobject_name(&dev->kobj));
299 302
300 /* 303 /*
301 * Unregister the edac device's kobject and 304 * Unregister the edac device's kobject and
@@ -304,7 +307,8 @@ void edac_device_unregister_sysfs_main_kobj(
304 * a) module_put() this module 307 * a) module_put() this module
305 * b) 'kfree' the memory 308 * b) 'kfree' the memory
306 */ 309 */
307 kobject_put(&edac_dev->kobj); 310 kobject_put(&dev->kobj);
311 edac_put_sysfs_class();
308} 312}
309 313
310/* edac_dev -> instance information */ 314/* edac_dev -> instance information */
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 2905dc103393..dce61f7ba38b 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/edac.h>
14#include <linux/bug.h> 15#include <linux/bug.h>
15 16
16#include "edac_core.h" 17#include "edac_core.h"
@@ -1021,13 +1022,13 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1021 */ 1022 */
1022int edac_sysfs_setup_mc_kset(void) 1023int edac_sysfs_setup_mc_kset(void)
1023{ 1024{
1024 int err = 0; 1025 int err = -EINVAL;
1025 struct sysdev_class *edac_class; 1026 struct sysdev_class *edac_class;
1026 1027
1027 debugf1("%s()\n", __func__); 1028 debugf1("%s()\n", __func__);
1028 1029
1029 /* get the /sys/devices/system/edac class reference */ 1030 /* get the /sys/devices/system/edac class reference */
1030 edac_class = edac_get_edac_class(); 1031 edac_class = edac_get_sysfs_class();
1031 if (edac_class == NULL) { 1032 if (edac_class == NULL) {
1032 debugf1("%s() no edac_class error=%d\n", __func__, err); 1033 debugf1("%s() no edac_class error=%d\n", __func__, err);
1033 goto fail_out; 1034 goto fail_out;
@@ -1038,15 +1039,16 @@ int edac_sysfs_setup_mc_kset(void)
1038 if (!mc_kset) { 1039 if (!mc_kset) {
1039 err = -ENOMEM; 1040 err = -ENOMEM;
1040 debugf1("%s() Failed to register '.../edac/mc'\n", __func__); 1041 debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
1041 goto fail_out; 1042 goto fail_kset;
1042 } 1043 }
1043 1044
1044 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); 1045 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__);
1045 1046
1046 return 0; 1047 return 0;
1047 1048
1049fail_kset:
1050 edac_put_sysfs_class();
1048 1051
1049 /* error unwind stack */
1050fail_out: 1052fail_out:
1051 return err; 1053 return err;
1052} 1054}
@@ -1059,5 +1061,6 @@ fail_out:
1059void edac_sysfs_teardown_mc_kset(void) 1061void edac_sysfs_teardown_mc_kset(void)
1060{ 1062{
1061 kset_unregister(mc_kset); 1063 kset_unregister(mc_kset);
1064 edac_put_sysfs_class();
1062} 1065}
1063 1066
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
deleted file mode 100644
index 9014df6f605d..000000000000
--- a/drivers/edac/edac_mce_amd.c
+++ /dev/null
@@ -1,452 +0,0 @@
1#include <linux/module.h>
2#include "edac_mce_amd.h"
3
4static bool report_gart_errors;
5static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
6
7void amd_report_gart_errors(bool v)
8{
9 report_gart_errors = v;
10}
11EXPORT_SYMBOL_GPL(amd_report_gart_errors);
12
13void amd_register_ecc_decoder(void (*f)(int, struct err_regs *))
14{
15 nb_bus_decoder = f;
16}
17EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
18
19void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *))
20{
21 if (nb_bus_decoder) {
22 WARN_ON(nb_bus_decoder != f);
23
24 nb_bus_decoder = NULL;
25 }
26}
27EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
28
29/*
30 * string representation for the different MCA reported error types, see F3x48
31 * or MSR0000_0411.
32 */
33const char *tt_msgs[] = { /* transaction type */
34 "instruction",
35 "data",
36 "generic",
37 "reserved"
38};
39EXPORT_SYMBOL_GPL(tt_msgs);
40
41const char *ll_msgs[] = { /* cache level */
42 "L0",
43 "L1",
44 "L2",
45 "L3/generic"
46};
47EXPORT_SYMBOL_GPL(ll_msgs);
48
49const char *rrrr_msgs[] = {
50 "generic",
51 "generic read",
52 "generic write",
53 "data read",
54 "data write",
55 "inst fetch",
56 "prefetch",
57 "evict",
58 "snoop",
59 "reserved RRRR= 9",
60 "reserved RRRR= 10",
61 "reserved RRRR= 11",
62 "reserved RRRR= 12",
63 "reserved RRRR= 13",
64 "reserved RRRR= 14",
65 "reserved RRRR= 15"
66};
67EXPORT_SYMBOL_GPL(rrrr_msgs);
68
69const char *pp_msgs[] = { /* participating processor */
70 "local node originated (SRC)",
71 "local node responded to request (RES)",
72 "local node observed as 3rd party (OBS)",
73 "generic"
74};
75EXPORT_SYMBOL_GPL(pp_msgs);
76
77const char *to_msgs[] = {
78 "no timeout",
79 "timed out"
80};
81EXPORT_SYMBOL_GPL(to_msgs);
82
83const char *ii_msgs[] = { /* memory or i/o */
84 "mem access",
85 "reserved",
86 "i/o access",
87 "generic"
88};
89EXPORT_SYMBOL_GPL(ii_msgs);
90
91/*
92 * Map the 4 or 5 (family-specific) bits of Extended Error code to the
93 * string table.
94 */
95const char *ext_msgs[] = {
96 "K8 ECC error", /* 0_0000b */
97 "CRC error on link", /* 0_0001b */
98 "Sync error packets on link", /* 0_0010b */
99 "Master Abort during link operation", /* 0_0011b */
100 "Target Abort during link operation", /* 0_0100b */
101 "Invalid GART PTE entry during table walk", /* 0_0101b */
102 "Unsupported atomic RMW command received", /* 0_0110b */
103 "WDT error: NB transaction timeout", /* 0_0111b */
104 "ECC/ChipKill ECC error", /* 0_1000b */
105 "SVM DEV Error", /* 0_1001b */
106 "Link Data error", /* 0_1010b */
107 "Link/L3/Probe Filter Protocol error", /* 0_1011b */
108 "NB Internal Arrays Parity error", /* 0_1100b */
109 "DRAM Address/Control Parity error", /* 0_1101b */
110 "Link Transmission error", /* 0_1110b */
111 "GART/DEV Table Walk Data error" /* 0_1111b */
112 "Res 0x100 error", /* 1_0000b */
113 "Res 0x101 error", /* 1_0001b */
114 "Res 0x102 error", /* 1_0010b */
115 "Res 0x103 error", /* 1_0011b */
116 "Res 0x104 error", /* 1_0100b */
117 "Res 0x105 error", /* 1_0101b */
118 "Res 0x106 error", /* 1_0110b */
119 "Res 0x107 error", /* 1_0111b */
120 "Res 0x108 error", /* 1_1000b */
121 "Res 0x109 error", /* 1_1001b */
122 "Res 0x10A error", /* 1_1010b */
123 "Res 0x10B error", /* 1_1011b */
124 "ECC error in L3 Cache Data", /* 1_1100b */
125 "L3 Cache Tag error", /* 1_1101b */
126 "L3 Cache LRU Parity error", /* 1_1110b */
127 "Probe Filter error" /* 1_1111b */
128};
129EXPORT_SYMBOL_GPL(ext_msgs);
130
131static void amd_decode_dc_mce(u64 mc0_status)
132{
133 u32 ec = mc0_status & 0xffff;
134 u32 xec = (mc0_status >> 16) & 0xf;
135
136 pr_emerg("Data Cache Error");
137
138 if (xec == 1 && TLB_ERROR(ec))
139 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
140 else if (xec == 0) {
141 if (mc0_status & (1ULL << 40))
142 pr_cont(" during Data Scrub.\n");
143 else if (TLB_ERROR(ec))
144 pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
145 else if (MEM_ERROR(ec)) {
146 u8 ll = ec & 0x3;
147 u8 tt = (ec >> 2) & 0x3;
148 u8 rrrr = (ec >> 4) & 0xf;
149
150 /* see F10h BKDG (31116), Table 92. */
151 if (ll == 0x1) {
152 if (tt != 0x1)
153 goto wrong_dc_mce;
154
155 pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec));
156
157 } else if (ll == 0x2 && rrrr == 0x3)
158 pr_cont(" during L1 linefill from L2.\n");
159 else
160 goto wrong_dc_mce;
161 } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf)
162 pr_cont(" during system linefill.\n");
163 else
164 goto wrong_dc_mce;
165 } else
166 goto wrong_dc_mce;
167
168 return;
169
170wrong_dc_mce:
171 pr_warning("Corrupted DC MCE info?\n");
172}
173
174static void amd_decode_ic_mce(u64 mc1_status)
175{
176 u32 ec = mc1_status & 0xffff;
177 u32 xec = (mc1_status >> 16) & 0xf;
178
179 pr_emerg("Instruction Cache Error");
180
181 if (xec == 1 && TLB_ERROR(ec))
182 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
183 else if (xec == 0) {
184 if (TLB_ERROR(ec))
185 pr_cont(": %s TLB Parity error.\n", LL_MSG(ec));
186 else if (BUS_ERROR(ec)) {
187 if (boot_cpu_data.x86 == 0xf &&
188 (mc1_status & (1ULL << 58)))
189 pr_cont(" during system linefill.\n");
190 else
191 pr_cont(" during attempted NB data read.\n");
192 } else if (MEM_ERROR(ec)) {
193 u8 ll = ec & 0x3;
194 u8 rrrr = (ec >> 4) & 0xf;
195
196 if (ll == 0x2)
197 pr_cont(" during a linefill from L2.\n");
198 else if (ll == 0x1) {
199
200 switch (rrrr) {
201 case 0x5:
202 pr_cont(": Parity error during "
203 "data load.\n");
204 break;
205
206 case 0x7:
207 pr_cont(": Copyback Parity/Victim"
208 " error.\n");
209 break;
210
211 case 0x8:
212 pr_cont(": Tag Snoop error.\n");
213 break;
214
215 default:
216 goto wrong_ic_mce;
217 break;
218 }
219 }
220 } else
221 goto wrong_ic_mce;
222 } else
223 goto wrong_ic_mce;
224
225 return;
226
227wrong_ic_mce:
228 pr_warning("Corrupted IC MCE info?\n");
229}
230
231static void amd_decode_bu_mce(u64 mc2_status)
232{
233 u32 ec = mc2_status & 0xffff;
234 u32 xec = (mc2_status >> 16) & 0xf;
235
236 pr_emerg("Bus Unit Error");
237
238 if (xec == 0x1)
239 pr_cont(" in the write data buffers.\n");
240 else if (xec == 0x3)
241 pr_cont(" in the victim data buffers.\n");
242 else if (xec == 0x2 && MEM_ERROR(ec))
243 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
244 else if (xec == 0x0) {
245 if (TLB_ERROR(ec))
246 pr_cont(": %s error in a Page Descriptor Cache or "
247 "Guest TLB.\n", TT_MSG(ec));
248 else if (BUS_ERROR(ec))
249 pr_cont(": %s/ECC error in data read from NB: %s.\n",
250 RRRR_MSG(ec), PP_MSG(ec));
251 else if (MEM_ERROR(ec)) {
252 u8 rrrr = (ec >> 4) & 0xf;
253
254 if (rrrr >= 0x7)
255 pr_cont(": %s error during data copyback.\n",
256 RRRR_MSG(ec));
257 else if (rrrr <= 0x1)
258 pr_cont(": %s parity/ECC error during data "
259 "access from L2.\n", RRRR_MSG(ec));
260 else
261 goto wrong_bu_mce;
262 } else
263 goto wrong_bu_mce;
264 } else
265 goto wrong_bu_mce;
266
267 return;
268
269wrong_bu_mce:
270 pr_warning("Corrupted BU MCE info?\n");
271}
272
273static void amd_decode_ls_mce(u64 mc3_status)
274{
275 u32 ec = mc3_status & 0xffff;
276 u32 xec = (mc3_status >> 16) & 0xf;
277
278 pr_emerg("Load Store Error");
279
280 if (xec == 0x0) {
281 u8 rrrr = (ec >> 4) & 0xf;
282
283 if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4))
284 goto wrong_ls_mce;
285
286 pr_cont(" during %s.\n", RRRR_MSG(ec));
287 }
288 return;
289
290wrong_ls_mce:
291 pr_warning("Corrupted LS MCE info?\n");
292}
293
294void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
295{
296 u32 ec = ERROR_CODE(regs->nbsl);
297
298 if (!handle_errors)
299 return;
300
301 /*
302 * GART TLB error reporting is disabled by default. Bail out early.
303 */
304 if (TLB_ERROR(ec) && !report_gart_errors)
305 return;
306
307 pr_emerg("Northbridge Error, node %d", node_id);
308
309 /*
310 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
311 * value encoding has changed so interpret those differently
312 */
313 if ((boot_cpu_data.x86 == 0x10) &&
314 (boot_cpu_data.x86_model > 7)) {
315 if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
316 pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
317 } else {
318 u8 assoc_cpus = regs->nbsh & 0xf;
319
320 if (assoc_cpus > 0)
321 pr_cont(", core: %d", fls(assoc_cpus) - 1);
322
323 pr_cont("\n");
324 }
325
326 pr_emerg("%s.\n", EXT_ERR_MSG(regs->nbsl));
327
328 if (BUS_ERROR(ec) && nb_bus_decoder)
329 nb_bus_decoder(node_id, regs);
330}
331EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
332
333static void amd_decode_fr_mce(u64 mc5_status)
334{
335 /* we have only one error signature so match all fields at once. */
336 if ((mc5_status & 0xffff) == 0x0f0f)
337 pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
338 else
339 pr_warning("Corrupted FR MCE info?\n");
340}
341
342static inline void amd_decode_err_code(unsigned int ec)
343{
344 if (TLB_ERROR(ec)) {
345 pr_emerg("Transaction: %s, Cache Level %s\n",
346 TT_MSG(ec), LL_MSG(ec));
347 } else if (MEM_ERROR(ec)) {
348 pr_emerg("Transaction: %s, Type: %s, Cache Level: %s",
349 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
350 } else if (BUS_ERROR(ec)) {
351 pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, "
352 "Participating Processor: %s\n",
353 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
354 PP_MSG(ec));
355 } else
356 pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
357}
358
359static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
360 void *data)
361{
362 struct mce *m = (struct mce *)data;
363 struct err_regs regs;
364 int node, ecc;
365
366 pr_emerg("MC%d_STATUS: ", m->bank);
367
368 pr_cont("%sorrected error, other errors lost: %s, "
369 "CPU context corrupt: %s",
370 ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
371 ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
372 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
373
374 /* do the two bits[14:13] together */
375 ecc = (m->status >> 45) & 0x3;
376 if (ecc)
377 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
378
379 pr_cont("\n");
380
381 switch (m->bank) {
382 case 0:
383 amd_decode_dc_mce(m->status);
384 break;
385
386 case 1:
387 amd_decode_ic_mce(m->status);
388 break;
389
390 case 2:
391 amd_decode_bu_mce(m->status);
392 break;
393
394 case 3:
395 amd_decode_ls_mce(m->status);
396 break;
397
398 case 4:
399 regs.nbsl = (u32) m->status;
400 regs.nbsh = (u32)(m->status >> 32);
401 regs.nbeal = (u32) m->addr;
402 regs.nbeah = (u32)(m->addr >> 32);
403 node = amd_get_nb_id(m->extcpu);
404
405 amd_decode_nb_mce(node, &regs, 1);
406 break;
407
408 case 5:
409 amd_decode_fr_mce(m->status);
410 break;
411
412 default:
413 break;
414 }
415
416 amd_decode_err_code(m->status & 0xffff);
417
418 return NOTIFY_STOP;
419}
420
421static struct notifier_block amd_mce_dec_nb = {
422 .notifier_call = amd_decode_mce,
423};
424
425static int __init mce_amd_init(void)
426{
427 /*
428 * We can decode MCEs for K8, F10h and F11h CPUs:
429 */
430 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
431 return 0;
432
433 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
434 return 0;
435
436 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
437
438 return 0;
439}
440early_initcall(mce_amd_init);
441
442#ifdef MODULE
443static void __exit mce_amd_exit(void)
444{
445 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
446}
447
448MODULE_DESCRIPTION("AMD MCE decoder");
449MODULE_ALIAS("edac-mce-amd");
450MODULE_LICENSE("GPL");
451module_exit(mce_amd_exit);
452#endif
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 7e1374afd967..be4b075c3098 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -27,15 +27,6 @@ EXPORT_SYMBOL_GPL(edac_debug_level);
27struct workqueue_struct *edac_workqueue; 27struct workqueue_struct *edac_workqueue;
28 28
29/* 29/*
30 * sysfs object: /sys/devices/system/edac
31 * need to export to other files in this modules
32 */
33static struct sysdev_class edac_class = {
34 .name = "edac",
35};
36static int edac_class_valid;
37
38/*
39 * edac_op_state_to_string() 30 * edac_op_state_to_string()
40 */ 31 */
41char *edac_op_state_to_string(int opstate) 32char *edac_op_state_to_string(int opstate)
@@ -55,60 +46,6 @@ char *edac_op_state_to_string(int opstate)
55} 46}
56 47
57/* 48/*
58 * edac_get_edac_class()
59 *
60 * return pointer to the edac class of 'edac'
61 */
62struct sysdev_class *edac_get_edac_class(void)
63{
64 struct sysdev_class *classptr = NULL;
65
66 if (edac_class_valid)
67 classptr = &edac_class;
68
69 return classptr;
70}
71
72/*
73 * edac_register_sysfs_edac_name()
74 *
75 * register the 'edac' into /sys/devices/system
76 *
77 * return:
78 * 0 success
79 * !0 error
80 */
81static int edac_register_sysfs_edac_name(void)
82{
83 int err;
84
85 /* create the /sys/devices/system/edac directory */
86 err = sysdev_class_register(&edac_class);
87
88 if (err) {
89 debugf1("%s() error=%d\n", __func__, err);
90 return err;
91 }
92
93 edac_class_valid = 1;
94 return 0;
95}
96
97/*
98 * sysdev_class_unregister()
99 *
100 * unregister the 'edac' from /sys/devices/system
101 */
102static void edac_unregister_sysfs_edac_name(void)
103{
104 /* only if currently registered, then unregister it */
105 if (edac_class_valid)
106 sysdev_class_unregister(&edac_class);
107
108 edac_class_valid = 0;
109}
110
111/*
112 * edac_workqueue_setup 49 * edac_workqueue_setup
113 * initialize the edac work queue for polling operations 50 * initialize the edac work queue for polling operations
114 */ 51 */
@@ -154,21 +91,11 @@ static int __init edac_init(void)
154 edac_pci_clear_parity_errors(); 91 edac_pci_clear_parity_errors();
155 92
156 /* 93 /*
157 * perform the registration of the /sys/devices/system/edac class object
158 */
159 if (edac_register_sysfs_edac_name()) {
160 edac_printk(KERN_ERR, EDAC_MC,
161 "Error initializing 'edac' kobject\n");
162 err = -ENODEV;
163 goto error;
164 }
165
166 /*
167 * now set up the mc_kset under the edac class object 94 * now set up the mc_kset under the edac class object
168 */ 95 */
169 err = edac_sysfs_setup_mc_kset(); 96 err = edac_sysfs_setup_mc_kset();
170 if (err) 97 if (err)
171 goto sysfs_setup_fail; 98 goto error;
172 99
173 /* Setup/Initialize the workq for this core */ 100 /* Setup/Initialize the workq for this core */
174 err = edac_workqueue_setup(); 101 err = edac_workqueue_setup();
@@ -183,9 +110,6 @@ static int __init edac_init(void)
183workq_fail: 110workq_fail:
184 edac_sysfs_teardown_mc_kset(); 111 edac_sysfs_teardown_mc_kset();
185 112
186sysfs_setup_fail:
187 edac_unregister_sysfs_edac_name();
188
189error: 113error:
190 return err; 114 return err;
191} 115}
@@ -201,7 +125,6 @@ static void __exit edac_exit(void)
201 /* tear down the various subsystems */ 125 /* tear down the various subsystems */
202 edac_workqueue_teardown(); 126 edac_workqueue_teardown();
203 edac_sysfs_teardown_mc_kset(); 127 edac_sysfs_teardown_mc_kset();
204 edac_unregister_sysfs_edac_name();
205} 128}
206 129
207/* 130/*
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 233d4798c3aa..17aabb7b90ec 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -42,7 +42,6 @@ extern void edac_device_unregister_sysfs_main_kobj(
42 struct edac_device_ctl_info *edac_dev); 42 struct edac_device_ctl_info *edac_dev);
43extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); 43extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev);
44extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); 44extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev);
45extern struct sysdev_class *edac_get_edac_class(void);
46 45
47/* edac core workqueue: single CPU mode */ 46/* edac core workqueue: single CPU mode */
48extern struct workqueue_struct *edac_workqueue; 47extern struct workqueue_struct *edac_workqueue;
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index c39697df9cb4..023b01cb5175 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -7,7 +7,7 @@
7 * 7 *
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/sysdev.h> 10#include <linux/edac.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13 13
@@ -354,7 +354,7 @@ static int edac_pci_main_kobj_setup(void)
354 /* First time, so create the main kobject and its 354 /* First time, so create the main kobject and its
355 * controls and atributes 355 * controls and atributes
356 */ 356 */
357 edac_class = edac_get_edac_class(); 357 edac_class = edac_get_sysfs_class();
358 if (edac_class == NULL) { 358 if (edac_class == NULL) {
359 debugf1("%s() no edac_class\n", __func__); 359 debugf1("%s() no edac_class\n", __func__);
360 err = -ENODEV; 360 err = -ENODEV;
@@ -368,7 +368,7 @@ static int edac_pci_main_kobj_setup(void)
368 if (!try_module_get(THIS_MODULE)) { 368 if (!try_module_get(THIS_MODULE)) {
369 debugf1("%s() try_module_get() failed\n", __func__); 369 debugf1("%s() try_module_get() failed\n", __func__);
370 err = -ENODEV; 370 err = -ENODEV;
371 goto decrement_count_fail; 371 goto mod_get_fail;
372 } 372 }
373 373
374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -403,6 +403,9 @@ kobject_init_and_add_fail:
403kzalloc_fail: 403kzalloc_fail:
404 module_put(THIS_MODULE); 404 module_put(THIS_MODULE);
405 405
406mod_get_fail:
407 edac_put_sysfs_class();
408
406decrement_count_fail: 409decrement_count_fail:
407 /* if are on this error exit, nothing to tear down */ 410 /* if are on this error exit, nothing to tear down */
408 atomic_dec(&edac_pci_sysfs_refcount); 411 atomic_dec(&edac_pci_sysfs_refcount);
@@ -429,6 +432,7 @@ static void edac_pci_main_kobj_teardown(void)
429 __func__); 432 __func__);
430 kobject_put(edac_pci_top_main_kobj); 433 kobject_put(edac_pci_top_main_kobj);
431 } 434 }
435 edac_put_sysfs_class();
432} 436}
433 437
434/* 438/*
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 20b428aa155e..aab970760b75 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -3,10 +3,13 @@
3 * 3 *
4 * Author: Dave Jiang <djiang@mvista.com> 4 * Author: Dave Jiang <djiang@mvista.com>
5 * 5 *
6 * 2007 (c) MontaVista Software, Inc. This file is licensed under 6 * 2007 (c) MontaVista Software, Inc.
7 * the terms of the GNU General Public License version 2. This program 7 * 2010 (c) Advanced Micro Devices Inc.
8 * is licensed "as is" without any warranty of any kind, whether express 8 * Borislav Petkov <borislav.petkov@amd.com>
9 * or implied. 9 *
10 * This file is licensed under the terms of the GNU General Public
11 * License version 2. This program is licensed "as is" without any
12 * warranty of any kind, whether express or implied.
10 * 13 *
11 */ 14 */
12#include <linux/module.h> 15#include <linux/module.h>
@@ -23,6 +26,8 @@ EXPORT_SYMBOL_GPL(edac_handlers);
23int edac_err_assert = 0; 26int edac_err_assert = 0;
24EXPORT_SYMBOL_GPL(edac_err_assert); 27EXPORT_SYMBOL_GPL(edac_err_assert);
25 28
29static atomic_t edac_class_valid = ATOMIC_INIT(0);
30
26/* 31/*
27 * called to determine if there is an EDAC driver interested in 32 * called to determine if there is an EDAC driver interested in
28 * knowing an event (such as NMI) occurred 33 * knowing an event (such as NMI) occurred
@@ -44,3 +49,41 @@ void edac_atomic_assert_error(void)
44 edac_err_assert++; 49 edac_err_assert++;
45} 50}
46EXPORT_SYMBOL_GPL(edac_atomic_assert_error); 51EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
52
53/*
54 * sysfs object: /sys/devices/system/edac
55 * need to export to other files
56 */
57struct sysdev_class edac_class = {
58 .name = "edac",
59};
60EXPORT_SYMBOL_GPL(edac_class);
61
62/* return pointer to the 'edac' node in sysfs */
63struct sysdev_class *edac_get_sysfs_class(void)
64{
65 int err = 0;
66
67 if (atomic_read(&edac_class_valid))
68 goto out;
69
70 /* create the /sys/devices/system/edac directory */
71 err = sysdev_class_register(&edac_class);
72 if (err) {
73 printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
74 return NULL;
75 }
76
77out:
78 atomic_inc(&edac_class_valid);
79 return &edac_class;
80}
81EXPORT_SYMBOL_GPL(edac_get_sysfs_class);
82
83void edac_put_sysfs_class(void)
84{
85 /* last user unregisters it */
86 if (atomic_dec_and_test(&edac_class_valid))
87 sysdev_class_unregister(&edac_class);
88}
89EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
new file mode 100644
index 000000000000..05523b504271
--- /dev/null
+++ b/drivers/edac/i7300_edac.c
@@ -0,0 +1,1247 @@
1/*
2 * Intel 7300 class Memory Controllers kernel module (Clarksboro)
3 *
4 * This file may be distributed under the terms of the
5 * GNU General Public License version 2 only.
6 *
7 * Copyright (c) 2010 by:
8 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 *
10 * Red Hat Inc. http://www.redhat.com
11 *
12 * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet
13 * http://www.intel.com/Assets/PDF/datasheet/318082.pdf
14 *
15 * TODO: The chipset allow checking for PCI Express errors also. Currently,
16 * the driver covers only memory error errors
17 *
18 * This driver uses "csrows" EDAC attribute to represent DIMM slot#
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/pci.h>
24#include <linux/pci_ids.h>
25#include <linux/slab.h>
26#include <linux/edac.h>
27#include <linux/mmzone.h>
28
29#include "edac_core.h"
30
31/*
32 * Alter this version for the I7300 module when modifications are made
33 */
34#define I7300_REVISION " Ver: 1.0.0 " __DATE__
35
36#define EDAC_MOD_STR "i7300_edac"
37
38#define i7300_printk(level, fmt, arg...) \
39 edac_printk(level, "i7300", fmt, ##arg)
40
41#define i7300_mc_printk(mci, level, fmt, arg...) \
42 edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg)
43
44/***********************************************
45 * i7300 Limit constants Structs and static vars
46 ***********************************************/
47
48/*
49 * Memory topology is organized as:
50 * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0)
51 * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0)
52 * Each channel can have to 8 DIMM sets (called as SLOTS)
53 * Slots should generally be filled in pairs
54 * Except on Single Channel mode of operation
55 * just slot 0/channel0 filled on this mode
56 * On normal operation mode, the two channels on a branch should be
57 * filled together for the same SLOT#
58 * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four
59 * channels on both branches should be filled
60 */
61
62/* Limits for i7300 */
63#define MAX_SLOTS 8
64#define MAX_BRANCHES 2
65#define MAX_CH_PER_BRANCH 2
66#define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES)
67#define MAX_MIR 3
68
69#define to_channel(ch, branch) ((((branch)) << 1) | (ch))
70
71#define to_csrow(slot, ch, branch) \
72 (to_channel(ch, branch) | ((slot) << 2))
73
74/* Device name and register DID (Device ID) */
75struct i7300_dev_info {
76 const char *ctl_name; /* name for this device */
77 u16 fsb_mapping_errors; /* DID for the branchmap,control */
78};
79
80/* Table of devices attributes supported by this driver */
81static const struct i7300_dev_info i7300_devs[] = {
82 {
83 .ctl_name = "I7300",
84 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR,
85 },
86};
87
88struct i7300_dimm_info {
89 int megabytes; /* size, 0 means not present */
90};
91
92/* driver private data structure */
93struct i7300_pvt {
94 struct pci_dev *pci_dev_16_0_fsb_ctlr; /* 16.0 */
95 struct pci_dev *pci_dev_16_1_fsb_addr_map; /* 16.1 */
96 struct pci_dev *pci_dev_16_2_fsb_err_regs; /* 16.2 */
97 struct pci_dev *pci_dev_2x_0_fbd_branch[MAX_BRANCHES]; /* 21.0 and 22.0 */
98
99 u16 tolm; /* top of low memory */
100 u64 ambase; /* AMB BAR */
101
102 u32 mc_settings; /* Report several settings */
103 u32 mc_settings_a;
104
105 u16 mir[MAX_MIR]; /* Memory Interleave Reg*/
106
107 u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */
108 u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */
109
110 /* DIMM information matrix, allocating architecture maximums */
111 struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS];
112
113 /* Temporary buffer for use when preparing error messages */
114 char *tmp_prt_buffer;
115};
116
117/* FIXME: Why do we need to have this static? */
118static struct edac_pci_ctl_info *i7300_pci;
119
120/***************************************************
121 * i7300 Register definitions for memory enumeration
122 ***************************************************/
123
124/*
125 * Device 16,
126 * Function 0: System Address (not documented)
127 * Function 1: Memory Branch Map, Control, Errors Register
128 */
129
130 /* OFFSETS for Function 0 */
131#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */
132#define MAXCH 0x56 /* Max Channel Number */
133#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */
134
135 /* OFFSETS for Function 1 */
136#define MC_SETTINGS 0x40
137 #define IS_MIRRORED(mc) ((mc) & (1 << 16))
138 #define IS_ECC_ENABLED(mc) ((mc) & (1 << 5))
139 #define IS_RETRY_ENABLED(mc) ((mc) & (1 << 31))
140 #define IS_SCRBALGO_ENHANCED(mc) ((mc) & (1 << 8))
141
142#define MC_SETTINGS_A 0x58
143 #define IS_SINGLE_MODE(mca) ((mca) & (1 << 14))
144
145#define TOLM 0x6C
146
147#define MIR0 0x80
148#define MIR1 0x84
149#define MIR2 0x88
150
151/*
152 * Note: Other Intel EDAC drivers use AMBPRESENT to identify if the available
153 * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it
154 * seems that we cannot use this information directly for the same usage.
155 * Each memory slot may have up to 2 AMB interfaces, one for income and another
156 * for outcome interface to the next slot.
157 * For now, the driver just stores the AMB present registers, but rely only at
158 * the MTR info to detect memory.
159 * Datasheet is also not clear about how to map each AMBPRESENT registers to
160 * one of the 4 available channels.
161 */
162#define AMBPRESENT_0 0x64
163#define AMBPRESENT_1 0x66
164
165const static u16 mtr_regs[MAX_SLOTS] = {
166 0x80, 0x84, 0x88, 0x8c,
167 0x82, 0x86, 0x8a, 0x8e
168};
169
170/*
171 * Defines to extract the vaious fields from the
172 * MTRx - Memory Technology Registers
173 */
174#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8))
175#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7))
176#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
177#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4)
178#define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0)
179#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
180#define MTR_DRAM_BANKS_ADDR_BITS 2
181#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
182#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
183#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
184
185#ifdef CONFIG_EDAC_DEBUG
186/* MTR NUMROW */
187static const char *numrow_toString[] = {
188 "8,192 - 13 rows",
189 "16,384 - 14 rows",
190 "32,768 - 15 rows",
191 "65,536 - 16 rows"
192};
193
194/* MTR NUMCOL */
195static const char *numcol_toString[] = {
196 "1,024 - 10 columns",
197 "2,048 - 11 columns",
198 "4,096 - 12 columns",
199 "reserved"
200};
201#endif
202
203/************************************************
204 * i7300 Register definitions for error detection
205 ************************************************/
206
207/*
208 * Device 16.1: FBD Error Registers
209 */
210#define FERR_FAT_FBD 0x98
211static const char *ferr_fat_fbd_name[] = {
212 [22] = "Non-Redundant Fast Reset Timeout",
213 [2] = ">Tmid Thermal event with intelligent throttling disabled",
214 [1] = "Memory or FBD configuration CRC read error",
215 [0] = "Memory Write error on non-redundant retry or "
216 "FBD configuration Write error on retry",
217};
218#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28))
219#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
220
221#define FERR_NF_FBD 0xa0
222static const char *ferr_nf_fbd_name[] = {
223 [24] = "DIMM-Spare Copy Completed",
224 [23] = "DIMM-Spare Copy Initiated",
225 [22] = "Redundant Fast Reset Timeout",
226 [21] = "Memory Write error on redundant retry",
227 [18] = "SPD protocol Error",
228 [17] = "FBD Northbound parity error on FBD Sync Status",
229 [16] = "Correctable Patrol Data ECC",
230 [15] = "Correctable Resilver- or Spare-Copy Data ECC",
231 [14] = "Correctable Mirrored Demand Data ECC",
232 [13] = "Correctable Non-Mirrored Demand Data ECC",
233 [11] = "Memory or FBD configuration CRC read error",
234 [10] = "FBD Configuration Write error on first attempt",
235 [9] = "Memory Write error on first attempt",
236 [8] = "Non-Aliased Uncorrectable Patrol Data ECC",
237 [7] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
238 [6] = "Non-Aliased Uncorrectable Mirrored Demand Data ECC",
239 [5] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
240 [4] = "Aliased Uncorrectable Patrol Data ECC",
241 [3] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
242 [2] = "Aliased Uncorrectable Mirrored Demand Data ECC",
243 [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
244 [0] = "Uncorrectable Data ECC on Replay",
245};
246#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28))
247#define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\
248 (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\
249 (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\
250 (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\
251 (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\
252 (1 << 1) | (1 << 0))
253
254#define EMASK_FBD 0xa8
255#define EMASK_FBD_ERR_MASK ((1 << 27) | (1 << 26) | (1 << 25) | (1 << 24) |\
256 (1 << 22) | (1 << 21) | (1 << 20) | (1 << 19) |\
257 (1 << 18) | (1 << 17) | (1 << 16) | (1 << 14) |\
258 (1 << 13) | (1 << 12) | (1 << 11) | (1 << 10) |\
259 (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\
260 (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\
261 (1 << 1) | (1 << 0))
262
263/*
264 * Device 16.2: Global Error Registers
265 */
266
267#define FERR_GLOBAL_HI 0x48
268static const char *ferr_global_hi_name[] = {
269 [3] = "FSB 3 Fatal Error",
270 [2] = "FSB 2 Fatal Error",
271 [1] = "FSB 1 Fatal Error",
272 [0] = "FSB 0 Fatal Error",
273};
274#define ferr_global_hi_is_fatal(errno) 1
275
276#define FERR_GLOBAL_LO 0x40
277static const char *ferr_global_lo_name[] = {
278 [31] = "Internal MCH Fatal Error",
279 [30] = "Intel QuickData Technology Device Fatal Error",
280 [29] = "FSB1 Fatal Error",
281 [28] = "FSB0 Fatal Error",
282 [27] = "FBD Channel 3 Fatal Error",
283 [26] = "FBD Channel 2 Fatal Error",
284 [25] = "FBD Channel 1 Fatal Error",
285 [24] = "FBD Channel 0 Fatal Error",
286 [23] = "PCI Express Device 7Fatal Error",
287 [22] = "PCI Express Device 6 Fatal Error",
288 [21] = "PCI Express Device 5 Fatal Error",
289 [20] = "PCI Express Device 4 Fatal Error",
290 [19] = "PCI Express Device 3 Fatal Error",
291 [18] = "PCI Express Device 2 Fatal Error",
292 [17] = "PCI Express Device 1 Fatal Error",
293 [16] = "ESI Fatal Error",
294 [15] = "Internal MCH Non-Fatal Error",
295 [14] = "Intel QuickData Technology Device Non Fatal Error",
296 [13] = "FSB1 Non-Fatal Error",
297 [12] = "FSB 0 Non-Fatal Error",
298 [11] = "FBD Channel 3 Non-Fatal Error",
299 [10] = "FBD Channel 2 Non-Fatal Error",
300 [9] = "FBD Channel 1 Non-Fatal Error",
301 [8] = "FBD Channel 0 Non-Fatal Error",
302 [7] = "PCI Express Device 7 Non-Fatal Error",
303 [6] = "PCI Express Device 6 Non-Fatal Error",
304 [5] = "PCI Express Device 5 Non-Fatal Error",
305 [4] = "PCI Express Device 4 Non-Fatal Error",
306 [3] = "PCI Express Device 3 Non-Fatal Error",
307 [2] = "PCI Express Device 2 Non-Fatal Error",
308 [1] = "PCI Express Device 1 Non-Fatal Error",
309 [0] = "ESI Non-Fatal Error",
310};
311#define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1)
312
313#define NRECMEMA 0xbe
314 #define NRECMEMA_BANK(v) (((v) >> 12) & 7)
315 #define NRECMEMA_RANK(v) (((v) >> 8) & 15)
316
317#define NRECMEMB 0xc0
318 #define NRECMEMB_IS_WR(v) ((v) & (1 << 31))
319 #define NRECMEMB_CAS(v) (((v) >> 16) & 0x1fff)
320 #define NRECMEMB_RAS(v) ((v) & 0xffff)
321
322#define REDMEMA 0xdc
323
324#define REDMEMB 0x7c
325 #define IS_SECOND_CH(v) ((v) * (1 << 17))
326
327#define RECMEMA 0xe0
328 #define RECMEMA_BANK(v) (((v) >> 12) & 7)
329 #define RECMEMA_RANK(v) (((v) >> 8) & 15)
330
331#define RECMEMB 0xe4
332 #define RECMEMB_IS_WR(v) ((v) & (1 << 31))
333 #define RECMEMB_CAS(v) (((v) >> 16) & 0x1fff)
334 #define RECMEMB_RAS(v) ((v) & 0xffff)
335
336/********************************************
337 * i7300 Functions related to error detection
338 ********************************************/
339
340/**
341 * get_err_from_table() - Gets the error message from a table
342 * @table: table name (array of char *)
343 * @size: number of elements at the table
344 * @pos: position of the element to be returned
345 *
346 * This is a small routine that gets the pos-th element of a table. If the
347 * element doesn't exist (or it is empty), it returns "reserved".
348 * Instead of calling it directly, the better is to call via the macro
349 * GET_ERR_FROM_TABLE(), that automatically checks the table size via
350 * ARRAY_SIZE() macro
351 */
352static const char *get_err_from_table(const char *table[], int size, int pos)
353{
354 if (unlikely(pos >= size))
355 return "Reserved";
356
357 if (unlikely(!table[pos]))
358 return "Reserved";
359
360 return table[pos];
361}
362
363#define GET_ERR_FROM_TABLE(table, pos) \
364 get_err_from_table(table, ARRAY_SIZE(table), pos)
365
366/**
367 * i7300_process_error_global() - Retrieve the hardware error information from
368 * the hardware global error registers and
369 * sends it to dmesg
370 * @mci: struct mem_ctl_info pointer
371 */
372static void i7300_process_error_global(struct mem_ctl_info *mci)
373{
374 struct i7300_pvt *pvt;
375 u32 errnum, value;
376 unsigned long errors;
377 const char *specific;
378 bool is_fatal;
379
380 pvt = mci->pvt_info;
381
382 /* read in the 1st FATAL error register */
383 pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
384 FERR_GLOBAL_HI, &value);
385 if (unlikely(value)) {
386 errors = value;
387 errnum = find_first_bit(&errors,
388 ARRAY_SIZE(ferr_global_hi_name));
389 specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
390 is_fatal = ferr_global_hi_is_fatal(errnum);
391
392 /* Clear the error bit */
393 pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
394 FERR_GLOBAL_HI, value);
395
396 goto error_global;
397 }
398
399 pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
400 FERR_GLOBAL_LO, &value);
401 if (unlikely(value)) {
402 errors = value;
403 errnum = find_first_bit(&errors,
404 ARRAY_SIZE(ferr_global_lo_name));
405 specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
406 is_fatal = ferr_global_lo_is_fatal(errnum);
407
408 /* Clear the error bit */
409 pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
410 FERR_GLOBAL_LO, value);
411
412 goto error_global;
413 }
414 return;
415
416error_global:
417 i7300_mc_printk(mci, KERN_EMERG, "%s misc error: %s\n",
418 is_fatal ? "Fatal" : "NOT fatal", specific);
419}
420
421/**
422 * i7300_process_fbd_error() - Retrieve the hardware error information from
423 * the FBD error registers and sends it via
424 * EDAC error API calls
425 * @mci: struct mem_ctl_info pointer
426 */
427static void i7300_process_fbd_error(struct mem_ctl_info *mci)
428{
429 struct i7300_pvt *pvt;
430 u32 errnum, value;
431 u16 val16;
432 unsigned branch, channel, bank, rank, cas, ras;
433 u32 syndrome;
434
435 unsigned long errors;
436 const char *specific;
437 bool is_wr;
438
439 pvt = mci->pvt_info;
440
441 /* read in the 1st FATAL error register */
442 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
443 FERR_FAT_FBD, &value);
444 if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) {
445 errors = value & FERR_FAT_FBD_ERR_MASK ;
446 errnum = find_first_bit(&errors,
447 ARRAY_SIZE(ferr_fat_fbd_name));
448 specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
449
450 branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
451 pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
452 NRECMEMA, &val16);
453 bank = NRECMEMA_BANK(val16);
454 rank = NRECMEMA_RANK(val16);
455
456 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
457 NRECMEMB, &value);
458
459 is_wr = NRECMEMB_IS_WR(value);
460 cas = NRECMEMB_CAS(value);
461 ras = NRECMEMB_RAS(value);
462
463 snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
464 "FATAL (Branch=%d DRAM-Bank=%d %s "
465 "RAS=%d CAS=%d Err=0x%lx (%s))",
466 branch, bank,
467 is_wr ? "RDWR" : "RD",
468 ras, cas,
469 errors, specific);
470
471 /* Call the helper to output message */
472 edac_mc_handle_fbd_ue(mci, rank, branch << 1,
473 (branch << 1) + 1,
474 pvt->tmp_prt_buffer);
475 }
476
477 /* read in the 1st NON-FATAL error register */
478 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
479 FERR_NF_FBD, &value);
480 if (unlikely(value & FERR_NF_FBD_ERR_MASK)) {
481 errors = value & FERR_NF_FBD_ERR_MASK;
482 errnum = find_first_bit(&errors,
483 ARRAY_SIZE(ferr_nf_fbd_name));
484 specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
485
486 /* Clear the error bit */
487 pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
488 FERR_GLOBAL_LO, value);
489
490 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
491 REDMEMA, &syndrome);
492
493 branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
494 pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
495 RECMEMA, &val16);
496 bank = RECMEMA_BANK(val16);
497 rank = RECMEMA_RANK(val16);
498
499 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
500 RECMEMB, &value);
501
502 is_wr = RECMEMB_IS_WR(value);
503 cas = RECMEMB_CAS(value);
504 ras = RECMEMB_RAS(value);
505
506 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
507 REDMEMB, &value);
508
509 channel = (branch << 1);
510 if (IS_SECOND_CH(value))
511 channel++;
512
513 /* Form out message */
514 snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
515 "Corrected error (Branch=%d, Channel %d), "
516 " DRAM-Bank=%d %s "
517 "RAS=%d CAS=%d, CE Err=0x%lx, Syndrome=0x%08x(%s))",
518 branch, channel,
519 bank,
520 is_wr ? "RDWR" : "RD",
521 ras, cas,
522 errors, syndrome, specific);
523
524 /*
525 * Call the helper to output message
526 * NOTE: Errors are reported per-branch, and not per-channel
527 * Currently, we don't know how to identify the right
528 * channel.
529 */
530 edac_mc_handle_fbd_ce(mci, rank, channel,
531 pvt->tmp_prt_buffer);
532 }
533 return;
534}
535
536/**
537 * i7300_check_error() - Calls the error checking subroutines
538 * @mci: struct mem_ctl_info pointer
539 */
540static void i7300_check_error(struct mem_ctl_info *mci)
541{
542 i7300_process_error_global(mci);
543 i7300_process_fbd_error(mci);
544};
545
546/**
547 * i7300_clear_error() - Clears the error registers
548 * @mci: struct mem_ctl_info pointer
549 */
550static void i7300_clear_error(struct mem_ctl_info *mci)
551{
552 struct i7300_pvt *pvt = mci->pvt_info;
553 u32 value;
554 /*
555 * All error values are RWC - we need to read and write 1 to the
556 * bit that we want to cleanup
557 */
558
559 /* Clear global error registers */
560 pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
561 FERR_GLOBAL_HI, &value);
562 pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
563 FERR_GLOBAL_HI, value);
564
565 pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
566 FERR_GLOBAL_LO, &value);
567 pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
568 FERR_GLOBAL_LO, value);
569
570 /* Clear FBD error registers */
571 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
572 FERR_FAT_FBD, &value);
573 pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
574 FERR_FAT_FBD, value);
575
576 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
577 FERR_NF_FBD, &value);
578 pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
579 FERR_NF_FBD, value);
580}
581
582/**
583 * i7300_enable_error_reporting() - Enable the memory reporting logic at the
584 * hardware
585 * @mci: struct mem_ctl_info pointer
586 */
587static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
588{
589 struct i7300_pvt *pvt = mci->pvt_info;
590 u32 fbd_error_mask;
591
592 /* Read the FBD Error Mask Register */
593 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
594 EMASK_FBD, &fbd_error_mask);
595
596 /* Enable with a '0' */
597 fbd_error_mask &= ~(EMASK_FBD_ERR_MASK);
598
599 pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
600 EMASK_FBD, fbd_error_mask);
601}
602
603/************************************************
604 * i7300 Functions related to memory enumberation
605 ************************************************/
606
607/**
608 * decode_mtr() - Decodes the MTR descriptor, filling the edac structs
609 * @pvt: pointer to the private data struct used by i7300 driver
610 * @slot: DIMM slot (0 to 7)
611 * @ch: Channel number within the branch (0 or 1)
612 * @branch: Branch number (0 or 1)
613 * @dinfo: Pointer to DIMM info where dimm size is stored
614 * @p_csrow: Pointer to the struct csrow_info that corresponds to that element
615 */
616static int decode_mtr(struct i7300_pvt *pvt,
617 int slot, int ch, int branch,
618 struct i7300_dimm_info *dinfo,
619 struct csrow_info *p_csrow,
620 u32 *nr_pages)
621{
622 int mtr, ans, addrBits, channel;
623
624 channel = to_channel(ch, branch);
625
626 mtr = pvt->mtr[slot][branch];
627 ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0;
628
629 debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n",
630 slot, channel,
631 ans ? "Present" : "NOT Present");
632
633 /* Determine if there is a DIMM present in this DIMM slot */
634 if (!ans)
635 return 0;
636
637 /* Start with the number of bits for a Bank
638 * on the DRAM */
639 addrBits = MTR_DRAM_BANKS_ADDR_BITS;
640 /* Add thenumber of ROW bits */
641 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
642 /* add the number of COLUMN bits */
643 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
644 /* add the number of RANK bits */
645 addrBits += MTR_DIMM_RANKS(mtr);
646
647 addrBits += 6; /* add 64 bits per DIMM */
648 addrBits -= 20; /* divide by 2^^20 */
649 addrBits -= 3; /* 8 bits per bytes */
650
651 dinfo->megabytes = 1 << addrBits;
652 *nr_pages = dinfo->megabytes << 8;
653
654 debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
655
656 debugf2("\t\tELECTRICAL THROTTLING is %s\n",
657 MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
658
659 debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
660 debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single");
661 debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
662 debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
663 debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
664
665 p_csrow->grain = 8;
666 p_csrow->mtype = MEM_FB_DDR2;
667 p_csrow->csrow_idx = slot;
668 p_csrow->page_mask = 0;
669
670 /*
671 * The type of error detection actually depends of the
672 * mode of operation. When it is just one single memory chip, at
673 * socket 0, channel 0, it uses 8-byte-over-32-byte SECDED+ code.
674 * In normal or mirrored mode, it uses Lockstep mode,
675 * with the possibility of using an extended algorithm for x8 memories
676 * See datasheet Sections 7.3.6 to 7.3.8
677 */
678
679 if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
680 p_csrow->edac_mode = EDAC_SECDED;
681 debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
682 } else {
683 debugf2("\t\tECC code is on Lockstep mode\n");
684 if (MTR_DRAM_WIDTH(mtr) == 8)
685 p_csrow->edac_mode = EDAC_S8ECD8ED;
686 else
687 p_csrow->edac_mode = EDAC_S4ECD4ED;
688 }
689
690 /* ask what device type on this row */
691 if (MTR_DRAM_WIDTH(mtr) == 8) {
692 debugf2("\t\tScrub algorithm for x8 is on %s mode\n",
693 IS_SCRBALGO_ENHANCED(pvt->mc_settings) ?
694 "enhanced" : "normal");
695
696 p_csrow->dtype = DEV_X8;
697 } else
698 p_csrow->dtype = DEV_X4;
699
700 return mtr;
701}
702
/**
 * print_dimm_size() - Prints dump of the memory organization
 * @pvt: pointer to the private data struct used by i7300 driver
 *
 * Prints one table row per slot with the size, in megabytes, of the
 * DIMM detected on each channel. Useful for debug. If debug is
 * disabled, this routine does nothing.
 */
static void print_dimm_size(struct i7300_pvt *pvt)
{
#ifdef CONFIG_EDAC_DEBUG
	struct i7300_dimm_info *dinfo;
	char *p;
	int space, n;
	int channel, slot;

	/*
	 * Each output line is assembled in the PAGE_SIZE scratch buffer
	 * pvt->tmp_prt_buffer and then emitted as a whole via debugf2().
	 * 'p'/'space' track the current write position and remaining room.
	 */
	space = PAGE_SIZE;
	p = pvt->tmp_prt_buffer;

	/* Header row: one column label per channel */
	n = snprintf(p, space, "              ");
	p += n;
	space -= n;
	for (channel = 0; channel < MAX_CHANNELS; channel++) {
		n = snprintf(p, space, "channel %d | ", channel);
		p += n;
		space -= n;
	}
	debugf2("%s\n", pvt->tmp_prt_buffer);
	p = pvt->tmp_prt_buffer;
	space = PAGE_SIZE;
	n = snprintf(p, space, "-------------------------------"
			       "------------------------------");
	p += n;
	space -= n;
	debugf2("%s\n", pvt->tmp_prt_buffer);
	p = pvt->tmp_prt_buffer;
	space = PAGE_SIZE;

	/* One row per slot, with the per-channel DIMM size in MB */
	for (slot = 0; slot < MAX_SLOTS; slot++) {
		n = snprintf(p, space, "csrow/SLOT %d ", slot);
		p += n;
		space -= n;

		for (channel = 0; channel < MAX_CHANNELS; channel++) {
			dinfo = &pvt->dimm_info[slot][channel];
			n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
			p += n;
			space -= n;
		}

		debugf2("%s\n", pvt->tmp_prt_buffer);
		p = pvt->tmp_prt_buffer;
		space = PAGE_SIZE;
	}

	/* Closing separator line */
	n = snprintf(p, space, "-------------------------------"
			       "------------------------------");
	p += n;
	space -= n;
	debugf2("%s\n", pvt->tmp_prt_buffer);
	p = pvt->tmp_prt_buffer;
	space = PAGE_SIZE;
#endif
}
765
/**
 * i7300_init_csrows() - Initialize the 'csrows' table within
 * 			 the mci control structure with the
 * 			 addressing of memory.
 * @mci: struct mem_ctl_info pointer
 *
 * Reads the AMB-present and MTR registers from both branches, decodes
 * each slot/channel pair and accumulates the per-csrow page counts.
 *
 * Return: 0 if at least one DIMM was found, -ENODEV otherwise.
 */
static int i7300_init_csrows(struct mem_ctl_info *mci)
{
	struct i7300_pvt *pvt;
	struct i7300_dimm_info *dinfo;
	struct csrow_info *p_csrow;
	int rc = -ENODEV;
	int mtr;
	int ch, branch, slot, channel;
	u32 last_page = 0, nr_pages;

	pvt = mci->pvt_info;

	debugf2("Memory Technology Registers:\n");

	/* Get the AMB present registers for the four channels */
	for (branch = 0; branch < MAX_BRANCHES; branch++) {
		/* Read and dump branch 0's MTRs */
		channel = to_channel(0, branch);
		pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
				     AMBPRESENT_0,
				     &pvt->ambpresent[channel]);
		debugf2("\t\tAMB-present CH%d = 0x%x:\n",
			channel, pvt->ambpresent[channel]);

		channel = to_channel(1, branch);
		pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
				     AMBPRESENT_1,
				     &pvt->ambpresent[channel]);
		debugf2("\t\tAMB-present CH%d = 0x%x:\n",
			channel, pvt->ambpresent[channel]);
	}

	/* Get the set of MTR[0-7] regs by each branch */
	for (slot = 0; slot < MAX_SLOTS; slot++) {
		int where = mtr_regs[slot];
		for (branch = 0; branch < MAX_BRANCHES; branch++) {
			pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
					where,
					&pvt->mtr[slot][branch]);
			/*
			 * NOTE(review): 'ch' iterates channels within a
			 * branch yet the bound is MAX_BRANCHES; this works
			 * only if channels-per-branch equals MAX_BRANCHES
			 * on this chipset - confirm against
			 * MAX_CH_PER_BRANCH.
			 */
			for (ch = 0; ch < MAX_BRANCHES; ch++) {
				int channel = to_channel(ch, branch);

				dinfo = &pvt->dimm_info[slot][channel];
				p_csrow = &mci->csrows[slot];

				mtr = decode_mtr(pvt, slot, ch, branch,
						 dinfo, p_csrow, &nr_pages);
				/* if no DIMMS on this row, continue */
				if (!MTR_DIMMS_PRESENT(mtr))
					continue;

				/* Update per_csrow memory count */
				p_csrow->nr_pages += nr_pages;
				p_csrow->first_page = last_page;
				last_page += nr_pages;
				p_csrow->last_page = last_page;

				/* at least one DIMM found: success */
				rc = 0;
			}
		}
	}

	return rc;
}
836
837/**
838 * decode_mir() - Decodes Memory Interleave Register (MIR) info
839 * @int mir_no: number of the MIR register to decode
840 * @mir: array with the MIR data cached on the driver
841 */
842static void decode_mir(int mir_no, u16 mir[MAX_MIR])
843{
844 if (mir[mir_no] & 3)
845 debugf2("MIR%d: limit= 0x%x Branch(es) that participate:"
846 " %s %s\n",
847 mir_no,
848 (mir[mir_no] >> 4) & 0xfff,
849 (mir[mir_no] & 1) ? "B0" : "",
850 (mir[mir_no] & 2) ? "B1" : "");
851}
852
/**
 * i7300_get_mc_regs() - Get the contents of the MC enumeration registers
 * @mci: struct mem_ctl_info pointer
 *
 * Data read is cached internally for its usage when needed.
 *
 * Return: 0 on success, or the error from i7300_init_csrows().
 */
static int i7300_get_mc_regs(struct mem_ctl_info *mci)
{
	struct i7300_pvt *pvt;
	u32 actual_tolm;
	int i, rc;

	pvt = mci->pvt_info;

	/*
	 * NOTE(review): a single dword is read into 'ambase' but it is
	 * printed as a long below - confirm the intended register width.
	 */
	pci_read_config_dword(pvt->pci_dev_16_0_fsb_ctlr, AMBASE,
			(u32 *) &pvt->ambase);

	debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase);

	/* Get the Branch Map regs; TOLM is kept as a count of 256M regions */
	pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, TOLM, &pvt->tolm);
	pvt->tolm >>= 12;
	debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
		pvt->tolm);

	/* Scale to a decimal GB value just for the debug printout */
	actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
	debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
		actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);

	/* Get memory controller settings */
	pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS,
			     &pvt->mc_settings);
	pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS_A,
			     &pvt->mc_settings_a);

	if (IS_SINGLE_MODE(pvt->mc_settings_a))
		debugf0("Memory controller operating on single mode\n");
	else
		debugf0("Memory controller operating on %s mode\n",
		IS_MIRRORED(pvt->mc_settings) ? "mirrored" : "non-mirrored");

	debugf0("Error detection is %s\n",
		IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
	debugf0("Retry is %s\n",
		IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");

	/* Get Memory Interleave Range registers */
	pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0,
			     &pvt->mir[0]);
	pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR1,
			     &pvt->mir[1]);
	pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR2,
			     &pvt->mir[2]);

	/* Decode the MIR regs */
	for (i = 0; i < MAX_MIR; i++)
		decode_mir(i, pvt->mir);

	rc = i7300_init_csrows(mci);
	if (rc < 0)
		return rc;

	/* Go and determine the size of each DIMM and place in an
	 * orderly matrix */
	print_dimm_size(pvt);

	return 0;
}
921
922/*************************************************
923 * i7300 Functions related to device probe/release
924 *************************************************/
925
926/**
927 * i7300_put_devices() - Release the PCI devices
928 * @mci: struct mem_ctl_info pointer
929 */
930static void i7300_put_devices(struct mem_ctl_info *mci)
931{
932 struct i7300_pvt *pvt;
933 int branch;
934
935 pvt = mci->pvt_info;
936
937 /* Decrement usage count for devices */
938 for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++)
939 pci_dev_put(pvt->pci_dev_2x_0_fbd_branch[branch]);
940 pci_dev_put(pvt->pci_dev_16_2_fsb_err_regs);
941 pci_dev_put(pvt->pci_dev_16_1_fsb_addr_map);
942}
943
/**
 * i7300_get_devices() - Find and perform 'get' operation on the MCH's
 * 			 device/functions we want to reference for this driver
 * @mci: struct mem_ctl_info pointer
 *
 * Access and prepare the several devices for usage:
 * I7300 devices used by this driver:
 *    Device 16, functions 0,1 and 2:	PCI_DEVICE_ID_INTEL_I7300_MCH_ERR
 *    Device 21 function 0:		PCI_DEVICE_ID_INTEL_I7300_MCH_FB0
 *    Device 22 function 0:		PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
 *
 * Note: pvt->pci_dev_16_0_fsb_ctlr (func 0) is stored by the probe
 * routine before this is called; only funcs 1 and 2 plus the two
 * branch devices are acquired here.
 *
 * Return: 0 on success, -ENODEV if any device is missing (all
 * references acquired so far are dropped on failure).
 */
static int __devinit i7300_get_devices(struct mem_ctl_info *mci)
{
	struct i7300_pvt *pvt;
	struct pci_dev *pdev;

	pvt = mci->pvt_info;

	/*
	 * Attempt to 'get' the MCH register we want.
	 * Walk all 8086:MCH_ERR functions until both func 1 and func 2
	 * have been stored; pci_get_device() releases the previous pdev
	 * on each iteration, while the stored ones keep their reference.
	 */
	pdev = NULL;
	while (!pvt->pci_dev_16_1_fsb_addr_map ||
	       !pvt->pci_dev_16_2_fsb_err_regs) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev);
		if (!pdev) {
			/* End of list, leave */
			i7300_printk(KERN_ERR,
				"'system address,Process Bus' "
				"device not found:"
				"vendor 0x%x device 0x%x ERR funcs "
				"(broken BIOS?)\n",
				PCI_VENDOR_ID_INTEL,
				PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
			goto error;
		}

		/* Store device 16 funcs 1 and 2 */
		switch (PCI_FUNC(pdev->devfn)) {
		case 1:
			pvt->pci_dev_16_1_fsb_addr_map = pdev;
			break;
		case 2:
			pvt->pci_dev_16_2_fsb_err_regs = pdev;
			break;
		}
	}

	debugf1("System Address, processor bus- PCI Bus ID: %s  %x:%x\n",
		pci_name(pvt->pci_dev_16_0_fsb_ctlr),
		pvt->pci_dev_16_0_fsb_ctlr->vendor,
		pvt->pci_dev_16_0_fsb_ctlr->device);
	debugf1("Branchmap, control and errors - PCI Bus ID: %s  %x:%x\n",
		pci_name(pvt->pci_dev_16_1_fsb_addr_map),
		pvt->pci_dev_16_1_fsb_addr_map->vendor,
		pvt->pci_dev_16_1_fsb_addr_map->device);
	debugf1("FSB Error Regs - PCI Bus ID: %s  %x:%x\n",
		pci_name(pvt->pci_dev_16_2_fsb_err_regs),
		pvt->pci_dev_16_2_fsb_err_regs->vendor,
		pvt->pci_dev_16_2_fsb_err_regs->device);

	/* Acquire the two branch (FBD) devices, one per branch */
	pvt->pci_dev_2x_0_fbd_branch[0] = pci_get_device(PCI_VENDOR_ID_INTEL,
					    PCI_DEVICE_ID_INTEL_I7300_MCH_FB0,
					    NULL);
	if (!pvt->pci_dev_2x_0_fbd_branch[0]) {
		i7300_printk(KERN_ERR,
			"MC: 'BRANCH 0' device not found:"
			"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
			PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0);
		goto error;
	}

	pvt->pci_dev_2x_0_fbd_branch[1] = pci_get_device(PCI_VENDOR_ID_INTEL,
					    PCI_DEVICE_ID_INTEL_I7300_MCH_FB1,
					    NULL);
	if (!pvt->pci_dev_2x_0_fbd_branch[1]) {
		i7300_printk(KERN_ERR,
			"MC: 'BRANCH 1' device not found:"
			"vendor 0x%x device 0x%x Func 0 "
			"(broken BIOS?)\n",
			PCI_VENDOR_ID_INTEL,
			PCI_DEVICE_ID_INTEL_I7300_MCH_FB1);
		goto error;
	}

	return 0;

error:
	i7300_put_devices(mci);
	return -ENODEV;
}
1034
/**
 * i7300_init_one() - Probe for one instance of the device
 * @pdev: struct pci_dev pointer
 * @id: struct pci_device_id pointer - currently unused
 *
 * Allocates the mem_ctl_info structure, acquires the chipset PCI
 * devices, reads the MC registers and registers the controller with
 * the EDAC core.
 *
 * Return: 0 on success, -EIO/-ENODEV/-ENOMEM on failure.
 */
static int __devinit i7300_init_one(struct pci_dev *pdev,
				    const struct pci_device_id *id)
{
	struct mem_ctl_info *mci;
	struct i7300_pvt *pvt;
	int num_channels;
	int num_dimms_per_channel;
	int num_csrows;
	int rc;

	/* wake up device */
	rc = pci_enable_device(pdev);
	if (rc == -EIO)
		return rc;

	debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
		__func__,
		pdev->bus->number,
		PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));

	/* We only are looking for func 0 of the set */
	if (PCI_FUNC(pdev->devfn) != 0)
		return -ENODEV;

	/* As we don't have a motherboard identification routine to determine
	 * actual number of slots/dimms per channel, we thus utilize the
	 * resource as specified by the chipset. Thus, we might have
	 * have more DIMMs per channel than actually on the mobo, but this
	 * allows the driver to support upto the chipset max, without
	 * some fancy mobo determination.
	 */
	num_dimms_per_channel = MAX_SLOTS;
	num_channels = MAX_CHANNELS;
	num_csrows = MAX_SLOTS * MAX_CHANNELS;

	debugf0("MC: %s(): Number of - Channels= %d  DIMMS= %d  CSROWS= %d\n",
		__func__, num_channels, num_dimms_per_channel, num_csrows);

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);

	if (mci == NULL)
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);

	mci->dev = &pdev->dev;	/* record ptr  to the generic device */

	pvt = mci->pvt_info;
	pvt->pci_dev_16_0_fsb_ctlr = pdev;	/* Record this device in our private */

	/* Scratch buffer used by print_dimm_size() */
	pvt->tmp_prt_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!pvt->tmp_prt_buffer) {
		edac_mc_free(mci);
		return -ENOMEM;
	}

	/* 'get' the pci devices we want to reserve for our use */
	if (i7300_get_devices(mci))
		goto fail0;

	mci->mc_idx = 0;
	mci->mtype_cap = MEM_FLAG_FB_DDR2;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7300_edac.c";
	mci->mod_ver = I7300_REVISION;
	mci->ctl_name = i7300_devs[0].ctl_name;
	mci->dev_name = pci_name(pdev);
	mci->ctl_page_to_phys = NULL;

	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7300_check_error;

	/* initialize the MC control structure 'csrows' table
	 * with the mapping and control information */
	if (i7300_get_mc_regs(mci)) {
		debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
			"    because i7300_init_csrows() returned nonzero "
			"value\n");
		mci->edac_cap = EDAC_FLAG_NONE;	/* no csrows found */
	} else {
		debugf1("MC: Enable error reporting now\n");
		i7300_enable_error_reporting(mci);
	}

	/* add this new MC control structure to EDAC's list of MCs */
	if (edac_mc_add_mc(mci)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */
		goto fail1;
	}

	i7300_clear_error(mci);

	/* allocating generic PCI control info */
	i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
	if (!i7300_pci) {
		printk(KERN_WARNING
			"%s(): Unable to create PCI control\n",
			__func__);
		printk(KERN_WARNING
			"%s(): PCI error report via EDAC not setup\n",
			__func__);
	}

	return 0;

	/* Error exit unwinding stack */
fail1:

	i7300_put_devices(mci);

fail0:
	kfree(pvt->tmp_prt_buffer);
	edac_mc_free(mci);
	return -ENODEV;
}
1161
1162/**
1163 * i7300_remove_one() - Remove the driver
1164 * @pdev: struct pci_dev pointer
1165 */
1166static void __devexit i7300_remove_one(struct pci_dev *pdev)
1167{
1168 struct mem_ctl_info *mci;
1169 char *tmp;
1170
1171 debugf0(__FILE__ ": %s()\n", __func__);
1172
1173 if (i7300_pci)
1174 edac_pci_release_generic_ctl(i7300_pci);
1175
1176 mci = edac_mc_del_mc(&pdev->dev);
1177 if (!mci)
1178 return;
1179
1180 tmp = ((struct i7300_pvt *)mci->pvt_info)->tmp_prt_buffer;
1181
1182 /* retrieve references to resources, and free those resources */
1183 i7300_put_devices(mci);
1184
1185 kfree(tmp);
1186 edac_mc_free(mci);
1187}
1188
/*
 * pci_device_id: table of devices this driver binds to.
 *
 * Has only the 8086:360c (I7300 MCH ERR) PCI ID; the branch devices
 * are acquired manually in i7300_get_devices().
 */
static const struct pci_device_id i7300_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)},
	{0,}			/* 0 terminated list. */
};

MODULE_DEVICE_TABLE(pci, i7300_pci_tbl);

/*
 * i7300_driver: pci_driver structure for this module
 */
static struct pci_driver i7300_driver = {
	.name = "i7300_edac",
	.probe = i7300_init_one,
	.remove = __devexit_p(i7300_remove_one),
	.id_table = i7300_pci_tbl,
};
1210
1211/**
1212 * i7300_init() - Registers the driver
1213 */
1214static int __init i7300_init(void)
1215{
1216 int pci_rc;
1217
1218 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1219
1220 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1221 opstate_init();
1222
1223 pci_rc = pci_register_driver(&i7300_driver);
1224
1225 return (pci_rc < 0) ? pci_rc : 0;
1226}
1227
/**
 * i7300_exit() - Unregisters the driver
 */
static void __exit i7300_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7300_driver);
}
1236
module_init(i7300_init);
module_exit(i7300_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
		   I7300_REVISION);

/* EDAC polling mode: 0=Poll, 1=NMI (read-only module parameter) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c
index a2fa1feed724..678405ab04e4 100644
--- a/drivers/edac/i82443bxgx_edac.c
+++ b/drivers/edac/i82443bxgx_edac.c
@@ -12,7 +12,7 @@
12 * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>. 12 * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>.
13 * 13 *
14 * Written with reference to 82443BX Host Bridge Datasheet: 14 * Written with reference to 82443BX Host Bridge Datasheet:
15 * http://www.intel.com/design/chipsets/440/documentation.htm 15 * http://download.intel.com/design/chipsets/datashts/29063301.pdf
16 * references to this document given in []. 16 * references to this document given in [].
17 * 17 *
18 * This module doesn't support the 440LX, but it may be possible to 18 * This module doesn't support the 440LX, but it may be possible to
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
new file mode 100644
index 000000000000..c0181093b490
--- /dev/null
+++ b/drivers/edac/mce_amd.c
@@ -0,0 +1,680 @@
#include <linux/module.h>
#include <linux/slab.h>

#include "mce_amd.h"

/* Per-CPU-family decode callbacks, allocated and filled at init time */
static struct amd_decoder_ops *fam_ops;

/* Mask for the ErrCpu bits in NBSH; narrowed to 0x3 on family 0x14 */
static u8 nb_err_cpumask = 0xf;

/* When false, GART TLB errors (bank 4, xec 0x5) are filtered out */
static bool report_gart_errors;
/* Optional EDAC callback used to further decode NB DRAM ECC errors */
static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);
12
/* Enable/disable reporting of GART TLB errors (disabled by default). */
void amd_report_gart_errors(bool v)
{
	report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
/*
 * Register an EDAC callback for decoding NB DRAM ECC errors.
 * Only a single decoder is supported; a second registration
 * silently overwrites the first.
 */
void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32))
{
	nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32))
26{
27 if (nb_bus_decoder) {
28 WARN_ON(nb_bus_decoder != f);
29
30 nb_bus_decoder = NULL;
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
/*
 * string representation for the different MCA reported error types, see F3x48
 * or MSR0000_0411.
 */

/* transaction type */
const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
EXPORT_SYMBOL_GPL(tt_msgs);

/* cache level */
const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
EXPORT_SYMBOL_GPL(ll_msgs);

/* memory transaction type (R4 field); indexed via RRRR_MSG() */
const char *rrrr_msgs[] = {
       "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};
EXPORT_SYMBOL_GPL(rrrr_msgs);

/* participating processor */
const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
const char *to_msgs[] = { "no timeout",	"timed out" };
EXPORT_SYMBOL_GPL(to_msgs);

/* memory or i/o */
const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
EXPORT_SYMBOL_GPL(ii_msgs);

/*
 * Descriptions for family 10h NB extended error codes; indexed by
 * (xec - offset), see f10h_nb_mce() for the offset mapping.
 */
static const char *f10h_nb_mce_desc[] = {
	"HT link data error",
	"Protocol error (link, L3, probe filter, etc.)",
	"Parity error in NB-internal arrays",
	"Link Retry due to IO link transmission error",
	"L3 ECC data cache error",
	"ECC error in L3 cache tag",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};
76
77static bool f12h_dc_mce(u16 ec)
78{
79 bool ret = false;
80
81 if (MEM_ERROR(ec)) {
82 u8 ll = ec & 0x3;
83 ret = true;
84
85 if (ll == LL_L2)
86 pr_cont("during L1 linefill from L2.\n");
87 else if (ll == LL_L1)
88 pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
89 else
90 ret = false;
91 }
92 return ret;
93}
94
95static bool f10h_dc_mce(u16 ec)
96{
97 u8 r4 = (ec >> 4) & 0xf;
98 u8 ll = ec & 0x3;
99
100 if (r4 == R4_GEN && ll == LL_L1) {
101 pr_cont("during data scrub.\n");
102 return true;
103 }
104 return f12h_dc_mce(ec);
105}
106
107static bool k8_dc_mce(u16 ec)
108{
109 if (BUS_ERROR(ec)) {
110 pr_cont("during system linefill.\n");
111 return true;
112 }
113
114 return f10h_dc_mce(ec);
115}
116
/*
 * Family 14h DC MCE decoder.
 *
 * Returns true when the error-code signature was recognized and a
 * message printed, false otherwise.
 */
static bool f14h_dc_mce(u16 ec)
{
	u8 r4	 = (ec >> 4) & 0xf;
	u8 ll	 = ec & 0x3;
	u8 tt	 = (ec >> 2) & 0x3;
	/*
	 * The II (mem/io) field of a bus-error code occupies the same
	 * bits [3:2] that TT occupies in a memory-error code, hence the
	 * aliasing here - assumes both layouts share those bits, per
	 * the MCA error-code format.
	 */
	u8 ii	 = tt;
	bool ret = true;

	if (MEM_ERROR(ec)) {

		/* Only data errors at L1 are valid memory signatures */
		if (tt != TT_DATA || ll != LL_L1)
			return false;

		switch (r4) {
		case R4_DRD:
		case R4_DWR:
			pr_cont("Data/Tag parity error due to %s.\n",
				(r4 == R4_DRD ? "load/hw prf" : "store"));
			break;
		case R4_EVICT:
			pr_cont("Copyback parity error on a tag miss.\n");
			break;
		case R4_SNOOP:
			pr_cont("Tag parity error during snoop.\n");
			break;
		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		/* Bus errors must target mem or io at the "LG" level */
		if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
			return false;

		pr_cont("System read data error on a ");

		switch (r4) {
		case R4_RD:
			pr_cont("TLB reload.\n");
			break;
		case R4_DWR:
			pr_cont("store.\n");
			break;
		case R4_DRD:
			pr_cont("load.\n");
			break;
		default:
			ret = false;
		}
	} else {
		ret = false;
	}

	return ret;
}
171
172static void amd_decode_dc_mce(struct mce *m)
173{
174 u16 ec = m->status & 0xffff;
175 u8 xec = (m->status >> 16) & 0xf;
176
177 pr_emerg(HW_ERR "Data Cache Error: ");
178
179 /* TLB error signatures are the same across families */
180 if (TLB_ERROR(ec)) {
181 u8 tt = (ec >> 2) & 0x3;
182
183 if (tt == TT_DATA) {
184 pr_cont("%s TLB %s.\n", LL_MSG(ec),
185 (xec ? "multimatch" : "parity error"));
186 return;
187 }
188 else
189 goto wrong_dc_mce;
190 }
191
192 if (!fam_ops->dc_mce(ec))
193 goto wrong_dc_mce;
194
195 return;
196
197wrong_dc_mce:
198 pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
199}
200
201static bool k8_ic_mce(u16 ec)
202{
203 u8 ll = ec & 0x3;
204 u8 r4 = (ec >> 4) & 0xf;
205 bool ret = true;
206
207 if (!MEM_ERROR(ec))
208 return false;
209
210 if (ll == 0x2)
211 pr_cont("during a linefill from L2.\n");
212 else if (ll == 0x1) {
213 switch (r4) {
214 case R4_IRD:
215 pr_cont("Parity error during data load.\n");
216 break;
217
218 case R4_EVICT:
219 pr_cont("Copyback Parity/Victim error.\n");
220 break;
221
222 case R4_SNOOP:
223 pr_cont("Tag Snoop error.\n");
224 break;
225
226 default:
227 ret = false;
228 break;
229 }
230 } else
231 ret = false;
232
233 return ret;
234}
235
236static bool f14h_ic_mce(u16 ec)
237{
238 u8 ll = ec & 0x3;
239 u8 tt = (ec >> 2) & 0x3;
240 u8 r4 = (ec >> 4) & 0xf;
241 bool ret = true;
242
243 if (MEM_ERROR(ec)) {
244 if (tt != 0 || ll != 1)
245 ret = false;
246
247 if (r4 == R4_IRD)
248 pr_cont("Data/tag array parity error for a tag hit.\n");
249 else if (r4 == R4_SNOOP)
250 pr_cont("Tag error during snoop/victimization.\n");
251 else
252 ret = false;
253 }
254 return ret;
255}
256
257static void amd_decode_ic_mce(struct mce *m)
258{
259 u16 ec = m->status & 0xffff;
260 u8 xec = (m->status >> 16) & 0xf;
261
262 pr_emerg(HW_ERR "Instruction Cache Error: ");
263
264 if (TLB_ERROR(ec))
265 pr_cont("%s TLB %s.\n", LL_MSG(ec),
266 (xec ? "multimatch" : "parity error"));
267 else if (BUS_ERROR(ec)) {
268 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
269
270 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
271 } else if (fam_ops->ic_mce(ec))
272 ;
273 else
274 pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
275}
276
/*
 * Decode a Bus Unit (bank 2) MCE. Dispatches on the extended error
 * code (xec) first, then on the primary error-code class.
 */
static void amd_decode_bu_mce(struct mce *m)
{
	u32 ec = m->status & 0xffff;
	u32 xec = (m->status >> 16) & 0xf;

	pr_emerg(HW_ERR "Bus Unit Error");

	if (xec == 0x1)
		pr_cont(" in the write data buffers.\n");
	else if (xec == 0x3)
		pr_cont(" in the victim data buffers.\n");
	else if (xec == 0x2 && MEM_ERROR(ec))
		pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
	else if (xec == 0x0) {
		if (TLB_ERROR(ec))
			pr_cont(": %s error in a Page Descriptor Cache or "
				"Guest TLB.\n", TT_MSG(ec));
		else if (BUS_ERROR(ec))
			pr_cont(": %s/ECC error in data read from NB: %s.\n",
				RRRR_MSG(ec), PP_MSG(ec));
		else if (MEM_ERROR(ec)) {
			u8 rrrr = (ec >> 4) & 0xf;

			/* R4 >= 7: evict/snoop; R4 <= 1: generic/read */
			if (rrrr >= 0x7)
				pr_cont(": %s error during data copyback.\n",
					RRRR_MSG(ec));
			else if (rrrr <= 0x1)
				pr_cont(": %s parity/ECC error during data "
					"access from L2.\n", RRRR_MSG(ec));
			else
				goto wrong_bu_mce;
		} else
			goto wrong_bu_mce;
	} else
		goto wrong_bu_mce;

	return;

wrong_bu_mce:
	pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
}
318
/*
 * Decode a Load/Store (bank 3) MCE. Family 0x14 has no LS bank, so
 * seeing one there indicates bogus data.
 */
static void amd_decode_ls_mce(struct mce *m)
{
	u16 ec = m->status & 0xffff;
	u8 xec = (m->status >> 16) & 0xf;

	if (boot_cpu_data.x86 == 0x14) {
		pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
			 " please report on LKML.\n");
		return;
	}

	pr_emerg(HW_ERR "Load Store Error");

	if (xec == 0x0) {
		u8 r4 = (ec >> 4) & 0xf;

		/* only bus errors on a data read or data write are valid */
		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
			goto wrong_ls_mce;

		pr_cont(" during %s.\n", RRRR_MSG(ec));
	} else
		goto wrong_ls_mce;

	return;

wrong_ls_mce:
	pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
}
347
348static bool k8_nb_mce(u16 ec, u8 xec)
349{
350 bool ret = true;
351
352 switch (xec) {
353 case 0x1:
354 pr_cont("CRC error detected on HT link.\n");
355 break;
356
357 case 0x5:
358 pr_cont("Invalid GART PTE entry during GART table walk.\n");
359 break;
360
361 case 0x6:
362 pr_cont("Unsupported atomic RMW received from an IO link.\n");
363 break;
364
365 case 0x0:
366 case 0x8:
367 if (boot_cpu_data.x86 == 0x11)
368 return false;
369
370 pr_cont("DRAM ECC error detected on the NB.\n");
371 break;
372
373 case 0xd:
374 pr_cont("Parity error on the DRAM addr/ctl signals.\n");
375 break;
376
377 default:
378 ret = false;
379 break;
380 }
381
382 return ret;
383}
384
/*
 * Family 10h NB MCE decoder: first tries the K8 signatures, then the
 * family 10h extended codes. The 'offset' subtracted from xec maps
 * each code range onto f10h_nb_mce_desc[]:
 *   0xa-0xc -> desc[0..2], 0xe -> desc[3], 0x1c-0x1f -> desc[4..7].
 */
static bool f10h_nb_mce(u16 ec, u8 xec)
{
	bool ret = true;
	u8 offset = 0;

	if (k8_nb_mce(ec, xec))
		return true;

	switch(xec) {
	case 0xa ... 0xc:
		offset = 10;
		break;

	case 0xe:
		offset = 11;
		break;

	case 0xf:
		/* 0xf is ambiguous: disambiguate on the error-code class */
		if (TLB_ERROR(ec))
			pr_cont("GART Table Walk data error.\n");
		else if (BUS_ERROR(ec))
			pr_cont("DMA Exclusion Vector Table Walk error.\n");
		else
			ret = false;

		goto out;
		break;

	case 0x1c ... 0x1f:
		offset = 24;
		break;

	default:
		ret = false;

		goto out;
		break;
	}

	pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]);

out:
	return ret;
}
429
/* Stub for families without NB MCE decoding (0x12, 0x14): accept nothing. */
static bool nb_noop_mce(u16 ec, u8 xec)
{
	return false;
}
434
/*
 * Decode a Northbridge (bank 4) MCE for @node_id. Prints the
 * originating core when available, handles the family-independent
 * extended codes inline, and defers the rest to the per-family
 * nb_mce() callback. DRAM ECC errors (xec 0x0/0x8) are additionally
 * forwarded to the registered EDAC nb_bus_decoder, if any.
 */
void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
{
	u8 xec   = (m->status >> 16) & 0x1f;
	u16 ec   = m->status & 0xffff;
	u32 nbsh = (u32)(m->status >> 32);

	pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);

	/*
	 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
	 * value encoding has changed so interpret those differently
	 */
	if ((boot_cpu_data.x86 == 0x10) &&
	    (boot_cpu_data.x86_model > 7)) {
		if (nbsh & K8_NBSH_ERR_CPU_VAL)
			pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
	} else {
		/* older encoding: one bit per associated core */
		u8 assoc_cpus = nbsh & nb_err_cpumask;

		if (assoc_cpus > 0)
			pr_cont(", core: %d", fls(assoc_cpus) - 1);
	}

	/* extended codes whose meaning is the same on all families */
	switch (xec) {
	case 0x2:
		pr_cont("Sync error (sync packets on HT link detected).\n");
		return;

	case 0x3:
		pr_cont("HT Master abort.\n");
		return;

	case 0x4:
		pr_cont("HT Target abort.\n");
		return;

	case 0x7:
		pr_cont("NB Watchdog timeout.\n");
		return;

	case 0x9:
		pr_cont("SVM DMA Exclusion Vector error.\n");
		return;

	default:
		break;
	}

	if (!fam_ops->nb_mce(ec, xec))
		goto wrong_nb_mce;

	/* hand DRAM ECC errors to EDAC for further decoding (K8/F10h only) */
	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10)
		if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder)
			nb_bus_decoder(node_id, m, nbcfg);

	return;

wrong_nb_mce:
	pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
}
EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
496
497static void amd_decode_fr_mce(struct mce *m)
498{
499 if (boot_cpu_data.x86 == 0xf ||
500 boot_cpu_data.x86 == 0x11)
501 goto wrong_fr_mce;
502
503 /* we have only one error signature so match all fields at once. */
504 if ((m->status & 0xffff) == 0x0f0f) {
505 pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
506 return;
507 }
508
509wrong_fr_mce:
510 pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
511}
512
/*
 * Print the generic, family-independent breakdown of the primary MCA
 * error code: transaction type, cache level, and - for memory/bus
 * errors - the R4 request type and participating-processor field.
 */
static inline void amd_decode_err_code(u16 ec)
{
	if (TLB_ERROR(ec)) {
		pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
			 TT_MSG(ec), LL_MSG(ec));
	} else if (MEM_ERROR(ec)) {
		pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
			 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
	} else if (BUS_ERROR(ec)) {
		pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
			 "Participating Processor: %s\n",
			  RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
			  PP_MSG(ec));
	} else
		pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
}
529
530/*
531 * Filter out unwanted MCE signatures here.
532 */
533static bool amd_filter_mce(struct mce *m)
534{
535 u8 xec = (m->status >> 16) & 0x1f;
536
537 /*
538 * NB GART TLB error reporting is disabled by default.
539 */
540 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
541 return true;
542
543 return false;
544}
545
/*
 * Main MCE decode entry point, called from the x86 MCE decoder
 * notifier chain. Prints the status summary, dispatches to the
 * per-bank decoder, then prints the generic error-code breakdown.
 *
 * Always returns NOTIFY_STOP so no further decoders run.
 */
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
	struct mce *m = (struct mce *)data;
	int node, ecc;

	if (amd_filter_mce(m))
		return NOTIFY_STOP;

	pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);

	pr_cont("%sorrected error, other errors lost: %s, "
		 "CPU context corrupt: %s",
		 ((m->status & MCI_STATUS_UC) ? "Unc"  : "C"),
		 ((m->status & MCI_STATUS_OVER) ? "yes"  : "no"),
		 ((m->status & MCI_STATUS_PCC)  ? "yes" : "no"));

	/* do the two bits[14:13] together */
	ecc = (m->status >> 45) & 0x3;
	if (ecc)
		pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));

	pr_cont("\n");

	/* dispatch on the reporting bank */
	switch (m->bank) {
	case 0:
		amd_decode_dc_mce(m);
		break;

	case 1:
		amd_decode_ic_mce(m);
		break;

	case 2:
		amd_decode_bu_mce(m);
		break;

	case 3:
		amd_decode_ls_mce(m);
		break;

	case 4:
		/* NB errors are decoded per NUMA node */
		node = amd_get_nb_id(m->extcpu);
		amd_decode_nb_mce(node, m, 0);
		break;

	case 5:
		amd_decode_fr_mce(m);
		break;

	default:
		break;
	}

	amd_decode_err_code(m->status & 0xffff);

	return NOTIFY_STOP;
}
EXPORT_SYMBOL_GPL(amd_decode_mce);
604
/* Notifier hooked into the x86 MCE decoder chain at init time */
static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
};
608
/*
 * Module init: selects the per-family decode callbacks and registers
 * the decoder on the x86 MCE notifier chain. Bails out silently (0)
 * on non-AMD CPUs or on families this decoder does not know about.
 */
static int __init mce_amd_init(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return 0;

	/* supported: families 0xf-0x12, plus 0x14 models <= 0xf */
	if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
	    (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
		return 0;

	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
	if (!fam_ops)
		return -ENOMEM;

	switch (boot_cpu_data.x86) {
	case 0xf:
		fam_ops->dc_mce = k8_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = k8_nb_mce;
		break;

	case 0x10:
		fam_ops->dc_mce = f10h_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = f10h_nb_mce;
		break;

	case 0x11:
		fam_ops->dc_mce = k8_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = f10h_nb_mce;
		break;

	case 0x12:
		fam_ops->dc_mce = f12h_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = nb_noop_mce;
		break;

	case 0x14:
		/* family 0x14 has only 2 ErrCpu bits */
		nb_err_cpumask  = 0x3;
		fam_ops->dc_mce = f14h_dc_mce;
		fam_ops->ic_mce = f14h_ic_mce;
		fam_ops->nb_mce = nb_noop_mce;
		break;

	default:
		printk(KERN_WARNING "Huh? What family is that: %d?!\n",
				    boot_cpu_data.x86);
		kfree(fam_ops);
		return -EINVAL;
	}

	pr_info("MCE: In-kernel MCE decoding enabled.\n");

	atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);

	return 0;
}
early_initcall(mce_amd_init);
668
#ifdef MODULE
/* Module exit: unhook from the notifier chain and free the ops table */
static void __exit mce_amd_exit(void)
{
	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/mce_amd.h
index df23ee065f79..35f6e0e3b297 100644
--- a/drivers/edac/edac_mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -1,11 +1,14 @@
1#ifndef _EDAC_MCE_AMD_H 1#ifndef _EDAC_MCE_AMD_H
2#define _EDAC_MCE_AMD_H 2#define _EDAC_MCE_AMD_H
3 3
4#include <linux/notifier.h>
5
4#include <asm/mce.h> 6#include <asm/mce.h>
5 7
8#define BIT_64(n) (U64_C(1) << (n))
9
6#define ERROR_CODE(x) ((x) & 0xffff) 10#define ERROR_CODE(x) ((x) & 0xffff)
7#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) 11#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f)
8#define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)]
9 12
10#define LOW_SYNDROME(x) (((x) >> 15) & 0xff) 13#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
11#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) 14#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -20,13 +23,14 @@
20#define II_MSG(x) ii_msgs[II(x)] 23#define II_MSG(x) ii_msgs[II(x)]
21#define LL(x) (((x) >> 0) & 0x3) 24#define LL(x) (((x) >> 0) & 0x3)
22#define LL_MSG(x) ll_msgs[LL(x)] 25#define LL_MSG(x) ll_msgs[LL(x)]
23#define RRRR(x) (((x) >> 4) & 0xf)
24#define RRRR_MSG(x) rrrr_msgs[RRRR(x)]
25#define TO(x) (((x) >> 8) & 0x1) 26#define TO(x) (((x) >> 8) & 0x1)
26#define TO_MSG(x) to_msgs[TO(x)] 27#define TO_MSG(x) to_msgs[TO(x)]
27#define PP(x) (((x) >> 9) & 0x3) 28#define PP(x) (((x) >> 9) & 0x3)
28#define PP_MSG(x) pp_msgs[PP(x)] 29#define PP_MSG(x) pp_msgs[PP(x)]
29 30
31#define RRRR(x) (((x) >> 4) & 0xf)
32#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!")
33
30#define K8_NBSH 0x4C 34#define K8_NBSH 0x4C
31 35
32#define K8_NBSH_VALID_BIT BIT(31) 36#define K8_NBSH_VALID_BIT BIT(31)
@@ -41,13 +45,45 @@
41#define K8_NBSH_UECC BIT(13) 45#define K8_NBSH_UECC BIT(13)
42#define K8_NBSH_ERR_SCRUBER BIT(8) 46#define K8_NBSH_ERR_SCRUBER BIT(8)
43 47
48enum tt_ids {
49 TT_INSTR = 0,
50 TT_DATA,
51 TT_GEN,
52 TT_RESV,
53};
54
55enum ll_ids {
56 LL_RESV = 0,
57 LL_L1,
58 LL_L2,
59 LL_LG,
60};
61
62enum ii_ids {
63 II_MEM = 0,
64 II_RESV,
65 II_IO,
66 II_GEN,
67};
68
69enum rrrr_ids {
70 R4_GEN = 0,
71 R4_RD,
72 R4_WR,
73 R4_DRD,
74 R4_DWR,
75 R4_IRD,
76 R4_PREF,
77 R4_EVICT,
78 R4_SNOOP,
79};
80
44extern const char *tt_msgs[]; 81extern const char *tt_msgs[];
45extern const char *ll_msgs[]; 82extern const char *ll_msgs[];
46extern const char *rrrr_msgs[]; 83extern const char *rrrr_msgs[];
47extern const char *pp_msgs[]; 84extern const char *pp_msgs[];
48extern const char *to_msgs[]; 85extern const char *to_msgs[];
49extern const char *ii_msgs[]; 86extern const char *ii_msgs[];
50extern const char *ext_msgs[];
51 87
52/* 88/*
53 * relevant NB regs 89 * relevant NB regs
@@ -60,10 +96,19 @@ struct err_regs {
60 u32 nbeal; 96 u32 nbeal;
61}; 97};
62 98
99/*
100 * per-family decoder ops
101 */
102struct amd_decoder_ops {
103 bool (*dc_mce)(u16);
104 bool (*ic_mce)(u16);
105 bool (*nb_mce)(u16, u8);
106};
63 107
64void amd_report_gart_errors(bool); 108void amd_report_gart_errors(bool);
65void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)); 109void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
66void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)); 110void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
67void amd_decode_nb_mce(int, struct err_regs *, int); 111void amd_decode_nb_mce(int, struct mce *, u32);
112int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
68 113
69#endif /* _EDAC_MCE_AMD_H */ 114#endif /* _EDAC_MCE_AMD_H */
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
new file mode 100644
index 000000000000..8d0688f36d4c
--- /dev/null
+++ b/drivers/edac/mce_amd_inj.c
@@ -0,0 +1,171 @@
1/*
2 * A simple MCE injection facility for testing the MCE decoding code. This
3 * driver should be built as module so that it can be loaded on production
4 * kernels for testing purposes.
5 *
6 * This file may be distributed under the terms of the GNU General Public
7 * License version 2.
8 *
9 * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com>
10 * Advanced Micro Devices Inc.
11 */
12
13#include <linux/kobject.h>
14#include <linux/sysdev.h>
15#include <linux/edac.h>
16#include <asm/mce.h>
17
18#include "mce_amd.h"
19
/*
 * sysfs attribute whose show/store callbacks also receive the attribute
 * itself, so one handler pair can back several injection files.
 */
struct edac_mce_attr {
	struct attribute attr;
	ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
	ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
			 const char *buf, size_t count);
};
26
/* Declare a named edac_mce_attr with the given mode and callbacks. */
#define EDAC_MCE_ATTR(_name, _mode, _show, _store)			\
static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)

/* /sys/devices/system/edac/mce directory holding the injection files. */
static struct kobject *mce_kobj;

/*
 * Collect all the MCi_XXX settings
 */
static struct mce i_mce;
36
37#define MCE_INJECT_STORE(reg) \
38static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
39 struct edac_mce_attr *attr, \
40 const char *data, size_t count)\
41{ \
42 int ret = 0; \
43 unsigned long value; \
44 \
45 ret = strict_strtoul(data, 16, &value); \
46 if (ret < 0) \
47 printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
48 \
49 i_mce.reg = value; \
50 \
51 return count; \
52}
53
54MCE_INJECT_STORE(status);
55MCE_INJECT_STORE(misc);
56MCE_INJECT_STORE(addr);
57
/* Generate a show handler that prints one MCE record field in hex. */
#define MCE_INJECT_SHOW(reg)						\
static ssize_t edac_inject_##reg##_show(struct kobject *kobj,		\
					struct edac_mce_attr *attr,	\
					char *buf)			\
{									\
	return sprintf(buf, "0x%016llx\n", i_mce.reg);			\
}

MCE_INJECT_SHOW(status);
MCE_INJECT_SHOW(misc);
MCE_INJECT_SHOW(addr);

/* sysfs files for the injectable MCi_STATUS/MISC/ADDR fields. */
EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
73
74/*
75 * This denotes into which bank we're injecting and triggers
76 * the injection, at the same time.
77 */
78static ssize_t edac_inject_bank_store(struct kobject *kobj,
79 struct edac_mce_attr *attr,
80 const char *data, size_t count)
81{
82 int ret = 0;
83 unsigned long value;
84
85 ret = strict_strtoul(data, 10, &value);
86 if (ret < 0) {
87 printk(KERN_ERR "Invalid bank value!\n");
88 return -EINVAL;
89 }
90
91 if (value > 5) {
92 printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
93 return -EINVAL;
94 }
95
96 i_mce.bank = value;
97
98 amd_decode_mce(NULL, 0, &i_mce);
99
100 return count;
101}
102
/* Show the currently selected injection bank. */
static ssize_t edac_inject_bank_show(struct kobject *kobj,
				     struct edac_mce_attr *attr, char *buf)
{
	return sprintf(buf, "%d\n", i_mce.bank);
}

EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);

/* All injection attributes; created in init, removed in exit. */
static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
					       &mce_attr_addr, &mce_attr_bank
};
114
115static int __init edac_init_mce_inject(void)
116{
117 struct sysdev_class *edac_class = NULL;
118 int i, err = 0;
119
120 edac_class = edac_get_sysfs_class();
121 if (!edac_class)
122 return -EINVAL;
123
124 mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
125 if (!mce_kobj) {
126 printk(KERN_ERR "Error creating a mce kset.\n");
127 err = -ENOMEM;
128 goto err_mce_kobj;
129 }
130
131 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
132 err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
133 if (err) {
134 printk(KERN_ERR "Error creating %s in sysfs.\n",
135 sysfs_attrs[i]->attr.name);
136 goto err_sysfs_create;
137 }
138 }
139 return 0;
140
141err_sysfs_create:
142 while (i-- >= 0)
143 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
144
145 kobject_del(mce_kobj);
146
147err_mce_kobj:
148 edac_put_sysfs_class();
149
150 return err;
151}
152
/*
 * Module unload: remove all injection files, drop the "mce" kobject and
 * release the EDAC class reference taken in edac_init_mce_inject().
 */
static void __exit edac_exit_mce_inject(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
		sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);

	kobject_del(mce_kobj);

	edac_put_sysfs_class();
}

module_init(edac_init_mce_inject);
module_exit(edac_exit_mce_inject);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>");
MODULE_AUTHOR("AMD Inc.");
MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding")