aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-21 17:04:58 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-21 17:04:58 -0400
commitc029e405bd3d4e92d09fbbc97c03952585986ebe (patch)
tree8c9b23481a22ca15240e6a4e0df9afc3492efe3f /drivers
parenta9ccd80aadadef8c424142d41fddea7c0880f0e9 (diff)
parent525906bc898d712f21e5bfcfc85ab0e517e3d086 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (21 commits) EDAC, MCE: Fix shift warning on 32-bit EDAC, MCE: Add a BIT_64() macro EDAC, MCE: Enable MCE decoding on F12h EDAC, MCE: Add F12h NB MCE decoder EDAC, MCE: Add F12h IC MCE decoder EDAC, MCE: Add F12h DC MCE decoder EDAC, MCE: Add support for F11h MCEs EDAC, MCE: Enable MCE decoding on F14h EDAC, MCE: Fix FR MCEs decoding EDAC, MCE: Complete NB MCE decoders EDAC, MCE: Warn about LS MCEs on F14h EDAC, MCE: Adjust IC decoders to F14h EDAC, MCE: Adjust DC decoders to F14h EDAC, MCE: Rename files EDAC, MCE: Rework MCE injection EDAC: Export edac sysfs class to users. EDAC, MCE: Pass complete MCE info to decoders EDAC, MCE: Sanitize error codes EDAC, MCE: Remove unused function parameter EDAC, MCE: Add HW_ERR prefix ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/edac/Kconfig14
-rw-r--r--drivers/edac/Makefile3
-rw-r--r--drivers/edac/amd64_edac.c13
-rw-r--r--drivers/edac/amd64_edac.h5
-rw-r--r--drivers/edac/amd64_edac_dbg.c207
-rw-r--r--drivers/edac/edac_device_sysfs.c16
-rw-r--r--drivers/edac/edac_mc_sysfs.c11
-rw-r--r--drivers/edac/edac_mce_amd.c452
-rw-r--r--drivers/edac/edac_module.c79
-rw-r--r--drivers/edac/edac_module.h1
-rw-r--r--drivers/edac/edac_pci_sysfs.c10
-rw-r--r--drivers/edac/edac_stub.c51
-rw-r--r--drivers/edac/mce_amd.c680
-rw-r--r--drivers/edac/mce_amd.h (renamed from drivers/edac/edac_mce_amd.h)59
-rw-r--r--drivers/edac/mce_amd_inj.c171
15 files changed, 1014 insertions, 758 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 734e2e062374..9dbb28b9559f 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -39,7 +39,7 @@ config EDAC_DEBUG
39 there're four debug levels (x=0,1,2,3 from low to high). 39 there're four debug levels (x=0,1,2,3 from low to high).
40 Usually you should select 'N'. 40 Usually you should select 'N'.
41 41
42 config EDAC_DECODE_MCE 42config EDAC_DECODE_MCE
43 tristate "Decode MCEs in human-readable form (only on AMD for now)" 43 tristate "Decode MCEs in human-readable form (only on AMD for now)"
44 depends on CPU_SUP_AMD && X86_MCE 44 depends on CPU_SUP_AMD && X86_MCE
45 default y 45 default y
@@ -51,6 +51,16 @@ config EDAC_DEBUG
51 which occur really early upon boot, before the module infrastructure 51 which occur really early upon boot, before the module infrastructure
52 has been initialized. 52 has been initialized.
53 53
54config EDAC_MCE_INJ
55 tristate "Simple MCE injection interface over /sysfs"
56 depends on EDAC_DECODE_MCE
57 default n
58 help
59 This is a simple interface to inject MCEs over /sysfs and test
60 the MCE decoding code in EDAC.
61
62 This is currently AMD-only.
63
54config EDAC_MM_EDAC 64config EDAC_MM_EDAC
55 tristate "Main Memory EDAC (Error Detection And Correction) reporting" 65 tristate "Main Memory EDAC (Error Detection And Correction) reporting"
56 help 66 help
@@ -72,7 +82,7 @@ config EDAC_AMD64
72 Families of Memory Controllers (K8, F10h and F11h) 82 Families of Memory Controllers (K8, F10h and F11h)
73 83
74config EDAC_AMD64_ERROR_INJECTION 84config EDAC_AMD64_ERROR_INJECTION
75 bool "Sysfs Error Injection facilities" 85 bool "Sysfs HW Error injection facilities"
76 depends on EDAC_AMD64 86 depends on EDAC_AMD64
77 help 87 help
78 Recent Opterons (Family 10h and later) provide for Memory Error 88 Recent Opterons (Family 10h and later) provide for Memory Error
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index ca6b1bb24ccc..32c7bc93c525 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,9 @@ ifdef CONFIG_PCI
17edac_core-objs += edac_pci.o edac_pci_sysfs.o 17edac_core-objs += edac_pci.o edac_pci_sysfs.o
18endif 18endif
19 19
20obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
21
22edac_mce_amd-objs := mce_amd.o
20obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o 23obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
21 24
22obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 25obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 09fcc5282327..8521401bbd75 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2073,11 +2073,18 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
2073 amd64_handle_ue(mci, info); 2073 amd64_handle_ue(mci, info);
2074} 2074}
2075 2075
2076void amd64_decode_bus_error(int node_id, struct err_regs *regs) 2076void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
2077{ 2077{
2078 struct mem_ctl_info *mci = mci_lookup[node_id]; 2078 struct mem_ctl_info *mci = mci_lookup[node_id];
2079 struct err_regs regs;
2079 2080
2080 __amd64_decode_bus_error(mci, regs); 2081 regs.nbsl = (u32) m->status;
2082 regs.nbsh = (u32)(m->status >> 32);
2083 regs.nbeal = (u32) m->addr;
2084 regs.nbeah = (u32)(m->addr >> 32);
2085 regs.nbcfg = nbcfg;
2086
2087 __amd64_decode_bus_error(mci, &regs);
2081 2088
2082 /* 2089 /*
2083 * Check the UE bit of the NB status high register, if set generate some 2090 * Check the UE bit of the NB status high register, if set generate some
@@ -2086,7 +2093,7 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs)
2086 * 2093 *
2087 * FIXME: this should go somewhere else, if at all. 2094 * FIXME: this should go somewhere else, if at all.
2088 */ 2095 */
2089 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2096 if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
2090 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2097 edac_mc_handle_ue_no_info(mci, "UE bit is set");
2091 2098
2092} 2099}
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 613b9381e71a..044aee4f944d 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -72,7 +72,7 @@
72#include <linux/edac.h> 72#include <linux/edac.h>
73#include <asm/msr.h> 73#include <asm/msr.h>
74#include "edac_core.h" 74#include "edac_core.h"
75#include "edac_mce_amd.h" 75#include "mce_amd.h"
76 76
77#define amd64_printk(level, fmt, arg...) \ 77#define amd64_printk(level, fmt, arg...) \
78 edac_printk(level, "amd64", fmt, ##arg) 78 edac_printk(level, "amd64", fmt, ##arg)
@@ -482,11 +482,10 @@ extern const char *rrrr_msgs[16];
482extern const char *to_msgs[2]; 482extern const char *to_msgs[2];
483extern const char *pp_msgs[4]; 483extern const char *pp_msgs[4];
484extern const char *ii_msgs[4]; 484extern const char *ii_msgs[4];
485extern const char *ext_msgs[32];
486extern const char *htlink_msgs[8]; 485extern const char *htlink_msgs[8];
487 486
488#ifdef CONFIG_EDAC_DEBUG 487#ifdef CONFIG_EDAC_DEBUG
489#define NUM_DBG_ATTRS 9 488#define NUM_DBG_ATTRS 5
490#else 489#else
491#define NUM_DBG_ATTRS 0 490#define NUM_DBG_ATTRS 0
492#endif 491#endif
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index 59cf2cf6e11e..e3562288f4ce 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -1,167 +1,16 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2 2
3/* 3#define EDAC_DCT_ATTR_SHOW(reg) \
4 * accept a hex value and store it into the virtual error register file, field: 4static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
5 * nbeal and nbeah. Assume virtual error values have already been set for: NBSL, 5{ \
6 * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and 6 struct amd64_pvt *pvt = mci->pvt_info; \
7 * CHANNEL 7 return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
8 */
9static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
10 size_t count)
11{
12 struct amd64_pvt *pvt = mci->pvt_info;
13 unsigned long long value;
14 int ret = 0;
15
16 ret = strict_strtoull(data, 16, &value);
17 if (ret != -EINVAL) {
18 debugf0("received NBEA= 0x%llx\n", value);
19
20 /* place the value into the virtual error packet */
21 pvt->ctl_error_info.nbeal = (u32) value;
22 value >>= 32;
23 pvt->ctl_error_info.nbeah = (u32) value;
24
25 /* Process the Mapping request */
26 /* TODO: Add race prevention */
27 amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1);
28
29 return count;
30 }
31 return ret;
32}
33
34/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
35static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
36{
37 struct amd64_pvt *pvt = mci->pvt_info;
38 u64 value;
39
40 value = pvt->ctl_error_info.nbeah;
41 value <<= 32;
42 value |= pvt->ctl_error_info.nbeal;
43
44 return sprintf(data, "%llx\n", value);
45}
46
47/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
48static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
49 size_t count)
50{
51 struct amd64_pvt *pvt = mci->pvt_info;
52 unsigned long value;
53 int ret = 0;
54
55 ret = strict_strtoul(data, 16, &value);
56 if (ret != -EINVAL) {
57 debugf0("received NBSL= 0x%lx\n", value);
58
59 pvt->ctl_error_info.nbsl = (u32) value;
60
61 return count;
62 }
63 return ret;
64}
65
66/* display back what the last NBSL value written */
67static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
68{
69 struct amd64_pvt *pvt = mci->pvt_info;
70 u32 value;
71
72 value = pvt->ctl_error_info.nbsl;
73
74 return sprintf(data, "%x\n", value);
75}
76
77/* store the NBSH (MCA NB Status High) value user desires */
78static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
79 size_t count)
80{
81 struct amd64_pvt *pvt = mci->pvt_info;
82 unsigned long value;
83 int ret = 0;
84
85 ret = strict_strtoul(data, 16, &value);
86 if (ret != -EINVAL) {
87 debugf0("received NBSH= 0x%lx\n", value);
88
89 pvt->ctl_error_info.nbsh = (u32) value;
90
91 return count;
92 }
93 return ret;
94}
95
96/* display back what the last NBSH value written */
97static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
98{
99 struct amd64_pvt *pvt = mci->pvt_info;
100 u32 value;
101
102 value = pvt->ctl_error_info.nbsh;
103
104 return sprintf(data, "%x\n", value);
105} 8}
106 9
107/* accept and store the NBCFG (MCA NB Configuration) value user desires */ 10EDAC_DCT_ATTR_SHOW(dhar);
108static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci, 11EDAC_DCT_ATTR_SHOW(dbam0);
109 const char *data, size_t count) 12EDAC_DCT_ATTR_SHOW(top_mem);
110{ 13EDAC_DCT_ATTR_SHOW(top_mem2);
111 struct amd64_pvt *pvt = mci->pvt_info;
112 unsigned long value;
113 int ret = 0;
114
115 ret = strict_strtoul(data, 16, &value);
116 if (ret != -EINVAL) {
117 debugf0("received NBCFG= 0x%lx\n", value);
118
119 pvt->ctl_error_info.nbcfg = (u32) value;
120
121 return count;
122 }
123 return ret;
124}
125
126/* various show routines for the controls of a MCI */
127static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
128{
129 struct amd64_pvt *pvt = mci->pvt_info;
130
131 return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
132}
133
134
135static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
136{
137 struct amd64_pvt *pvt = mci->pvt_info;
138
139 return sprintf(data, "%x\n", pvt->dhar);
140}
141
142
143static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
144{
145 struct amd64_pvt *pvt = mci->pvt_info;
146
147 return sprintf(data, "%x\n", pvt->dbam0);
148}
149
150
151static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
152{
153 struct amd64_pvt *pvt = mci->pvt_info;
154
155 return sprintf(data, "%llx\n", pvt->top_mem);
156}
157
158
159static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
160{
161 struct amd64_pvt *pvt = mci->pvt_info;
162
163 return sprintf(data, "%llx\n", pvt->top_mem2);
164}
165 14
166static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) 15static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
167{ 16{
@@ -182,38 +31,6 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
182 31
183 { 32 {
184 .attr = { 33 .attr = {
185 .name = "nbea_ctl",
186 .mode = (S_IRUGO | S_IWUSR)
187 },
188 .show = amd64_nbea_show,
189 .store = amd64_nbea_store,
190 },
191 {
192 .attr = {
193 .name = "nbsl_ctl",
194 .mode = (S_IRUGO | S_IWUSR)
195 },
196 .show = amd64_nbsl_show,
197 .store = amd64_nbsl_store,
198 },
199 {
200 .attr = {
201 .name = "nbsh_ctl",
202 .mode = (S_IRUGO | S_IWUSR)
203 },
204 .show = amd64_nbsh_show,
205 .store = amd64_nbsh_store,
206 },
207 {
208 .attr = {
209 .name = "nbcfg_ctl",
210 .mode = (S_IRUGO | S_IWUSR)
211 },
212 .show = amd64_nbcfg_show,
213 .store = amd64_nbcfg_store,
214 },
215 {
216 .attr = {
217 .name = "dhar", 34 .name = "dhar",
218 .mode = (S_IRUGO) 35 .mode = (S_IRUGO)
219 }, 36 },
@@ -225,7 +42,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
225 .name = "dbam", 42 .name = "dbam",
226 .mode = (S_IRUGO) 43 .mode = (S_IRUGO)
227 }, 44 },
228 .show = amd64_dbam_show, 45 .show = amd64_dbam0_show,
229 .store = NULL, 46 .store = NULL,
230 }, 47 },
231 { 48 {
@@ -233,7 +50,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
233 .name = "topmem", 50 .name = "topmem",
234 .mode = (S_IRUGO) 51 .mode = (S_IRUGO)
235 }, 52 },
236 .show = amd64_topmem_show, 53 .show = amd64_top_mem_show,
237 .store = NULL, 54 .store = NULL,
238 }, 55 },
239 { 56 {
@@ -241,7 +58,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
241 .name = "topmem2", 58 .name = "topmem2",
242 .mode = (S_IRUGO) 59 .mode = (S_IRUGO)
243 }, 60 },
244 .show = amd64_topmem2_show, 61 .show = amd64_top_mem2_show,
245 .store = NULL, 62 .store = NULL,
246 }, 63 },
247 { 64 {
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 070968178a24..2941dca91aae 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -13,6 +13,7 @@
13#include <linux/ctype.h> 13#include <linux/ctype.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/edac.h>
16 17
17#include "edac_core.h" 18#include "edac_core.h"
18#include "edac_module.h" 19#include "edac_module.h"
@@ -235,7 +236,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
235 debugf1("%s()\n", __func__); 236 debugf1("%s()\n", __func__);
236 237
237 /* get the /sys/devices/system/edac reference */ 238 /* get the /sys/devices/system/edac reference */
238 edac_class = edac_get_edac_class(); 239 edac_class = edac_get_sysfs_class();
239 if (edac_class == NULL) { 240 if (edac_class == NULL) {
240 debugf1("%s() no edac_class error\n", __func__); 241 debugf1("%s() no edac_class error\n", __func__);
241 err = -ENODEV; 242 err = -ENODEV;
@@ -255,7 +256,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
255 256
256 if (!try_module_get(edac_dev->owner)) { 257 if (!try_module_get(edac_dev->owner)) {
257 err = -ENODEV; 258 err = -ENODEV;
258 goto err_out; 259 goto err_mod_get;
259 } 260 }
260 261
261 /* register */ 262 /* register */
@@ -282,6 +283,9 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
282err_kobj_reg: 283err_kobj_reg:
283 module_put(edac_dev->owner); 284 module_put(edac_dev->owner);
284 285
286err_mod_get:
287 edac_put_sysfs_class();
288
285err_out: 289err_out:
286 return err; 290 return err;
287} 291}
@@ -290,12 +294,11 @@ err_out:
290 * edac_device_unregister_sysfs_main_kobj: 294 * edac_device_unregister_sysfs_main_kobj:
291 * the '..../edac/<name>' kobject 295 * the '..../edac/<name>' kobject
292 */ 296 */
293void edac_device_unregister_sysfs_main_kobj( 297void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
294 struct edac_device_ctl_info *edac_dev)
295{ 298{
296 debugf0("%s()\n", __func__); 299 debugf0("%s()\n", __func__);
297 debugf4("%s() name of kobject is: %s\n", 300 debugf4("%s() name of kobject is: %s\n",
298 __func__, kobject_name(&edac_dev->kobj)); 301 __func__, kobject_name(&dev->kobj));
299 302
300 /* 303 /*
301 * Unregister the edac device's kobject and 304 * Unregister the edac device's kobject and
@@ -304,7 +307,8 @@ void edac_device_unregister_sysfs_main_kobj(
304 * a) module_put() this module 307 * a) module_put() this module
305 * b) 'kfree' the memory 308 * b) 'kfree' the memory
306 */ 309 */
307 kobject_put(&edac_dev->kobj); 310 kobject_put(&dev->kobj);
311 edac_put_sysfs_class();
308} 312}
309 313
310/* edac_dev -> instance information */ 314/* edac_dev -> instance information */
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 8aad94d10c0c..a4135860149b 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/edac.h>
14#include <linux/bug.h> 15#include <linux/bug.h>
15 16
16#include "edac_core.h" 17#include "edac_core.h"
@@ -1011,13 +1012,13 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1011 */ 1012 */
1012int edac_sysfs_setup_mc_kset(void) 1013int edac_sysfs_setup_mc_kset(void)
1013{ 1014{
1014 int err = 0; 1015 int err = -EINVAL;
1015 struct sysdev_class *edac_class; 1016 struct sysdev_class *edac_class;
1016 1017
1017 debugf1("%s()\n", __func__); 1018 debugf1("%s()\n", __func__);
1018 1019
1019 /* get the /sys/devices/system/edac class reference */ 1020 /* get the /sys/devices/system/edac class reference */
1020 edac_class = edac_get_edac_class(); 1021 edac_class = edac_get_sysfs_class();
1021 if (edac_class == NULL) { 1022 if (edac_class == NULL) {
1022 debugf1("%s() no edac_class error=%d\n", __func__, err); 1023 debugf1("%s() no edac_class error=%d\n", __func__, err);
1023 goto fail_out; 1024 goto fail_out;
@@ -1028,15 +1029,16 @@ int edac_sysfs_setup_mc_kset(void)
1028 if (!mc_kset) { 1029 if (!mc_kset) {
1029 err = -ENOMEM; 1030 err = -ENOMEM;
1030 debugf1("%s() Failed to register '.../edac/mc'\n", __func__); 1031 debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
1031 goto fail_out; 1032 goto fail_kset;
1032 } 1033 }
1033 1034
1034 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); 1035 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__);
1035 1036
1036 return 0; 1037 return 0;
1037 1038
1039fail_kset:
1040 edac_put_sysfs_class();
1038 1041
1039 /* error unwind stack */
1040fail_out: 1042fail_out:
1041 return err; 1043 return err;
1042} 1044}
@@ -1049,5 +1051,6 @@ fail_out:
1049void edac_sysfs_teardown_mc_kset(void) 1051void edac_sysfs_teardown_mc_kset(void)
1050{ 1052{
1051 kset_unregister(mc_kset); 1053 kset_unregister(mc_kset);
1054 edac_put_sysfs_class();
1052} 1055}
1053 1056
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
deleted file mode 100644
index 9014df6f605d..000000000000
--- a/drivers/edac/edac_mce_amd.c
+++ /dev/null
@@ -1,452 +0,0 @@
1#include <linux/module.h>
2#include "edac_mce_amd.h"
3
4static bool report_gart_errors;
5static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
6
7void amd_report_gart_errors(bool v)
8{
9 report_gart_errors = v;
10}
11EXPORT_SYMBOL_GPL(amd_report_gart_errors);
12
13void amd_register_ecc_decoder(void (*f)(int, struct err_regs *))
14{
15 nb_bus_decoder = f;
16}
17EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
18
19void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *))
20{
21 if (nb_bus_decoder) {
22 WARN_ON(nb_bus_decoder != f);
23
24 nb_bus_decoder = NULL;
25 }
26}
27EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
28
29/*
30 * string representation for the different MCA reported error types, see F3x48
31 * or MSR0000_0411.
32 */
33const char *tt_msgs[] = { /* transaction type */
34 "instruction",
35 "data",
36 "generic",
37 "reserved"
38};
39EXPORT_SYMBOL_GPL(tt_msgs);
40
41const char *ll_msgs[] = { /* cache level */
42 "L0",
43 "L1",
44 "L2",
45 "L3/generic"
46};
47EXPORT_SYMBOL_GPL(ll_msgs);
48
49const char *rrrr_msgs[] = {
50 "generic",
51 "generic read",
52 "generic write",
53 "data read",
54 "data write",
55 "inst fetch",
56 "prefetch",
57 "evict",
58 "snoop",
59 "reserved RRRR= 9",
60 "reserved RRRR= 10",
61 "reserved RRRR= 11",
62 "reserved RRRR= 12",
63 "reserved RRRR= 13",
64 "reserved RRRR= 14",
65 "reserved RRRR= 15"
66};
67EXPORT_SYMBOL_GPL(rrrr_msgs);
68
69const char *pp_msgs[] = { /* participating processor */
70 "local node originated (SRC)",
71 "local node responded to request (RES)",
72 "local node observed as 3rd party (OBS)",
73 "generic"
74};
75EXPORT_SYMBOL_GPL(pp_msgs);
76
77const char *to_msgs[] = {
78 "no timeout",
79 "timed out"
80};
81EXPORT_SYMBOL_GPL(to_msgs);
82
83const char *ii_msgs[] = { /* memory or i/o */
84 "mem access",
85 "reserved",
86 "i/o access",
87 "generic"
88};
89EXPORT_SYMBOL_GPL(ii_msgs);
90
91/*
92 * Map the 4 or 5 (family-specific) bits of Extended Error code to the
93 * string table.
94 */
95const char *ext_msgs[] = {
96 "K8 ECC error", /* 0_0000b */
97 "CRC error on link", /* 0_0001b */
98 "Sync error packets on link", /* 0_0010b */
99 "Master Abort during link operation", /* 0_0011b */
100 "Target Abort during link operation", /* 0_0100b */
101 "Invalid GART PTE entry during table walk", /* 0_0101b */
102 "Unsupported atomic RMW command received", /* 0_0110b */
103 "WDT error: NB transaction timeout", /* 0_0111b */
104 "ECC/ChipKill ECC error", /* 0_1000b */
105 "SVM DEV Error", /* 0_1001b */
106 "Link Data error", /* 0_1010b */
107 "Link/L3/Probe Filter Protocol error", /* 0_1011b */
108 "NB Internal Arrays Parity error", /* 0_1100b */
109 "DRAM Address/Control Parity error", /* 0_1101b */
110 "Link Transmission error", /* 0_1110b */
111 "GART/DEV Table Walk Data error" /* 0_1111b */
112 "Res 0x100 error", /* 1_0000b */
113 "Res 0x101 error", /* 1_0001b */
114 "Res 0x102 error", /* 1_0010b */
115 "Res 0x103 error", /* 1_0011b */
116 "Res 0x104 error", /* 1_0100b */
117 "Res 0x105 error", /* 1_0101b */
118 "Res 0x106 error", /* 1_0110b */
119 "Res 0x107 error", /* 1_0111b */
120 "Res 0x108 error", /* 1_1000b */
121 "Res 0x109 error", /* 1_1001b */
122 "Res 0x10A error", /* 1_1010b */
123 "Res 0x10B error", /* 1_1011b */
124 "ECC error in L3 Cache Data", /* 1_1100b */
125 "L3 Cache Tag error", /* 1_1101b */
126 "L3 Cache LRU Parity error", /* 1_1110b */
127 "Probe Filter error" /* 1_1111b */
128};
129EXPORT_SYMBOL_GPL(ext_msgs);
130
131static void amd_decode_dc_mce(u64 mc0_status)
132{
133 u32 ec = mc0_status & 0xffff;
134 u32 xec = (mc0_status >> 16) & 0xf;
135
136 pr_emerg("Data Cache Error");
137
138 if (xec == 1 && TLB_ERROR(ec))
139 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
140 else if (xec == 0) {
141 if (mc0_status & (1ULL << 40))
142 pr_cont(" during Data Scrub.\n");
143 else if (TLB_ERROR(ec))
144 pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
145 else if (MEM_ERROR(ec)) {
146 u8 ll = ec & 0x3;
147 u8 tt = (ec >> 2) & 0x3;
148 u8 rrrr = (ec >> 4) & 0xf;
149
150 /* see F10h BKDG (31116), Table 92. */
151 if (ll == 0x1) {
152 if (tt != 0x1)
153 goto wrong_dc_mce;
154
155 pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec));
156
157 } else if (ll == 0x2 && rrrr == 0x3)
158 pr_cont(" during L1 linefill from L2.\n");
159 else
160 goto wrong_dc_mce;
161 } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf)
162 pr_cont(" during system linefill.\n");
163 else
164 goto wrong_dc_mce;
165 } else
166 goto wrong_dc_mce;
167
168 return;
169
170wrong_dc_mce:
171 pr_warning("Corrupted DC MCE info?\n");
172}
173
174static void amd_decode_ic_mce(u64 mc1_status)
175{
176 u32 ec = mc1_status & 0xffff;
177 u32 xec = (mc1_status >> 16) & 0xf;
178
179 pr_emerg("Instruction Cache Error");
180
181 if (xec == 1 && TLB_ERROR(ec))
182 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
183 else if (xec == 0) {
184 if (TLB_ERROR(ec))
185 pr_cont(": %s TLB Parity error.\n", LL_MSG(ec));
186 else if (BUS_ERROR(ec)) {
187 if (boot_cpu_data.x86 == 0xf &&
188 (mc1_status & (1ULL << 58)))
189 pr_cont(" during system linefill.\n");
190 else
191 pr_cont(" during attempted NB data read.\n");
192 } else if (MEM_ERROR(ec)) {
193 u8 ll = ec & 0x3;
194 u8 rrrr = (ec >> 4) & 0xf;
195
196 if (ll == 0x2)
197 pr_cont(" during a linefill from L2.\n");
198 else if (ll == 0x1) {
199
200 switch (rrrr) {
201 case 0x5:
202 pr_cont(": Parity error during "
203 "data load.\n");
204 break;
205
206 case 0x7:
207 pr_cont(": Copyback Parity/Victim"
208 " error.\n");
209 break;
210
211 case 0x8:
212 pr_cont(": Tag Snoop error.\n");
213 break;
214
215 default:
216 goto wrong_ic_mce;
217 break;
218 }
219 }
220 } else
221 goto wrong_ic_mce;
222 } else
223 goto wrong_ic_mce;
224
225 return;
226
227wrong_ic_mce:
228 pr_warning("Corrupted IC MCE info?\n");
229}
230
231static void amd_decode_bu_mce(u64 mc2_status)
232{
233 u32 ec = mc2_status & 0xffff;
234 u32 xec = (mc2_status >> 16) & 0xf;
235
236 pr_emerg("Bus Unit Error");
237
238 if (xec == 0x1)
239 pr_cont(" in the write data buffers.\n");
240 else if (xec == 0x3)
241 pr_cont(" in the victim data buffers.\n");
242 else if (xec == 0x2 && MEM_ERROR(ec))
243 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
244 else if (xec == 0x0) {
245 if (TLB_ERROR(ec))
246 pr_cont(": %s error in a Page Descriptor Cache or "
247 "Guest TLB.\n", TT_MSG(ec));
248 else if (BUS_ERROR(ec))
249 pr_cont(": %s/ECC error in data read from NB: %s.\n",
250 RRRR_MSG(ec), PP_MSG(ec));
251 else if (MEM_ERROR(ec)) {
252 u8 rrrr = (ec >> 4) & 0xf;
253
254 if (rrrr >= 0x7)
255 pr_cont(": %s error during data copyback.\n",
256 RRRR_MSG(ec));
257 else if (rrrr <= 0x1)
258 pr_cont(": %s parity/ECC error during data "
259 "access from L2.\n", RRRR_MSG(ec));
260 else
261 goto wrong_bu_mce;
262 } else
263 goto wrong_bu_mce;
264 } else
265 goto wrong_bu_mce;
266
267 return;
268
269wrong_bu_mce:
270 pr_warning("Corrupted BU MCE info?\n");
271}
272
273static void amd_decode_ls_mce(u64 mc3_status)
274{
275 u32 ec = mc3_status & 0xffff;
276 u32 xec = (mc3_status >> 16) & 0xf;
277
278 pr_emerg("Load Store Error");
279
280 if (xec == 0x0) {
281 u8 rrrr = (ec >> 4) & 0xf;
282
283 if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4))
284 goto wrong_ls_mce;
285
286 pr_cont(" during %s.\n", RRRR_MSG(ec));
287 }
288 return;
289
290wrong_ls_mce:
291 pr_warning("Corrupted LS MCE info?\n");
292}
293
294void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
295{
296 u32 ec = ERROR_CODE(regs->nbsl);
297
298 if (!handle_errors)
299 return;
300
301 /*
302 * GART TLB error reporting is disabled by default. Bail out early.
303 */
304 if (TLB_ERROR(ec) && !report_gart_errors)
305 return;
306
307 pr_emerg("Northbridge Error, node %d", node_id);
308
309 /*
310 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
311 * value encoding has changed so interpret those differently
312 */
313 if ((boot_cpu_data.x86 == 0x10) &&
314 (boot_cpu_data.x86_model > 7)) {
315 if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
316 pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
317 } else {
318 u8 assoc_cpus = regs->nbsh & 0xf;
319
320 if (assoc_cpus > 0)
321 pr_cont(", core: %d", fls(assoc_cpus) - 1);
322
323 pr_cont("\n");
324 }
325
326 pr_emerg("%s.\n", EXT_ERR_MSG(regs->nbsl));
327
328 if (BUS_ERROR(ec) && nb_bus_decoder)
329 nb_bus_decoder(node_id, regs);
330}
331EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
332
333static void amd_decode_fr_mce(u64 mc5_status)
334{
335 /* we have only one error signature so match all fields at once. */
336 if ((mc5_status & 0xffff) == 0x0f0f)
337 pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
338 else
339 pr_warning("Corrupted FR MCE info?\n");
340}
341
342static inline void amd_decode_err_code(unsigned int ec)
343{
344 if (TLB_ERROR(ec)) {
345 pr_emerg("Transaction: %s, Cache Level %s\n",
346 TT_MSG(ec), LL_MSG(ec));
347 } else if (MEM_ERROR(ec)) {
348 pr_emerg("Transaction: %s, Type: %s, Cache Level: %s",
349 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
350 } else if (BUS_ERROR(ec)) {
351 pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, "
352 "Participating Processor: %s\n",
353 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
354 PP_MSG(ec));
355 } else
356 pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
357}
358
359static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
360 void *data)
361{
362 struct mce *m = (struct mce *)data;
363 struct err_regs regs;
364 int node, ecc;
365
366 pr_emerg("MC%d_STATUS: ", m->bank);
367
368 pr_cont("%sorrected error, other errors lost: %s, "
369 "CPU context corrupt: %s",
370 ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
371 ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
372 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
373
374 /* do the two bits[14:13] together */
375 ecc = (m->status >> 45) & 0x3;
376 if (ecc)
377 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
378
379 pr_cont("\n");
380
381 switch (m->bank) {
382 case 0:
383 amd_decode_dc_mce(m->status);
384 break;
385
386 case 1:
387 amd_decode_ic_mce(m->status);
388 break;
389
390 case 2:
391 amd_decode_bu_mce(m->status);
392 break;
393
394 case 3:
395 amd_decode_ls_mce(m->status);
396 break;
397
398 case 4:
399 regs.nbsl = (u32) m->status;
400 regs.nbsh = (u32)(m->status >> 32);
401 regs.nbeal = (u32) m->addr;
402 regs.nbeah = (u32)(m->addr >> 32);
403 node = amd_get_nb_id(m->extcpu);
404
405 amd_decode_nb_mce(node, &regs, 1);
406 break;
407
408 case 5:
409 amd_decode_fr_mce(m->status);
410 break;
411
412 default:
413 break;
414 }
415
416 amd_decode_err_code(m->status & 0xffff);
417
418 return NOTIFY_STOP;
419}
420
421static struct notifier_block amd_mce_dec_nb = {
422 .notifier_call = amd_decode_mce,
423};
424
425static int __init mce_amd_init(void)
426{
427 /*
428 * We can decode MCEs for K8, F10h and F11h CPUs:
429 */
430 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
431 return 0;
432
433 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
434 return 0;
435
436 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
437
438 return 0;
439}
440early_initcall(mce_amd_init);
441
442#ifdef MODULE
443static void __exit mce_amd_exit(void)
444{
445 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
446}
447
448MODULE_DESCRIPTION("AMD MCE decoder");
449MODULE_ALIAS("edac-mce-amd");
450MODULE_LICENSE("GPL");
451module_exit(mce_amd_exit);
452#endif
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 7e1374afd967..be4b075c3098 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -27,15 +27,6 @@ EXPORT_SYMBOL_GPL(edac_debug_level);
27struct workqueue_struct *edac_workqueue; 27struct workqueue_struct *edac_workqueue;
28 28
29/* 29/*
30 * sysfs object: /sys/devices/system/edac
31 * need to export to other files in this modules
32 */
33static struct sysdev_class edac_class = {
34 .name = "edac",
35};
36static int edac_class_valid;
37
38/*
39 * edac_op_state_to_string() 30 * edac_op_state_to_string()
40 */ 31 */
41char *edac_op_state_to_string(int opstate) 32char *edac_op_state_to_string(int opstate)
@@ -55,60 +46,6 @@ char *edac_op_state_to_string(int opstate)
55} 46}
56 47
57/* 48/*
58 * edac_get_edac_class()
59 *
60 * return pointer to the edac class of 'edac'
61 */
62struct sysdev_class *edac_get_edac_class(void)
63{
64 struct sysdev_class *classptr = NULL;
65
66 if (edac_class_valid)
67 classptr = &edac_class;
68
69 return classptr;
70}
71
72/*
73 * edac_register_sysfs_edac_name()
74 *
75 * register the 'edac' into /sys/devices/system
76 *
77 * return:
78 * 0 success
79 * !0 error
80 */
81static int edac_register_sysfs_edac_name(void)
82{
83 int err;
84
85 /* create the /sys/devices/system/edac directory */
86 err = sysdev_class_register(&edac_class);
87
88 if (err) {
89 debugf1("%s() error=%d\n", __func__, err);
90 return err;
91 }
92
93 edac_class_valid = 1;
94 return 0;
95}
96
97/*
98 * sysdev_class_unregister()
99 *
100 * unregister the 'edac' from /sys/devices/system
101 */
102static void edac_unregister_sysfs_edac_name(void)
103{
104 /* only if currently registered, then unregister it */
105 if (edac_class_valid)
106 sysdev_class_unregister(&edac_class);
107
108 edac_class_valid = 0;
109}
110
111/*
112 * edac_workqueue_setup 49 * edac_workqueue_setup
113 * initialize the edac work queue for polling operations 50 * initialize the edac work queue for polling operations
114 */ 51 */
@@ -154,21 +91,11 @@ static int __init edac_init(void)
154 edac_pci_clear_parity_errors(); 91 edac_pci_clear_parity_errors();
155 92
156 /* 93 /*
157 * perform the registration of the /sys/devices/system/edac class object
158 */
159 if (edac_register_sysfs_edac_name()) {
160 edac_printk(KERN_ERR, EDAC_MC,
161 "Error initializing 'edac' kobject\n");
162 err = -ENODEV;
163 goto error;
164 }
165
166 /*
167 * now set up the mc_kset under the edac class object 94 * now set up the mc_kset under the edac class object
168 */ 95 */
169 err = edac_sysfs_setup_mc_kset(); 96 err = edac_sysfs_setup_mc_kset();
170 if (err) 97 if (err)
171 goto sysfs_setup_fail; 98 goto error;
172 99
173 /* Setup/Initialize the workq for this core */ 100 /* Setup/Initialize the workq for this core */
174 err = edac_workqueue_setup(); 101 err = edac_workqueue_setup();
@@ -183,9 +110,6 @@ static int __init edac_init(void)
183workq_fail: 110workq_fail:
184 edac_sysfs_teardown_mc_kset(); 111 edac_sysfs_teardown_mc_kset();
185 112
186sysfs_setup_fail:
187 edac_unregister_sysfs_edac_name();
188
189error: 113error:
190 return err; 114 return err;
191} 115}
@@ -201,7 +125,6 @@ static void __exit edac_exit(void)
201 /* tear down the various subsystems */ 125 /* tear down the various subsystems */
202 edac_workqueue_teardown(); 126 edac_workqueue_teardown();
203 edac_sysfs_teardown_mc_kset(); 127 edac_sysfs_teardown_mc_kset();
204 edac_unregister_sysfs_edac_name();
205} 128}
206 129
207/* 130/*
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 233d4798c3aa..17aabb7b90ec 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -42,7 +42,6 @@ extern void edac_device_unregister_sysfs_main_kobj(
42 struct edac_device_ctl_info *edac_dev); 42 struct edac_device_ctl_info *edac_dev);
43extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); 43extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev);
44extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); 44extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev);
45extern struct sysdev_class *edac_get_edac_class(void);
46 45
47/* edac core workqueue: single CPU mode */ 46/* edac core workqueue: single CPU mode */
48extern struct workqueue_struct *edac_workqueue; 47extern struct workqueue_struct *edac_workqueue;
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index c39697df9cb4..023b01cb5175 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -7,7 +7,7 @@
7 * 7 *
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/sysdev.h> 10#include <linux/edac.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13 13
@@ -354,7 +354,7 @@ static int edac_pci_main_kobj_setup(void)
354 /* First time, so create the main kobject and its 354 /* First time, so create the main kobject and its
355 * controls and atributes 355 * controls and atributes
356 */ 356 */
357 edac_class = edac_get_edac_class(); 357 edac_class = edac_get_sysfs_class();
358 if (edac_class == NULL) { 358 if (edac_class == NULL) {
359 debugf1("%s() no edac_class\n", __func__); 359 debugf1("%s() no edac_class\n", __func__);
360 err = -ENODEV; 360 err = -ENODEV;
@@ -368,7 +368,7 @@ static int edac_pci_main_kobj_setup(void)
368 if (!try_module_get(THIS_MODULE)) { 368 if (!try_module_get(THIS_MODULE)) {
369 debugf1("%s() try_module_get() failed\n", __func__); 369 debugf1("%s() try_module_get() failed\n", __func__);
370 err = -ENODEV; 370 err = -ENODEV;
371 goto decrement_count_fail; 371 goto mod_get_fail;
372 } 372 }
373 373
374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -403,6 +403,9 @@ kobject_init_and_add_fail:
403kzalloc_fail: 403kzalloc_fail:
404 module_put(THIS_MODULE); 404 module_put(THIS_MODULE);
405 405
406mod_get_fail:
407 edac_put_sysfs_class();
408
406decrement_count_fail: 409decrement_count_fail:
407 /* if are on this error exit, nothing to tear down */ 410 /* if are on this error exit, nothing to tear down */
408 atomic_dec(&edac_pci_sysfs_refcount); 411 atomic_dec(&edac_pci_sysfs_refcount);
@@ -429,6 +432,7 @@ static void edac_pci_main_kobj_teardown(void)
429 __func__); 432 __func__);
430 kobject_put(edac_pci_top_main_kobj); 433 kobject_put(edac_pci_top_main_kobj);
431 } 434 }
435 edac_put_sysfs_class();
432} 436}
433 437
434/* 438/*
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 20b428aa155e..aab970760b75 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -3,10 +3,13 @@
3 * 3 *
4 * Author: Dave Jiang <djiang@mvista.com> 4 * Author: Dave Jiang <djiang@mvista.com>
5 * 5 *
6 * 2007 (c) MontaVista Software, Inc. This file is licensed under 6 * 2007 (c) MontaVista Software, Inc.
7 * the terms of the GNU General Public License version 2. This program 7 * 2010 (c) Advanced Micro Devices Inc.
8 * is licensed "as is" without any warranty of any kind, whether express 8 * Borislav Petkov <borislav.petkov@amd.com>
9 * or implied. 9 *
10 * This file is licensed under the terms of the GNU General Public
11 * License version 2. This program is licensed "as is" without any
12 * warranty of any kind, whether express or implied.
10 * 13 *
11 */ 14 */
12#include <linux/module.h> 15#include <linux/module.h>
@@ -23,6 +26,8 @@ EXPORT_SYMBOL_GPL(edac_handlers);
23int edac_err_assert = 0; 26int edac_err_assert = 0;
24EXPORT_SYMBOL_GPL(edac_err_assert); 27EXPORT_SYMBOL_GPL(edac_err_assert);
25 28
29static atomic_t edac_class_valid = ATOMIC_INIT(0);
30
26/* 31/*
27 * called to determine if there is an EDAC driver interested in 32 * called to determine if there is an EDAC driver interested in
28 * knowing an event (such as NMI) occurred 33 * knowing an event (such as NMI) occurred
@@ -44,3 +49,41 @@ void edac_atomic_assert_error(void)
44 edac_err_assert++; 49 edac_err_assert++;
45} 50}
46EXPORT_SYMBOL_GPL(edac_atomic_assert_error); 51EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
52
53/*
54 * sysfs object: /sys/devices/system/edac
55 * need to export to other files
56 */
57struct sysdev_class edac_class = {
58 .name = "edac",
59};
60EXPORT_SYMBOL_GPL(edac_class);
61
62/* return pointer to the 'edac' node in sysfs */
63struct sysdev_class *edac_get_sysfs_class(void)
64{
65 int err = 0;
66
67 if (atomic_read(&edac_class_valid))
68 goto out;
69
70 /* create the /sys/devices/system/edac directory */
71 err = sysdev_class_register(&edac_class);
72 if (err) {
73 printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
74 return NULL;
75 }
76
77out:
78 atomic_inc(&edac_class_valid);
79 return &edac_class;
80}
81EXPORT_SYMBOL_GPL(edac_get_sysfs_class);
82
83void edac_put_sysfs_class(void)
84{
85 /* last user unregisters it */
86 if (atomic_dec_and_test(&edac_class_valid))
87 sysdev_class_unregister(&edac_class);
88}
89EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
new file mode 100644
index 000000000000..c0181093b490
--- /dev/null
+++ b/drivers/edac/mce_amd.c
@@ -0,0 +1,680 @@
1#include <linux/module.h>
2#include <linux/slab.h>
3
4#include "mce_amd.h"
5
6static struct amd_decoder_ops *fam_ops;
7
8static u8 nb_err_cpumask = 0xf;
9
10static bool report_gart_errors;
11static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);
12
13void amd_report_gart_errors(bool v)
14{
15 report_gart_errors = v;
16}
17EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
19void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32))
20{
21 nb_bus_decoder = f;
22}
23EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32))
26{
27 if (nb_bus_decoder) {
28 WARN_ON(nb_bus_decoder != f);
29
30 nb_bus_decoder = NULL;
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
35/*
36 * string representation for the different MCA reported error types, see F3x48
37 * or MSR0000_0411.
38 */
39
40/* transaction type */
41const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
42EXPORT_SYMBOL_GPL(tt_msgs);
43
44/* cache level */
45const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
46EXPORT_SYMBOL_GPL(ll_msgs);
47
48/* memory transaction type */
49const char *rrrr_msgs[] = {
50 "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
51};
52EXPORT_SYMBOL_GPL(rrrr_msgs);
53
54/* participating processor */
55const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
56EXPORT_SYMBOL_GPL(pp_msgs);
57
58/* request timeout */
59const char *to_msgs[] = { "no timeout", "timed out" };
60EXPORT_SYMBOL_GPL(to_msgs);
61
62/* memory or i/o */
63const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
64EXPORT_SYMBOL_GPL(ii_msgs);
65
66static const char *f10h_nb_mce_desc[] = {
67 "HT link data error",
68 "Protocol error (link, L3, probe filter, etc.)",
69 "Parity error in NB-internal arrays",
70 "Link Retry due to IO link transmission error",
71 "L3 ECC data cache error",
72 "ECC error in L3 cache tag",
73 "L3 LRU parity bits error",
74 "ECC Error in the Probe Filter directory"
75};
76
77static bool f12h_dc_mce(u16 ec)
78{
79 bool ret = false;
80
81 if (MEM_ERROR(ec)) {
82 u8 ll = ec & 0x3;
83 ret = true;
84
85 if (ll == LL_L2)
86 pr_cont("during L1 linefill from L2.\n");
87 else if (ll == LL_L1)
88 pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
89 else
90 ret = false;
91 }
92 return ret;
93}
94
95static bool f10h_dc_mce(u16 ec)
96{
97 u8 r4 = (ec >> 4) & 0xf;
98 u8 ll = ec & 0x3;
99
100 if (r4 == R4_GEN && ll == LL_L1) {
101 pr_cont("during data scrub.\n");
102 return true;
103 }
104 return f12h_dc_mce(ec);
105}
106
107static bool k8_dc_mce(u16 ec)
108{
109 if (BUS_ERROR(ec)) {
110 pr_cont("during system linefill.\n");
111 return true;
112 }
113
114 return f10h_dc_mce(ec);
115}
116
117static bool f14h_dc_mce(u16 ec)
118{
119 u8 r4 = (ec >> 4) & 0xf;
120 u8 ll = ec & 0x3;
121 u8 tt = (ec >> 2) & 0x3;
122 u8 ii = tt;
123 bool ret = true;
124
125 if (MEM_ERROR(ec)) {
126
127 if (tt != TT_DATA || ll != LL_L1)
128 return false;
129
130 switch (r4) {
131 case R4_DRD:
132 case R4_DWR:
133 pr_cont("Data/Tag parity error due to %s.\n",
134 (r4 == R4_DRD ? "load/hw prf" : "store"));
135 break;
136 case R4_EVICT:
137 pr_cont("Copyback parity error on a tag miss.\n");
138 break;
139 case R4_SNOOP:
140 pr_cont("Tag parity error during snoop.\n");
141 break;
142 default:
143 ret = false;
144 }
145 } else if (BUS_ERROR(ec)) {
146
147 if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
148 return false;
149
150 pr_cont("System read data error on a ");
151
152 switch (r4) {
153 case R4_RD:
154 pr_cont("TLB reload.\n");
155 break;
156 case R4_DWR:
157 pr_cont("store.\n");
158 break;
159 case R4_DRD:
160 pr_cont("load.\n");
161 break;
162 default:
163 ret = false;
164 }
165 } else {
166 ret = false;
167 }
168
169 return ret;
170}
171
172static void amd_decode_dc_mce(struct mce *m)
173{
174 u16 ec = m->status & 0xffff;
175 u8 xec = (m->status >> 16) & 0xf;
176
177 pr_emerg(HW_ERR "Data Cache Error: ");
178
179 /* TLB error signatures are the same across families */
180 if (TLB_ERROR(ec)) {
181 u8 tt = (ec >> 2) & 0x3;
182
183 if (tt == TT_DATA) {
184 pr_cont("%s TLB %s.\n", LL_MSG(ec),
185 (xec ? "multimatch" : "parity error"));
186 return;
187 }
188 else
189 goto wrong_dc_mce;
190 }
191
192 if (!fam_ops->dc_mce(ec))
193 goto wrong_dc_mce;
194
195 return;
196
197wrong_dc_mce:
198 pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
199}
200
201static bool k8_ic_mce(u16 ec)
202{
203 u8 ll = ec & 0x3;
204 u8 r4 = (ec >> 4) & 0xf;
205 bool ret = true;
206
207 if (!MEM_ERROR(ec))
208 return false;
209
210 if (ll == 0x2)
211 pr_cont("during a linefill from L2.\n");
212 else if (ll == 0x1) {
213 switch (r4) {
214 case R4_IRD:
215 pr_cont("Parity error during data load.\n");
216 break;
217
218 case R4_EVICT:
219 pr_cont("Copyback Parity/Victim error.\n");
220 break;
221
222 case R4_SNOOP:
223 pr_cont("Tag Snoop error.\n");
224 break;
225
226 default:
227 ret = false;
228 break;
229 }
230 } else
231 ret = false;
232
233 return ret;
234}
235
236static bool f14h_ic_mce(u16 ec)
237{
238 u8 ll = ec & 0x3;
239 u8 tt = (ec >> 2) & 0x3;
240 u8 r4 = (ec >> 4) & 0xf;
241 bool ret = true;
242
243 if (MEM_ERROR(ec)) {
244 if (tt != 0 || ll != 1)
245 ret = false;
246
247 if (r4 == R4_IRD)
248 pr_cont("Data/tag array parity error for a tag hit.\n");
249 else if (r4 == R4_SNOOP)
250 pr_cont("Tag error during snoop/victimization.\n");
251 else
252 ret = false;
253 }
254 return ret;
255}
256
257static void amd_decode_ic_mce(struct mce *m)
258{
259 u16 ec = m->status & 0xffff;
260 u8 xec = (m->status >> 16) & 0xf;
261
262 pr_emerg(HW_ERR "Instruction Cache Error: ");
263
264 if (TLB_ERROR(ec))
265 pr_cont("%s TLB %s.\n", LL_MSG(ec),
266 (xec ? "multimatch" : "parity error"));
267 else if (BUS_ERROR(ec)) {
268 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
269
270 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
271 } else if (fam_ops->ic_mce(ec))
272 ;
273 else
274 pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
275}
276
277static void amd_decode_bu_mce(struct mce *m)
278{
279 u32 ec = m->status & 0xffff;
280 u32 xec = (m->status >> 16) & 0xf;
281
282 pr_emerg(HW_ERR "Bus Unit Error");
283
284 if (xec == 0x1)
285 pr_cont(" in the write data buffers.\n");
286 else if (xec == 0x3)
287 pr_cont(" in the victim data buffers.\n");
288 else if (xec == 0x2 && MEM_ERROR(ec))
289 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
290 else if (xec == 0x0) {
291 if (TLB_ERROR(ec))
292 pr_cont(": %s error in a Page Descriptor Cache or "
293 "Guest TLB.\n", TT_MSG(ec));
294 else if (BUS_ERROR(ec))
295 pr_cont(": %s/ECC error in data read from NB: %s.\n",
296 RRRR_MSG(ec), PP_MSG(ec));
297 else if (MEM_ERROR(ec)) {
298 u8 rrrr = (ec >> 4) & 0xf;
299
300 if (rrrr >= 0x7)
301 pr_cont(": %s error during data copyback.\n",
302 RRRR_MSG(ec));
303 else if (rrrr <= 0x1)
304 pr_cont(": %s parity/ECC error during data "
305 "access from L2.\n", RRRR_MSG(ec));
306 else
307 goto wrong_bu_mce;
308 } else
309 goto wrong_bu_mce;
310 } else
311 goto wrong_bu_mce;
312
313 return;
314
315wrong_bu_mce:
316 pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
317}
318
319static void amd_decode_ls_mce(struct mce *m)
320{
321 u16 ec = m->status & 0xffff;
322 u8 xec = (m->status >> 16) & 0xf;
323
324 if (boot_cpu_data.x86 == 0x14) {
325 pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
326 " please report on LKML.\n");
327 return;
328 }
329
330 pr_emerg(HW_ERR "Load Store Error");
331
332 if (xec == 0x0) {
333 u8 r4 = (ec >> 4) & 0xf;
334
335 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
336 goto wrong_ls_mce;
337
338 pr_cont(" during %s.\n", RRRR_MSG(ec));
339 } else
340 goto wrong_ls_mce;
341
342 return;
343
344wrong_ls_mce:
345 pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
346}
347
348static bool k8_nb_mce(u16 ec, u8 xec)
349{
350 bool ret = true;
351
352 switch (xec) {
353 case 0x1:
354 pr_cont("CRC error detected on HT link.\n");
355 break;
356
357 case 0x5:
358 pr_cont("Invalid GART PTE entry during GART table walk.\n");
359 break;
360
361 case 0x6:
362 pr_cont("Unsupported atomic RMW received from an IO link.\n");
363 break;
364
365 case 0x0:
366 case 0x8:
367 if (boot_cpu_data.x86 == 0x11)
368 return false;
369
370 pr_cont("DRAM ECC error detected on the NB.\n");
371 break;
372
373 case 0xd:
374 pr_cont("Parity error on the DRAM addr/ctl signals.\n");
375 break;
376
377 default:
378 ret = false;
379 break;
380 }
381
382 return ret;
383}
384
385static bool f10h_nb_mce(u16 ec, u8 xec)
386{
387 bool ret = true;
388 u8 offset = 0;
389
390 if (k8_nb_mce(ec, xec))
391 return true;
392
393 switch(xec) {
394 case 0xa ... 0xc:
395 offset = 10;
396 break;
397
398 case 0xe:
399 offset = 11;
400 break;
401
402 case 0xf:
403 if (TLB_ERROR(ec))
404 pr_cont("GART Table Walk data error.\n");
405 else if (BUS_ERROR(ec))
406 pr_cont("DMA Exclusion Vector Table Walk error.\n");
407 else
408 ret = false;
409
410 goto out;
411 break;
412
413 case 0x1c ... 0x1f:
414 offset = 24;
415 break;
416
417 default:
418 ret = false;
419
420 goto out;
421 break;
422 }
423
424 pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]);
425
426out:
427 return ret;
428}
429
430static bool nb_noop_mce(u16 ec, u8 xec)
431{
432 return false;
433}
434
435void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
436{
437 u8 xec = (m->status >> 16) & 0x1f;
438 u16 ec = m->status & 0xffff;
439 u32 nbsh = (u32)(m->status >> 32);
440
441 pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);
442
443 /*
444 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
445 * value encoding has changed so interpret those differently
446 */
447 if ((boot_cpu_data.x86 == 0x10) &&
448 (boot_cpu_data.x86_model > 7)) {
449 if (nbsh & K8_NBSH_ERR_CPU_VAL)
450 pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
451 } else {
452 u8 assoc_cpus = nbsh & nb_err_cpumask;
453
454 if (assoc_cpus > 0)
455 pr_cont(", core: %d", fls(assoc_cpus) - 1);
456 }
457
458 switch (xec) {
459 case 0x2:
460 pr_cont("Sync error (sync packets on HT link detected).\n");
461 return;
462
463 case 0x3:
464 pr_cont("HT Master abort.\n");
465 return;
466
467 case 0x4:
468 pr_cont("HT Target abort.\n");
469 return;
470
471 case 0x7:
472 pr_cont("NB Watchdog timeout.\n");
473 return;
474
475 case 0x9:
476 pr_cont("SVM DMA Exclusion Vector error.\n");
477 return;
478
479 default:
480 break;
481 }
482
483 if (!fam_ops->nb_mce(ec, xec))
484 goto wrong_nb_mce;
485
486 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10)
487 if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder)
488 nb_bus_decoder(node_id, m, nbcfg);
489
490 return;
491
492wrong_nb_mce:
493 pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
494}
495EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
496
497static void amd_decode_fr_mce(struct mce *m)
498{
499 if (boot_cpu_data.x86 == 0xf ||
500 boot_cpu_data.x86 == 0x11)
501 goto wrong_fr_mce;
502
503 /* we have only one error signature so match all fields at once. */
504 if ((m->status & 0xffff) == 0x0f0f) {
505 pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
506 return;
507 }
508
509wrong_fr_mce:
510 pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
511}
512
513static inline void amd_decode_err_code(u16 ec)
514{
515 if (TLB_ERROR(ec)) {
516 pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
517 TT_MSG(ec), LL_MSG(ec));
518 } else if (MEM_ERROR(ec)) {
519 pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
520 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
521 } else if (BUS_ERROR(ec)) {
522 pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
523 "Participating Processor: %s\n",
524 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
525 PP_MSG(ec));
526 } else
527 pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
528}
529
530/*
531 * Filter out unwanted MCE signatures here.
532 */
533static bool amd_filter_mce(struct mce *m)
534{
535 u8 xec = (m->status >> 16) & 0x1f;
536
537 /*
538 * NB GART TLB error reporting is disabled by default.
539 */
540 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
541 return true;
542
543 return false;
544}
545
546int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
547{
548 struct mce *m = (struct mce *)data;
549 int node, ecc;
550
551 if (amd_filter_mce(m))
552 return NOTIFY_STOP;
553
554 pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);
555
556 pr_cont("%sorrected error, other errors lost: %s, "
557 "CPU context corrupt: %s",
558 ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
559 ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
560 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
561
562 /* do the two bits[14:13] together */
563 ecc = (m->status >> 45) & 0x3;
564 if (ecc)
565 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
566
567 pr_cont("\n");
568
569 switch (m->bank) {
570 case 0:
571 amd_decode_dc_mce(m);
572 break;
573
574 case 1:
575 amd_decode_ic_mce(m);
576 break;
577
578 case 2:
579 amd_decode_bu_mce(m);
580 break;
581
582 case 3:
583 amd_decode_ls_mce(m);
584 break;
585
586 case 4:
587 node = amd_get_nb_id(m->extcpu);
588 amd_decode_nb_mce(node, m, 0);
589 break;
590
591 case 5:
592 amd_decode_fr_mce(m);
593 break;
594
595 default:
596 break;
597 }
598
599 amd_decode_err_code(m->status & 0xffff);
600
601 return NOTIFY_STOP;
602}
603EXPORT_SYMBOL_GPL(amd_decode_mce);
604
605static struct notifier_block amd_mce_dec_nb = {
606 .notifier_call = amd_decode_mce,
607};
608
609static int __init mce_amd_init(void)
610{
611 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
612 return 0;
613
614 if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
615 (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
616 return 0;
617
618 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
619 if (!fam_ops)
620 return -ENOMEM;
621
622 switch (boot_cpu_data.x86) {
623 case 0xf:
624 fam_ops->dc_mce = k8_dc_mce;
625 fam_ops->ic_mce = k8_ic_mce;
626 fam_ops->nb_mce = k8_nb_mce;
627 break;
628
629 case 0x10:
630 fam_ops->dc_mce = f10h_dc_mce;
631 fam_ops->ic_mce = k8_ic_mce;
632 fam_ops->nb_mce = f10h_nb_mce;
633 break;
634
635 case 0x11:
636 fam_ops->dc_mce = k8_dc_mce;
637 fam_ops->ic_mce = k8_ic_mce;
638 fam_ops->nb_mce = f10h_nb_mce;
639 break;
640
641 case 0x12:
642 fam_ops->dc_mce = f12h_dc_mce;
643 fam_ops->ic_mce = k8_ic_mce;
644 fam_ops->nb_mce = nb_noop_mce;
645 break;
646
647 case 0x14:
648 nb_err_cpumask = 0x3;
649 fam_ops->dc_mce = f14h_dc_mce;
650 fam_ops->ic_mce = f14h_ic_mce;
651 fam_ops->nb_mce = nb_noop_mce;
652 break;
653
654 default:
655 printk(KERN_WARNING "Huh? What family is that: %d?!\n",
656 boot_cpu_data.x86);
657 kfree(fam_ops);
658 return -EINVAL;
659 }
660
661 pr_info("MCE: In-kernel MCE decoding enabled.\n");
662
663 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
664
665 return 0;
666}
667early_initcall(mce_amd_init);
668
669#ifdef MODULE
670static void __exit mce_amd_exit(void)
671{
672 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
673 kfree(fam_ops);
674}
675
676MODULE_DESCRIPTION("AMD MCE decoder");
677MODULE_ALIAS("edac-mce-amd");
678MODULE_LICENSE("GPL");
679module_exit(mce_amd_exit);
680#endif
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/mce_amd.h
index df23ee065f79..35f6e0e3b297 100644
--- a/drivers/edac/edac_mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -1,11 +1,14 @@
1#ifndef _EDAC_MCE_AMD_H 1#ifndef _EDAC_MCE_AMD_H
2#define _EDAC_MCE_AMD_H 2#define _EDAC_MCE_AMD_H
3 3
4#include <linux/notifier.h>
5
4#include <asm/mce.h> 6#include <asm/mce.h>
5 7
8#define BIT_64(n) (U64_C(1) << (n))
9
6#define ERROR_CODE(x) ((x) & 0xffff) 10#define ERROR_CODE(x) ((x) & 0xffff)
7#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) 11#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f)
8#define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)]
9 12
10#define LOW_SYNDROME(x) (((x) >> 15) & 0xff) 13#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
11#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) 14#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -20,13 +23,14 @@
20#define II_MSG(x) ii_msgs[II(x)] 23#define II_MSG(x) ii_msgs[II(x)]
21#define LL(x) (((x) >> 0) & 0x3) 24#define LL(x) (((x) >> 0) & 0x3)
22#define LL_MSG(x) ll_msgs[LL(x)] 25#define LL_MSG(x) ll_msgs[LL(x)]
23#define RRRR(x) (((x) >> 4) & 0xf)
24#define RRRR_MSG(x) rrrr_msgs[RRRR(x)]
25#define TO(x) (((x) >> 8) & 0x1) 26#define TO(x) (((x) >> 8) & 0x1)
26#define TO_MSG(x) to_msgs[TO(x)] 27#define TO_MSG(x) to_msgs[TO(x)]
27#define PP(x) (((x) >> 9) & 0x3) 28#define PP(x) (((x) >> 9) & 0x3)
28#define PP_MSG(x) pp_msgs[PP(x)] 29#define PP_MSG(x) pp_msgs[PP(x)]
29 30
31#define RRRR(x) (((x) >> 4) & 0xf)
32#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!")
33
30#define K8_NBSH 0x4C 34#define K8_NBSH 0x4C
31 35
32#define K8_NBSH_VALID_BIT BIT(31) 36#define K8_NBSH_VALID_BIT BIT(31)
@@ -41,13 +45,45 @@
41#define K8_NBSH_UECC BIT(13) 45#define K8_NBSH_UECC BIT(13)
42#define K8_NBSH_ERR_SCRUBER BIT(8) 46#define K8_NBSH_ERR_SCRUBER BIT(8)
43 47
48enum tt_ids {
49 TT_INSTR = 0,
50 TT_DATA,
51 TT_GEN,
52 TT_RESV,
53};
54
55enum ll_ids {
56 LL_RESV = 0,
57 LL_L1,
58 LL_L2,
59 LL_LG,
60};
61
62enum ii_ids {
63 II_MEM = 0,
64 II_RESV,
65 II_IO,
66 II_GEN,
67};
68
69enum rrrr_ids {
70 R4_GEN = 0,
71 R4_RD,
72 R4_WR,
73 R4_DRD,
74 R4_DWR,
75 R4_IRD,
76 R4_PREF,
77 R4_EVICT,
78 R4_SNOOP,
79};
80
44extern const char *tt_msgs[]; 81extern const char *tt_msgs[];
45extern const char *ll_msgs[]; 82extern const char *ll_msgs[];
46extern const char *rrrr_msgs[]; 83extern const char *rrrr_msgs[];
47extern const char *pp_msgs[]; 84extern const char *pp_msgs[];
48extern const char *to_msgs[]; 85extern const char *to_msgs[];
49extern const char *ii_msgs[]; 86extern const char *ii_msgs[];
50extern const char *ext_msgs[];
51 87
52/* 88/*
53 * relevant NB regs 89 * relevant NB regs
@@ -60,10 +96,19 @@ struct err_regs {
60 u32 nbeal; 96 u32 nbeal;
61}; 97};
62 98
99/*
100 * per-family decoder ops
101 */
102struct amd_decoder_ops {
103 bool (*dc_mce)(u16);
104 bool (*ic_mce)(u16);
105 bool (*nb_mce)(u16, u8);
106};
63 107
64void amd_report_gart_errors(bool); 108void amd_report_gart_errors(bool);
65void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)); 109void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
66void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)); 110void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
67void amd_decode_nb_mce(int, struct err_regs *, int); 111void amd_decode_nb_mce(int, struct mce *, u32);
112int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
68 113
69#endif /* _EDAC_MCE_AMD_H */ 114#endif /* _EDAC_MCE_AMD_H */
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
new file mode 100644
index 000000000000..8d0688f36d4c
--- /dev/null
+++ b/drivers/edac/mce_amd_inj.c
@@ -0,0 +1,171 @@
1/*
2 * A simple MCE injection facility for testing the MCE decoding code. This
3 * driver should be built as module so that it can be loaded on production
4 * kernels for testing purposes.
5 *
6 * This file may be distributed under the terms of the GNU General Public
7 * License version 2.
8 *
9 * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com>
10 * Advanced Micro Devices Inc.
11 */
12
13#include <linux/kobject.h>
14#include <linux/sysdev.h>
15#include <linux/edac.h>
16#include <asm/mce.h>
17
18#include "mce_amd.h"
19
20struct edac_mce_attr {
21 struct attribute attr;
22 ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
23 ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
24 const char *buf, size_t count);
25};
26
27#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \
28static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)
29
30static struct kobject *mce_kobj;
31
32/*
33 * Collect all the MCi_XXX settings
34 */
35static struct mce i_mce;
36
37#define MCE_INJECT_STORE(reg) \
38static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
39 struct edac_mce_attr *attr, \
40 const char *data, size_t count)\
41{ \
42 int ret = 0; \
43 unsigned long value; \
44 \
45 ret = strict_strtoul(data, 16, &value); \
46 if (ret < 0) \
47 printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
48 \
49 i_mce.reg = value; \
50 \
51 return count; \
52}
53
54MCE_INJECT_STORE(status);
55MCE_INJECT_STORE(misc);
56MCE_INJECT_STORE(addr);
57
58#define MCE_INJECT_SHOW(reg) \
59static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \
60 struct edac_mce_attr *attr, \
61 char *buf) \
62{ \
63 return sprintf(buf, "0x%016llx\n", i_mce.reg); \
64}
65
66MCE_INJECT_SHOW(status);
67MCE_INJECT_SHOW(misc);
68MCE_INJECT_SHOW(addr);
69
70EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
71EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
72EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
73
74/*
75 * This denotes into which bank we're injecting and triggers
76 * the injection, at the same time.
77 */
78static ssize_t edac_inject_bank_store(struct kobject *kobj,
79 struct edac_mce_attr *attr,
80 const char *data, size_t count)
81{
82 int ret = 0;
83 unsigned long value;
84
85 ret = strict_strtoul(data, 10, &value);
86 if (ret < 0) {
87 printk(KERN_ERR "Invalid bank value!\n");
88 return -EINVAL;
89 }
90
91 if (value > 5) {
92 printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
93 return -EINVAL;
94 }
95
96 i_mce.bank = value;
97
98 amd_decode_mce(NULL, 0, &i_mce);
99
100 return count;
101}
102
103static ssize_t edac_inject_bank_show(struct kobject *kobj,
104 struct edac_mce_attr *attr, char *buf)
105{
106 return sprintf(buf, "%d\n", i_mce.bank);
107}
108
109EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);
110
111static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
112 &mce_attr_addr, &mce_attr_bank
113};
114
115static int __init edac_init_mce_inject(void)
116{
117 struct sysdev_class *edac_class = NULL;
118 int i, err = 0;
119
120 edac_class = edac_get_sysfs_class();
121 if (!edac_class)
122 return -EINVAL;
123
124 mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
125 if (!mce_kobj) {
126 printk(KERN_ERR "Error creating a mce kset.\n");
127 err = -ENOMEM;
128 goto err_mce_kobj;
129 }
130
131 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
132 err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
133 if (err) {
134 printk(KERN_ERR "Error creating %s in sysfs.\n",
135 sysfs_attrs[i]->attr.name);
136 goto err_sysfs_create;
137 }
138 }
139 return 0;
140
141err_sysfs_create:
142 while (i-- >= 0)
143 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
144
145 kobject_del(mce_kobj);
146
147err_mce_kobj:
148 edac_put_sysfs_class();
149
150 return err;
151}
152
153static void __exit edac_exit_mce_inject(void)
154{
155 int i;
156
157 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
158 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
159
160 kobject_del(mce_kobj);
161
162 edac_put_sysfs_class();
163}
164
165module_init(edac_init_mce_inject);
166module_exit(edac_exit_mce_inject);
167
168MODULE_LICENSE("GPL");
169MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>");
170MODULE_AUTHOR("AMD Inc.");
171MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");