aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorBorislav Petkov <borislav.petkov@amd.com>2010-09-02 12:33:24 -0400
committerBorislav Petkov <bp@amd64.org>2010-10-21 08:47:59 -0400
commit9cdeb404a1870c5022915e576dbdc3cde21af5bf (patch)
tree19843bf914f56f362a7c9cdf86c27a648caa475c /drivers/edac
parent30e1f7a8122145f44f45c95366e27b6bb0b08428 (diff)
EDAC, MCE: Rework MCE injection
Add sysfs injection facilities for testing the MCE decoding code. As a result, remove large parts of amd64_edac_dbg.c, which only did NB MCE injection anyway; the new injection code already supports that functionality. Add an injection module so that the MCE decoding code in production kernels like those in RHEL and SLES can be tested. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig14
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/amd64_edac.h2
-rw-r--r--drivers/edac/amd64_edac_dbg.c213
-rw-r--r--drivers/edac/edac_mce_amd.c4
-rw-r--r--drivers/edac/edac_mce_amd.h4
-rw-r--r--drivers/edac/mce_amd_inj.c171
7 files changed, 203 insertions, 207 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 70bb350de996..3bb3a671baa0 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -39,7 +39,7 @@ config EDAC_DEBUG
39 there're four debug levels (x=0,1,2,3 from low to high). 39 there're four debug levels (x=0,1,2,3 from low to high).
40 Usually you should select 'N'. 40 Usually you should select 'N'.
41 41
42 config EDAC_DECODE_MCE 42config EDAC_DECODE_MCE
43 tristate "Decode MCEs in human-readable form (only on AMD for now)" 43 tristate "Decode MCEs in human-readable form (only on AMD for now)"
44 depends on CPU_SUP_AMD && X86_MCE 44 depends on CPU_SUP_AMD && X86_MCE
45 default y 45 default y
@@ -51,6 +51,16 @@ config EDAC_DEBUG
51 which occur really early upon boot, before the module infrastructure 51 which occur really early upon boot, before the module infrastructure
52 has been initialized. 52 has been initialized.
53 53
54config EDAC_MCE_INJ
55 tristate "Simple MCE injection interface over /sysfs"
56 depends on EDAC_DECODE_MCE
57 default n
58 help
59 This is a simple interface to inject MCEs over /sysfs and test
60 the MCE decoding code in EDAC.
61
62 This is currently AMD-only.
63
54config EDAC_MM_EDAC 64config EDAC_MM_EDAC
55 tristate "Main Memory EDAC (Error Detection And Correction) reporting" 65 tristate "Main Memory EDAC (Error Detection And Correction) reporting"
56 help 66 help
@@ -72,7 +82,7 @@ config EDAC_AMD64
72 Families of Memory Controllers (K8, F10h and F11h) 82 Families of Memory Controllers (K8, F10h and F11h)
73 83
74config EDAC_AMD64_ERROR_INJECTION 84config EDAC_AMD64_ERROR_INJECTION
75 bool "Sysfs Error Injection facilities" 85 bool "Sysfs HW Error injection facilities"
76 depends on EDAC_AMD64 86 depends on EDAC_AMD64
77 help 87 help
78 Recent Opterons (Family 10h and later) provide for Memory Error 88 Recent Opterons (Family 10h and later) provide for Memory Error
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index ca6b1bb24ccc..5c38ad38f3a3 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,8 @@ ifdef CONFIG_PCI
17edac_core-objs += edac_pci.o edac_pci_sysfs.o 17edac_core-objs += edac_pci.o edac_pci_sysfs.o
18endif 18endif
19 19
20obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
21
20obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o 22obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
21 23
22obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 24obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 613b9381e71a..67d9ceb4b839 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -486,7 +486,7 @@ extern const char *ext_msgs[32];
486extern const char *htlink_msgs[8]; 486extern const char *htlink_msgs[8];
487 487
488#ifdef CONFIG_EDAC_DEBUG 488#ifdef CONFIG_EDAC_DEBUG
489#define NUM_DBG_ATTRS 9 489#define NUM_DBG_ATTRS 5
490#else 490#else
491#define NUM_DBG_ATTRS 0 491#define NUM_DBG_ATTRS 0
492#endif 492#endif
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index f6d5695de5b6..e3562288f4ce 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -1,173 +1,16 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2 2
3/* 3#define EDAC_DCT_ATTR_SHOW(reg) \
4 * accept a hex value and store it into the virtual error register file, field: 4static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
5 * nbeal and nbeah. Assume virtual error values have already been set for: NBSL, 5{ \
6 * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and 6 struct amd64_pvt *pvt = mci->pvt_info; \
7 * CHANNEL 7 return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
8 */
9static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
10 size_t count)
11{
12 struct amd64_pvt *pvt = mci->pvt_info;
13 u64 value;
14 int ret = 0;
15 struct mce m;
16
17 ret = strict_strtoull(data, 16, &value);
18 if (ret != -EINVAL) {
19 struct err_regs *regs = &pvt->ctl_error_info;
20
21 debugf0("received NBEA= 0x%llx\n", value);
22
23 /* place the value into the virtual error packet */
24 pvt->ctl_error_info.nbeal = (u32) value;
25 value >>= 32;
26 pvt->ctl_error_info.nbeah = (u32) value;
27
28 m.addr = value;
29 m.status = regs->nbsl | ((u64)regs->nbsh << 32);
30
31 /* Process the Mapping request */
32 /* TODO: Add race prevention */
33 amd_decode_nb_mce(pvt->mc_node_id, &m, regs->nbcfg);
34
35 return count;
36 }
37 return ret;
38} 8}
39 9
40/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */ 10EDAC_DCT_ATTR_SHOW(dhar);
41static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data) 11EDAC_DCT_ATTR_SHOW(dbam0);
42{ 12EDAC_DCT_ATTR_SHOW(top_mem);
43 struct amd64_pvt *pvt = mci->pvt_info; 13EDAC_DCT_ATTR_SHOW(top_mem2);
44 u64 value;
45
46 value = pvt->ctl_error_info.nbeah;
47 value <<= 32;
48 value |= pvt->ctl_error_info.nbeal;
49
50 return sprintf(data, "%llx\n", value);
51}
52
53/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
54static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
55 size_t count)
56{
57 struct amd64_pvt *pvt = mci->pvt_info;
58 unsigned long value;
59 int ret = 0;
60
61 ret = strict_strtoul(data, 16, &value);
62 if (ret != -EINVAL) {
63 debugf0("received NBSL= 0x%lx\n", value);
64
65 pvt->ctl_error_info.nbsl = (u32) value;
66
67 return count;
68 }
69 return ret;
70}
71
72/* display back what the last NBSL value written */
73static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
74{
75 struct amd64_pvt *pvt = mci->pvt_info;
76 u32 value;
77
78 value = pvt->ctl_error_info.nbsl;
79
80 return sprintf(data, "%x\n", value);
81}
82
83/* store the NBSH (MCA NB Status High) value user desires */
84static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
85 size_t count)
86{
87 struct amd64_pvt *pvt = mci->pvt_info;
88 unsigned long value;
89 int ret = 0;
90
91 ret = strict_strtoul(data, 16, &value);
92 if (ret != -EINVAL) {
93 debugf0("received NBSH= 0x%lx\n", value);
94
95 pvt->ctl_error_info.nbsh = (u32) value;
96
97 return count;
98 }
99 return ret;
100}
101
102/* display back what the last NBSH value written */
103static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
104{
105 struct amd64_pvt *pvt = mci->pvt_info;
106 u32 value;
107
108 value = pvt->ctl_error_info.nbsh;
109
110 return sprintf(data, "%x\n", value);
111}
112
113/* accept and store the NBCFG (MCA NB Configuration) value user desires */
114static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci,
115 const char *data, size_t count)
116{
117 struct amd64_pvt *pvt = mci->pvt_info;
118 unsigned long value;
119 int ret = 0;
120
121 ret = strict_strtoul(data, 16, &value);
122 if (ret != -EINVAL) {
123 debugf0("received NBCFG= 0x%lx\n", value);
124
125 pvt->ctl_error_info.nbcfg = (u32) value;
126
127 return count;
128 }
129 return ret;
130}
131
132/* various show routines for the controls of a MCI */
133static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
134{
135 struct amd64_pvt *pvt = mci->pvt_info;
136
137 return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
138}
139
140
141static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
142{
143 struct amd64_pvt *pvt = mci->pvt_info;
144
145 return sprintf(data, "%x\n", pvt->dhar);
146}
147
148
149static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
150{
151 struct amd64_pvt *pvt = mci->pvt_info;
152
153 return sprintf(data, "%x\n", pvt->dbam0);
154}
155
156
157static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
158{
159 struct amd64_pvt *pvt = mci->pvt_info;
160
161 return sprintf(data, "%llx\n", pvt->top_mem);
162}
163
164
165static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
166{
167 struct amd64_pvt *pvt = mci->pvt_info;
168
169 return sprintf(data, "%llx\n", pvt->top_mem2);
170}
171 14
172static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) 15static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
173{ 16{
@@ -188,38 +31,6 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
188 31
189 { 32 {
190 .attr = { 33 .attr = {
191 .name = "nbea_ctl",
192 .mode = (S_IRUGO | S_IWUSR)
193 },
194 .show = amd64_nbea_show,
195 .store = amd64_nbea_store,
196 },
197 {
198 .attr = {
199 .name = "nbsl_ctl",
200 .mode = (S_IRUGO | S_IWUSR)
201 },
202 .show = amd64_nbsl_show,
203 .store = amd64_nbsl_store,
204 },
205 {
206 .attr = {
207 .name = "nbsh_ctl",
208 .mode = (S_IRUGO | S_IWUSR)
209 },
210 .show = amd64_nbsh_show,
211 .store = amd64_nbsh_store,
212 },
213 {
214 .attr = {
215 .name = "nbcfg_ctl",
216 .mode = (S_IRUGO | S_IWUSR)
217 },
218 .show = amd64_nbcfg_show,
219 .store = amd64_nbcfg_store,
220 },
221 {
222 .attr = {
223 .name = "dhar", 34 .name = "dhar",
224 .mode = (S_IRUGO) 35 .mode = (S_IRUGO)
225 }, 36 },
@@ -231,7 +42,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
231 .name = "dbam", 42 .name = "dbam",
232 .mode = (S_IRUGO) 43 .mode = (S_IRUGO)
233 }, 44 },
234 .show = amd64_dbam_show, 45 .show = amd64_dbam0_show,
235 .store = NULL, 46 .store = NULL,
236 }, 47 },
237 { 48 {
@@ -239,7 +50,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
239 .name = "topmem", 50 .name = "topmem",
240 .mode = (S_IRUGO) 51 .mode = (S_IRUGO)
241 }, 52 },
242 .show = amd64_topmem_show, 53 .show = amd64_top_mem_show,
243 .store = NULL, 54 .store = NULL,
244 }, 55 },
245 { 56 {
@@ -247,7 +58,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
247 .name = "topmem2", 58 .name = "topmem2",
248 .mode = (S_IRUGO) 59 .mode = (S_IRUGO)
249 }, 60 },
250 .show = amd64_topmem2_show, 61 .show = amd64_top_mem2_show,
251 .store = NULL, 62 .store = NULL,
252 }, 63 },
253 { 64 {
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 6cfa881888bc..c75c47b0f3ea 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -324,8 +324,7 @@ static inline void amd_decode_err_code(u16 ec)
324 pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); 324 pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
325} 325}
326 326
327static int amd_decode_mce(struct notifier_block *nb, unsigned long val, 327int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
328 void *data)
329{ 328{
330 struct mce *m = (struct mce *)data; 329 struct mce *m = (struct mce *)data;
331 int node, ecc; 330 int node, ecc;
@@ -379,6 +378,7 @@ static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
379 378
380 return NOTIFY_STOP; 379 return NOTIFY_STOP;
381} 380}
381EXPORT_SYMBOL_GPL(amd_decode_mce);
382 382
383static struct notifier_block amd_mce_dec_nb = { 383static struct notifier_block amd_mce_dec_nb = {
384 .notifier_call = amd_decode_mce, 384 .notifier_call = amd_decode_mce,
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/edac_mce_amd.h
index 0fba0e76c25f..2712a906afdf 100644
--- a/drivers/edac/edac_mce_amd.h
+++ b/drivers/edac/edac_mce_amd.h
@@ -1,6 +1,8 @@
1#ifndef _EDAC_MCE_AMD_H 1#ifndef _EDAC_MCE_AMD_H
2#define _EDAC_MCE_AMD_H 2#define _EDAC_MCE_AMD_H
3 3
4#include <linux/notifier.h>
5
4#include <asm/mce.h> 6#include <asm/mce.h>
5 7
6#define ERROR_CODE(x) ((x) & 0xffff) 8#define ERROR_CODE(x) ((x) & 0xffff)
@@ -61,10 +63,10 @@ struct err_regs {
61 u32 nbeal; 63 u32 nbeal;
62}; 64};
63 65
64
65void amd_report_gart_errors(bool); 66void amd_report_gart_errors(bool);
66void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)); 67void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
67void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)); 68void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
68void amd_decode_nb_mce(int, struct mce *, u32); 69void amd_decode_nb_mce(int, struct mce *, u32);
70int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
69 71
70#endif /* _EDAC_MCE_AMD_H */ 72#endif /* _EDAC_MCE_AMD_H */
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
new file mode 100644
index 000000000000..0e4f2dcf3bd6
--- /dev/null
+++ b/drivers/edac/mce_amd_inj.c
@@ -0,0 +1,171 @@
1/*
2 * A simple MCE injection facility for testing the MCE decoding code. This
3 * driver should be built as module so that it can be loaded on production
4 * kernels for testing purposes.
5 *
6 * This file may be distributed under the terms of the GNU General Public
7 * License version 2.
8 *
9 * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com>
10 * Advanced Micro Devices Inc.
11 */
12
13#include <linux/kobject.h>
14#include <linux/sysdev.h>
15#include <linux/edac.h>
16#include <asm/mce.h>
17
18#include "edac_mce_amd.h"
19
20struct edac_mce_attr {
21 struct attribute attr;
22 ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
23 ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
24 const char *buf, size_t count);
25};
26
27#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \
28static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)
29
30static struct kobject *mce_kobj;
31
32/*
33 * Collect all the MCi_XXX settings
34 */
35static struct mce i_mce;
36
37#define MCE_INJECT_STORE(reg) \
38static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
39 struct edac_mce_attr *attr, \
40 const char *data, size_t count)\
41{ \
42 int ret = 0; \
43 unsigned long value; \
44 \
45 ret = strict_strtoul(data, 16, &value); \
46 if (ret < 0) \
47 printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
48 \
49 i_mce.reg = value; \
50 \
51 return count; \
52}
53
54MCE_INJECT_STORE(status);
55MCE_INJECT_STORE(misc);
56MCE_INJECT_STORE(addr);
57
58#define MCE_INJECT_SHOW(reg) \
59static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \
60 struct edac_mce_attr *attr, \
61 char *buf) \
62{ \
63 return sprintf(buf, "0x%016llx\n", i_mce.reg); \
64}
65
66MCE_INJECT_SHOW(status);
67MCE_INJECT_SHOW(misc);
68MCE_INJECT_SHOW(addr);
69
70EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
71EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
72EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
73
74/*
75 * This denotes into which bank we're injecting and triggers
76 * the injection, at the same time.
77 */
78static ssize_t edac_inject_bank_store(struct kobject *kobj,
79 struct edac_mce_attr *attr,
80 const char *data, size_t count)
81{
82 int ret = 0;
83 unsigned long value;
84
85 ret = strict_strtoul(data, 10, &value);
86 if (ret < 0) {
87 printk(KERN_ERR "Invalid bank value!\n");
88 return -EINVAL;
89 }
90
91 if (value > 5) {
92 printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
93 return -EINVAL;
94 }
95
96 i_mce.bank = value;
97
98 amd_decode_mce(NULL, 0, &i_mce);
99
100 return count;
101}
102
103static ssize_t edac_inject_bank_show(struct kobject *kobj,
104 struct edac_mce_attr *attr, char *buf)
105{
106 return sprintf(buf, "%d\n", i_mce.bank);
107}
108
109EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);
110
111static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
112 &mce_attr_addr, &mce_attr_bank
113};
114
115static int __init edac_init_mce_inject(void)
116{
117 struct sysdev_class *edac_class = NULL;
118 int i, err = 0;
119
120 edac_class = edac_get_sysfs_class();
121 if (!edac_class)
122 return -EINVAL;
123
124 mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
125 if (!mce_kobj) {
126 printk(KERN_ERR "Error creating a mce kset.\n");
127 err = -ENOMEM;
128 goto err_mce_kobj;
129 }
130
131 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
132 err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
133 if (err) {
134 printk(KERN_ERR "Error creating %s in sysfs.\n",
135 sysfs_attrs[i]->attr.name);
136 goto err_sysfs_create;
137 }
138 }
139 return 0;
140
141err_sysfs_create:
142 while (i-- >= 0)
143 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
144
145 kobject_del(mce_kobj);
146
147err_mce_kobj:
148 edac_put_sysfs_class();
149
150 return err;
151}
152
153static void __exit edac_exit_mce_inject(void)
154{
155 int i;
156
157 for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
158 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
159
160 kobject_del(mce_kobj);
161
162 edac_put_sysfs_class();
163}
164
165module_init(edac_init_mce_inject);
166module_exit(edac_exit_mce_inject);
167
168MODULE_LICENSE("GPL");
169MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>");
170MODULE_AUTHOR("AMD Inc.");
171MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");