aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2013-12-16 08:33:17 -0500
committer: Ingo Molnar <mingo@kernel.org> 2013-12-16 08:33:17 -0500
commit: 014952270eb9770a41084e74c721e4c07f2306c5 (patch)
tree: 75460debf66bd187355599a73db0a2bffec5a2f8
parent: 319e2e3f63c348a9b66db4667efa73178e18b17d (diff)
parent: 42139eb356e3384759ca143ae04d82376346eb4c (diff)
Merge tag 'ras_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/ras
Pull RAS updates from Borislav Petkov:

 * Add the functionality to override error reporting agents as some
   machines are sporting a new extended error logging capability which,
   if done properly in the BIOS, makes a corresponding EDAC module
   redundant, from Gong Chen.

 * PCIe AER tracepoint severity levels fix, from Rui Wang.

 * Error path correction for the mce device init, from Levente Kurusa.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- Documentation/kernel-parameters.txt 8
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 4
-rw-r--r-- drivers/acpi/acpi_extlog.c 18
-rw-r--r-- drivers/edac/edac_stub.c 19
-rw-r--r-- drivers/edac/sb_edac.c 6
-rw-r--r-- include/linux/edac.h 28
-rw-r--r-- include/trace/events/ras.h 10
7 files changed, 84 insertions, 9 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50680a59a2ff..453092c822f1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -881,6 +881,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
881 881
882 The xen output can only be used by Xen PV guests. 882 The xen output can only be used by Xen PV guests.
883 883
884 edac_report= [HW,EDAC] Control how to report EDAC event
885 Format: {"on" | "off" | "force"}
886 on: enable EDAC to report H/W event. May be overridden
887 by other higher priority error reporting module.
888 off: disable H/W event reporting through EDAC.
889 force: enforce the use of EDAC to report H/W event.
890 default: on.
891
884 ekgdboc= [X86,KGDB] Allow early kernel console debugging 892 ekgdboc= [X86,KGDB] Allow early kernel console debugging
885 ekgdboc=kbd 893 ekgdboc=kbd
886 894
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3218cdee95f..a389c1d859ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
2272 dev->release = &mce_device_release; 2272 dev->release = &mce_device_release;
2273 2273
2274 err = device_register(dev); 2274 err = device_register(dev);
2275 if (err) 2275 if (err) {
2276 put_device(dev);
2276 return err; 2277 return err;
2278 }
2277 2279
2278 for (i = 0; mce_device_attrs[i]; i++) { 2280 for (i = 0; mce_device_attrs[i]; i++) {
2279 err = device_create_file(dev, mce_device_attrs[i]); 2281 err = device_create_file(dev, mce_device_attrs[i]);
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a6869e110ce5..5d33c5415405 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
12#include <acpi/acpi_bus.h> 12#include <acpi/acpi_bus.h>
13#include <linux/cper.h> 13#include <linux/cper.h>
14#include <linux/ratelimit.h> 14#include <linux/ratelimit.h>
15#include <linux/edac.h>
15#include <asm/cpu.h> 16#include <asm/cpu.h>
16#include <asm/mce.h> 17#include <asm/mce.h>
17 18
@@ -43,6 +44,8 @@ struct extlog_l1_head {
43 u8 rev1[12]; 44 u8 rev1[12];
44}; 45};
45 46
47static int old_edac_report_status;
48
46static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; 49static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
47 50
48/* L1 table related physical address */ 51/* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
150 153
151 rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); 154 rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
152 155
153 return NOTIFY_DONE; 156 return NOTIFY_STOP;
154} 157}
155 158
156static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) 159static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
231 u64 cap; 234 u64 cap;
232 int rc; 235 int rc;
233 236
234 rc = -ENODEV; 237 if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
238 pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
239 return -EPERM;
240 }
235 241
242 rc = -ENODEV;
236 rdmsrl(MSR_IA32_MCG_CAP, cap); 243 rdmsrl(MSR_IA32_MCG_CAP, cap);
237 if (!(cap & MCG_ELOG_P)) 244 if (!(cap & MCG_ELOG_P))
238 return rc; 245 return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
287 if (elog_buf == NULL) 294 if (elog_buf == NULL)
288 goto err_release_elog; 295 goto err_release_elog;
289 296
297 /*
298 * eMCA event report method has higher priority than EDAC method,
299 * unless EDAC event report method is mandatory.
300 */
301 old_edac_report_status = get_edac_report_status();
302 set_edac_report_status(EDAC_REPORTING_DISABLED);
290 mce_register_decode_chain(&extlog_mce_dec); 303 mce_register_decode_chain(&extlog_mce_dec);
291 /* enable OS to be involved to take over management from BIOS */ 304 /* enable OS to be involved to take over management from BIOS */
292 ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; 305 ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ err:
308 321
309static void __exit extlog_exit(void) 322static void __exit extlog_exit(void)
310{ 323{
324 set_edac_report_status(old_edac_report_status);
311 mce_unregister_decode_chain(&extlog_mce_dec); 325 mce_unregister_decode_chain(&extlog_mce_dec);
312 ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; 326 ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
313 if (extlog_l1_addr) 327 if (extlog_l1_addr)
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 351945fa2ecd..9d9e18aefaaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert);
29 29
30static atomic_t edac_subsys_valid = ATOMIC_INIT(0); 30static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
31 31
32int edac_report_status = EDAC_REPORTING_ENABLED;
33EXPORT_SYMBOL_GPL(edac_report_status);
34
35static int __init edac_report_setup(char *str)
36{
37 if (!str)
38 return -EINVAL;
39
40 if (!strncmp(str, "on", 2))
41 set_edac_report_status(EDAC_REPORTING_ENABLED);
42 else if (!strncmp(str, "off", 3))
43 set_edac_report_status(EDAC_REPORTING_DISABLED);
44 else if (!strncmp(str, "force", 5))
45 set_edac_report_status(EDAC_REPORTING_FORCE);
46
47 return 0;
48}
49__setup("edac_report=", edac_report_setup);
50
32/* 51/*
33 * called to determine if there is an EDAC driver interested in 52 * called to determine if there is an EDAC driver interested in
34 * knowing an event (such as NMI) occurred 53 * knowing an event (such as NMI) occurred
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index d7f1b57bd3be..1229123ccb59 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1829 struct mem_ctl_info *mci; 1829 struct mem_ctl_info *mci;
1830 struct sbridge_pvt *pvt; 1830 struct sbridge_pvt *pvt;
1831 1831
1832 if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
1833 return NOTIFY_DONE;
1834
1832 mci = get_mci_for_node_id(mce->socketid); 1835 mci = get_mci_for_node_id(mce->socketid);
1833 if (!mci) 1836 if (!mci)
1834 return NOTIFY_BAD; 1837 return NOTIFY_BAD;
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void)
2142 opstate_init(); 2145 opstate_init();
2143 2146
2144 pci_rc = pci_register_driver(&sbridge_driver); 2147 pci_rc = pci_register_driver(&sbridge_driver);
2145
2146 if (pci_rc >= 0) { 2148 if (pci_rc >= 0) {
2147 mce_register_decode_chain(&sbridge_mce_dec); 2149 mce_register_decode_chain(&sbridge_mce_dec);
2150 if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
2151 sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
2148 return 0; 2152 return 0;
2149 } 2153 }
2150 2154
diff --git a/include/linux/edac.h b/include/linux/edac.h
index dbdffe8d4469..8e6c20af11a2 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void);
35extern struct bus_type *edac_get_sysfs_subsys(void); 35extern struct bus_type *edac_get_sysfs_subsys(void);
36extern void edac_put_sysfs_subsys(void); 36extern void edac_put_sysfs_subsys(void);
37 37
38enum {
39 EDAC_REPORTING_ENABLED,
40 EDAC_REPORTING_DISABLED,
41 EDAC_REPORTING_FORCE
42};
43
44extern int edac_report_status;
45#ifdef CONFIG_EDAC
46static inline int get_edac_report_status(void)
47{
48 return edac_report_status;
49}
50
51static inline void set_edac_report_status(int new)
52{
53 edac_report_status = new;
54}
55#else
56static inline int get_edac_report_status(void)
57{
58 return EDAC_REPORTING_DISABLED;
59}
60
61static inline void set_edac_report_status(int new)
62{
63}
64#endif
65
38static inline void opstate_init(void) 66static inline void opstate_init(void)
39{ 67{
40 switch (edac_op_state) { 68 switch (edac_op_state) {
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
index 88b878383797..1c875ad1ee5f 100644
--- a/include/trace/events/ras.h
+++ b/include/trace/events/ras.h
@@ -5,7 +5,7 @@
5#define _TRACE_AER_H 5#define _TRACE_AER_H
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8#include <linux/edac.h> 8#include <linux/aer.h>
9 9
10 10
11/* 11/*
@@ -63,10 +63,10 @@ TRACE_EVENT(aer_event,
63 63
64 TP_printk("%s PCIe Bus Error: severity=%s, %s\n", 64 TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
65 __get_str(dev_name), 65 __get_str(dev_name),
66 __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : 66 __entry->severity == AER_CORRECTABLE ? "Corrected" :
67 __entry->severity == HW_EVENT_ERR_FATAL ? 67 __entry->severity == AER_FATAL ?
68 "Fatal" : "Uncorrected", 68 "Fatal" : "Uncorrected, non-fatal",
69 __entry->severity == HW_EVENT_ERR_CORRECTED ? 69 __entry->severity == AER_CORRECTABLE ?
70 __print_flags(__entry->status, "|", aer_correctable_errors) : 70 __print_flags(__entry->status, "|", aer_correctable_errors) :
71 __print_flags(__entry->status, "|", aer_uncorrectable_errors)) 71 __print_flags(__entry->status, "|", aer_uncorrectable_errors))
72); 72);