diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-12-16 08:33:17 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-12-16 08:33:17 -0500 |
commit | 014952270eb9770a41084e74c721e4c07f2306c5 (patch) | |
tree | 75460debf66bd187355599a73db0a2bffec5a2f8 | |
parent | 319e2e3f63c348a9b66db4667efa73178e18b17d (diff) | |
parent | 42139eb356e3384759ca143ae04d82376346eb4c (diff) |
Merge tag 'ras_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/ras
Pull RAS updates from Borislav Petkov:
* Add the ability to override error reporting agents: some machines
sport a new extended error logging capability which, if implemented
properly in the BIOS, makes the corresponding EDAC module redundant.
From Gong Chen.
* PCIe AER tracepoint severity levels fix, from Rui Wang.
* Error path correction for the mce device init, from Levente Kurusa.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | Documentation/kernel-parameters.txt | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 4 | ||||
-rw-r--r-- | drivers/acpi/acpi_extlog.c | 18 | ||||
-rw-r--r-- | drivers/edac/edac_stub.c | 19 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 6 | ||||
-rw-r--r-- | include/linux/edac.h | 28 | ||||
-rw-r--r-- | include/trace/events/ras.h | 10 |
7 files changed, 84 insertions, 9 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 50680a59a2ff..453092c822f1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -881,6 +881,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
881 | 881 | ||
882 | The xen output can only be used by Xen PV guests. | 882 | The xen output can only be used by Xen PV guests. |
883 | 883 | ||
884 | edac_report= [HW,EDAC] Control how to report EDAC event | ||
885 | Format: {"on" | "off" | "force"} | ||
886 | on: enable EDAC to report H/W event. May be overridden | ||
887 | by other higher priority error reporting module. | ||
888 | off: disable H/W event reporting through EDAC. | ||
889 | force: enforce the use of EDAC to report H/W event. | ||
890 | default: on. | ||
891 | |||
884 | ekgdboc= [X86,KGDB] Allow early kernel console debugging | 892 | ekgdboc= [X86,KGDB] Allow early kernel console debugging |
885 | ekgdboc=kbd | 893 | ekgdboc=kbd |
886 | 894 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3218cdee95f..a389c1d859ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu) | |||
2272 | dev->release = &mce_device_release; | 2272 | dev->release = &mce_device_release; |
2273 | 2273 | ||
2274 | err = device_register(dev); | 2274 | err = device_register(dev); |
2275 | if (err) | 2275 | if (err) { |
2276 | put_device(dev); | ||
2276 | return err; | 2277 | return err; |
2278 | } | ||
2277 | 2279 | ||
2278 | for (i = 0; mce_device_attrs[i]; i++) { | 2280 | for (i = 0; mce_device_attrs[i]; i++) { |
2279 | err = device_create_file(dev, mce_device_attrs[i]); | 2281 | err = device_create_file(dev, mce_device_attrs[i]); |
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a6869e110ce5..5d33c5415405 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <acpi/acpi_bus.h> | 12 | #include <acpi/acpi_bus.h> |
13 | #include <linux/cper.h> | 13 | #include <linux/cper.h> |
14 | #include <linux/ratelimit.h> | 14 | #include <linux/ratelimit.h> |
15 | #include <linux/edac.h> | ||
15 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
16 | #include <asm/mce.h> | 17 | #include <asm/mce.h> |
17 | 18 | ||
@@ -43,6 +44,8 @@ struct extlog_l1_head { | |||
43 | u8 rev1[12]; | 44 | u8 rev1[12]; |
44 | }; | 45 | }; |
45 | 46 | ||
47 | static int old_edac_report_status; | ||
48 | |||
46 | static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; | 49 | static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; |
47 | 50 | ||
48 | /* L1 table related physical address */ | 51 | /* L1 table related physical address */ |
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, | |||
150 | 153 | ||
151 | rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); | 154 | rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); |
152 | 155 | ||
153 | return NOTIFY_DONE; | 156 | return NOTIFY_STOP; |
154 | } | 157 | } |
155 | 158 | ||
156 | static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) | 159 | static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) |
@@ -231,8 +234,12 @@ static int __init extlog_init(void) | |||
231 | u64 cap; | 234 | u64 cap; |
232 | int rc; | 235 | int rc; |
233 | 236 | ||
234 | rc = -ENODEV; | 237 | if (get_edac_report_status() == EDAC_REPORTING_FORCE) { |
238 | pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); | ||
239 | return -EPERM; | ||
240 | } | ||
235 | 241 | ||
242 | rc = -ENODEV; | ||
236 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 243 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
237 | if (!(cap & MCG_ELOG_P)) | 244 | if (!(cap & MCG_ELOG_P)) |
238 | return rc; | 245 | return rc; |
@@ -287,6 +294,12 @@ static int __init extlog_init(void) | |||
287 | if (elog_buf == NULL) | 294 | if (elog_buf == NULL) |
288 | goto err_release_elog; | 295 | goto err_release_elog; |
289 | 296 | ||
297 | /* | ||
298 | * eMCA event report method has higher priority than EDAC method, | ||
299 | * unless EDAC event report method is mandatory. | ||
300 | */ | ||
301 | old_edac_report_status = get_edac_report_status(); | ||
302 | set_edac_report_status(EDAC_REPORTING_DISABLED); | ||
290 | mce_register_decode_chain(&extlog_mce_dec); | 303 | mce_register_decode_chain(&extlog_mce_dec); |
291 | /* enable OS to be involved to take over management from BIOS */ | 304 | /* enable OS to be involved to take over management from BIOS */ |
292 | ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; | 305 | ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; |
@@ -308,6 +321,7 @@ err: | |||
308 | 321 | ||
309 | static void __exit extlog_exit(void) | 322 | static void __exit extlog_exit(void) |
310 | { | 323 | { |
324 | set_edac_report_status(old_edac_report_status); | ||
311 | mce_unregister_decode_chain(&extlog_mce_dec); | 325 | mce_unregister_decode_chain(&extlog_mce_dec); |
312 | ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; | 326 | ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; |
313 | if (extlog_l1_addr) | 327 | if (extlog_l1_addr) |
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 351945fa2ecd..9d9e18aefaaa 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c | |||
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert); | |||
29 | 29 | ||
30 | static atomic_t edac_subsys_valid = ATOMIC_INIT(0); | 30 | static atomic_t edac_subsys_valid = ATOMIC_INIT(0); |
31 | 31 | ||
32 | int edac_report_status = EDAC_REPORTING_ENABLED; | ||
33 | EXPORT_SYMBOL_GPL(edac_report_status); | ||
34 | |||
35 | static int __init edac_report_setup(char *str) | ||
36 | { | ||
37 | if (!str) | ||
38 | return -EINVAL; | ||
39 | |||
40 | if (!strncmp(str, "on", 2)) | ||
41 | set_edac_report_status(EDAC_REPORTING_ENABLED); | ||
42 | else if (!strncmp(str, "off", 3)) | ||
43 | set_edac_report_status(EDAC_REPORTING_DISABLED); | ||
44 | else if (!strncmp(str, "force", 5)) | ||
45 | set_edac_report_status(EDAC_REPORTING_FORCE); | ||
46 | |||
47 | return 0; | ||
48 | } | ||
49 | __setup("edac_report=", edac_report_setup); | ||
50 | |||
32 | /* | 51 | /* |
33 | * called to determine if there is an EDAC driver interested in | 52 | * called to determine if there is an EDAC driver interested in |
34 | * knowing an event (such as NMI) occurred | 53 | * knowing an event (such as NMI) occurred |
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index d7f1b57bd3be..1229123ccb59 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c | |||
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, | |||
1829 | struct mem_ctl_info *mci; | 1829 | struct mem_ctl_info *mci; |
1830 | struct sbridge_pvt *pvt; | 1830 | struct sbridge_pvt *pvt; |
1831 | 1831 | ||
1832 | if (get_edac_report_status() == EDAC_REPORTING_DISABLED) | ||
1833 | return NOTIFY_DONE; | ||
1834 | |||
1832 | mci = get_mci_for_node_id(mce->socketid); | 1835 | mci = get_mci_for_node_id(mce->socketid); |
1833 | if (!mci) | 1836 | if (!mci) |
1834 | return NOTIFY_BAD; | 1837 | return NOTIFY_BAD; |
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void) | |||
2142 | opstate_init(); | 2145 | opstate_init(); |
2143 | 2146 | ||
2144 | pci_rc = pci_register_driver(&sbridge_driver); | 2147 | pci_rc = pci_register_driver(&sbridge_driver); |
2145 | |||
2146 | if (pci_rc >= 0) { | 2148 | if (pci_rc >= 0) { |
2147 | mce_register_decode_chain(&sbridge_mce_dec); | 2149 | mce_register_decode_chain(&sbridge_mce_dec); |
2150 | if (get_edac_report_status() == EDAC_REPORTING_DISABLED) | ||
2151 | sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); | ||
2148 | return 0; | 2152 | return 0; |
2149 | } | 2153 | } |
2150 | 2154 | ||
diff --git a/include/linux/edac.h b/include/linux/edac.h index dbdffe8d4469..8e6c20af11a2 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h | |||
@@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void); | |||
35 | extern struct bus_type *edac_get_sysfs_subsys(void); | 35 | extern struct bus_type *edac_get_sysfs_subsys(void); |
36 | extern void edac_put_sysfs_subsys(void); | 36 | extern void edac_put_sysfs_subsys(void); |
37 | 37 | ||
38 | enum { | ||
39 | EDAC_REPORTING_ENABLED, | ||
40 | EDAC_REPORTING_DISABLED, | ||
41 | EDAC_REPORTING_FORCE | ||
42 | }; | ||
43 | |||
44 | extern int edac_report_status; | ||
45 | #ifdef CONFIG_EDAC | ||
46 | static inline int get_edac_report_status(void) | ||
47 | { | ||
48 | return edac_report_status; | ||
49 | } | ||
50 | |||
51 | static inline void set_edac_report_status(int new) | ||
52 | { | ||
53 | edac_report_status = new; | ||
54 | } | ||
55 | #else | ||
56 | static inline int get_edac_report_status(void) | ||
57 | { | ||
58 | return EDAC_REPORTING_DISABLED; | ||
59 | } | ||
60 | |||
61 | static inline void set_edac_report_status(int new) | ||
62 | { | ||
63 | } | ||
64 | #endif | ||
65 | |||
38 | static inline void opstate_init(void) | 66 | static inline void opstate_init(void) |
39 | { | 67 | { |
40 | switch (edac_op_state) { | 68 | switch (edac_op_state) { |
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h index 88b878383797..1c875ad1ee5f 100644 --- a/include/trace/events/ras.h +++ b/include/trace/events/ras.h | |||
@@ -5,7 +5,7 @@ | |||
5 | #define _TRACE_AER_H | 5 | #define _TRACE_AER_H |
6 | 6 | ||
7 | #include <linux/tracepoint.h> | 7 | #include <linux/tracepoint.h> |
8 | #include <linux/edac.h> | 8 | #include <linux/aer.h> |
9 | 9 | ||
10 | 10 | ||
11 | /* | 11 | /* |
@@ -63,10 +63,10 @@ TRACE_EVENT(aer_event, | |||
63 | 63 | ||
64 | TP_printk("%s PCIe Bus Error: severity=%s, %s\n", | 64 | TP_printk("%s PCIe Bus Error: severity=%s, %s\n", |
65 | __get_str(dev_name), | 65 | __get_str(dev_name), |
66 | __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : | 66 | __entry->severity == AER_CORRECTABLE ? "Corrected" : |
67 | __entry->severity == HW_EVENT_ERR_FATAL ? | 67 | __entry->severity == AER_FATAL ? |
68 | "Fatal" : "Uncorrected", | 68 | "Fatal" : "Uncorrected, non-fatal", |
69 | __entry->severity == HW_EVENT_ERR_CORRECTED ? | 69 | __entry->severity == AER_CORRECTABLE ? |
70 | __print_flags(__entry->status, "|", aer_correctable_errors) : | 70 | __print_flags(__entry->status, "|", aer_correctable_errors) : |
71 | __print_flags(__entry->status, "|", aer_uncorrectable_errors)) | 71 | __print_flags(__entry->status, "|", aer_uncorrectable_errors)) |
72 | ); | 72 | ); |