diff options
author | Borislav Petkov <bp@suse.de> | 2016-11-01 07:52:27 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2016-11-08 11:10:13 -0500 |
commit | cd9c57cad3fe89ea949b9266cddc947c0838f7af (patch) | |
tree | 95bb03b5ae66cc17eb6584f4e2356af2ef85ee62 | |
parent | 8c203dbb78ca7a9aed4e2570c866b0f43c752e41 (diff) |
x86/MCE: Dump MCE to dmesg if no consumers
When there are no error record consumers registered with the kernel, the
only thing that appears in dmesg is something like:
[ 300.000326] mce: [Hardware Error]: Machine check events logged
and the error records are gone, which is seriously counterproductive.
So let's dump them to dmesg instead in such a case.
Requested-by: Eric Morton <Eric.Morton@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20161101120911.13163-4-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 52 |
1 files changed, 46 insertions, 6 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7fdf453d895..4ca00474804b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log); | |||
207 | 207 | ||
208 | static struct notifier_block mce_srao_nb; | 208 | static struct notifier_block mce_srao_nb; |
209 | 209 | ||
210 | static atomic_t num_notifiers; | ||
211 | |||
210 | void mce_register_decode_chain(struct notifier_block *nb) | 212 | void mce_register_decode_chain(struct notifier_block *nb) |
211 | { | 213 | { |
214 | atomic_inc(&num_notifiers); | ||
215 | |||
212 | /* Ensure SRAO notifier has the highest priority in the decode chain. */ | 216 | /* Ensure SRAO notifier has the highest priority in the decode chain. */ |
213 | if (nb != &mce_srao_nb && nb->priority == INT_MAX) | 217 | if (nb != &mce_srao_nb && nb->priority == INT_MAX) |
214 | nb->priority -= 1; | 218 | nb->priority -= 1; |
@@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain); | |||
219 | 223 | ||
220 | void mce_unregister_decode_chain(struct notifier_block *nb) | 224 | void mce_unregister_decode_chain(struct notifier_block *nb) |
221 | { | 225 | { |
226 | atomic_dec(&num_notifiers); | ||
227 | |||
222 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); | 228 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); |
223 | } | 229 | } |
224 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); | 230 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); |
@@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = { | |||
270 | .misc = misc_reg | 276 | .misc = misc_reg |
271 | }; | 277 | }; |
272 | 278 | ||
273 | static void print_mce(struct mce *m) | 279 | static void __print_mce(struct mce *m) |
274 | { | 280 | { |
275 | int ret = 0; | 281 | pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n", |
276 | 282 | m->extcpu, | |
277 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", | 283 | (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""), |
278 | m->extcpu, m->mcgstatus, m->bank, m->status); | 284 | m->mcgstatus, m->bank, m->status); |
279 | 285 | ||
280 | if (m->ip) { | 286 | if (m->ip) { |
281 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", | 287 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", |
282 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 288 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
283 | m->cs, m->ip); | 289 | m->cs, m->ip); |
284 | 290 | ||
285 | if (m->cs == __KERNEL_CS) | 291 | if (m->cs == __KERNEL_CS) |
286 | print_symbol("{%s}", m->ip); | 292 | print_symbol("{%s}", m->ip); |
@@ -308,6 +314,13 @@ static void print_mce(struct mce *m) | |||
308 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", | 314 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", |
309 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, | 315 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, |
310 | cpu_data(m->extcpu).microcode); | 316 | cpu_data(m->extcpu).microcode); |
317 | } | ||
318 | |||
319 | static void print_mce(struct mce *m) | ||
320 | { | ||
321 | int ret = 0; | ||
322 | |||
323 | __print_mce(m); | ||
311 | 324 | ||
312 | /* | 325 | /* |
313 | * Print out human-readable details about the MCE error, | 326 | * Print out human-readable details about the MCE error, |
@@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = { | |||
569 | .priority = INT_MAX, | 582 | .priority = INT_MAX, |
570 | }; | 583 | }; |
571 | 584 | ||
585 | static int mce_default_notifier(struct notifier_block *nb, unsigned long val, | ||
586 | void *data) | ||
587 | { | ||
588 | struct mce *m = (struct mce *)data; | ||
589 | |||
590 | if (!m) | ||
591 | return NOTIFY_DONE; | ||
592 | |||
593 | /* | ||
594 | * Run the default notifier if we have only the SRAO | ||
595 | * notifier and us registered. | ||
596 | */ | ||
597 | if (atomic_read(&num_notifiers) > 2) | ||
598 | return NOTIFY_DONE; | ||
599 | |||
600 | __print_mce(m); | ||
601 | |||
602 | return NOTIFY_DONE; | ||
603 | } | ||
604 | |||
605 | static struct notifier_block mce_default_nb = { | ||
606 | .notifier_call = mce_default_notifier, | ||
607 | /* lowest prio, we want it to run last. */ | ||
608 | .priority = 0, | ||
609 | }; | ||
610 | |||
572 | /* | 611 | /* |
573 | * Read ADDR and MISC registers. | 612 | * Read ADDR and MISC registers. |
574 | */ | 613 | */ |
@@ -2138,6 +2177,7 @@ int __init mcheck_init(void) | |||
2138 | { | 2177 | { |
2139 | mcheck_intel_therm_init(); | 2178 | mcheck_intel_therm_init(); |
2140 | mce_register_decode_chain(&mce_srao_nb); | 2179 | mce_register_decode_chain(&mce_srao_nb); |
2180 | mce_register_decode_chain(&mce_default_nb); | ||
2141 | mcheck_vendor_init_severity(); | 2181 | mcheck_vendor_init_severity(); |
2142 | 2182 | ||
2143 | INIT_WORK(&mce_work, mce_process_work); | 2183 | INIT_WORK(&mce_work, mce_process_work); |