aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Borislav Petkov <bp@suse.de> 2016-11-01 07:52:27 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2016-11-08 11:10:13 -0500
commit: cd9c57cad3fe89ea949b9266cddc947c0838f7af (patch)
tree: 95bb03b5ae66cc17eb6584f4e2356af2ef85ee62
parent: 8c203dbb78ca7a9aed4e2570c866b0f43c752e41 (diff)
x86/MCE: Dump MCE to dmesg if no consumers
When there are no error record consumers registered with the kernel, the
only thing that appears in dmesg is something like:

  [ 300.000326] mce: [Hardware Error]: Machine check events logged

and the error records are gone. Which is seriously counterproductive.

So let's dump them to dmesg instead, in such a case.

Requested-by: Eric Morton <Eric.Morton@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20161101120911.13163-4-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 52
1 files changed, 46 insertions, 6 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7fdf453d895..4ca00474804b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
207 207
208static struct notifier_block mce_srao_nb; 208static struct notifier_block mce_srao_nb;
209 209
210static atomic_t num_notifiers;
211
210void mce_register_decode_chain(struct notifier_block *nb) 212void mce_register_decode_chain(struct notifier_block *nb)
211{ 213{
214 atomic_inc(&num_notifiers);
215
212 /* Ensure SRAO notifier has the highest priority in the decode chain. */ 216 /* Ensure SRAO notifier has the highest priority in the decode chain. */
213 if (nb != &mce_srao_nb && nb->priority == INT_MAX) 217 if (nb != &mce_srao_nb && nb->priority == INT_MAX)
214 nb->priority -= 1; 218 nb->priority -= 1;
@@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain);
219 223
220void mce_unregister_decode_chain(struct notifier_block *nb) 224void mce_unregister_decode_chain(struct notifier_block *nb)
221{ 225{
226 atomic_dec(&num_notifiers);
227
222 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); 228 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
223} 229}
224EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); 230EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
@@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = {
270 .misc = misc_reg 276 .misc = misc_reg
271}; 277};
272 278
273static void print_mce(struct mce *m) 279static void __print_mce(struct mce *m)
274{ 280{
275 int ret = 0; 281 pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
276 282 m->extcpu,
277 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", 283 (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
278 m->extcpu, m->mcgstatus, m->bank, m->status); 284 m->mcgstatus, m->bank, m->status);
279 285
280 if (m->ip) { 286 if (m->ip) {
281 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", 287 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
282 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 288 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
283 m->cs, m->ip); 289 m->cs, m->ip);
284 290
285 if (m->cs == __KERNEL_CS) 291 if (m->cs == __KERNEL_CS)
286 print_symbol("{%s}", m->ip); 292 print_symbol("{%s}", m->ip);
@@ -308,6 +314,13 @@ static void print_mce(struct mce *m)
308 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", 314 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
309 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, 315 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
310 cpu_data(m->extcpu).microcode); 316 cpu_data(m->extcpu).microcode);
317}
318
319static void print_mce(struct mce *m)
320{
321 int ret = 0;
322
323 __print_mce(m);
311 324
312 /* 325 /*
313 * Print out human-readable details about the MCE error, 326 * Print out human-readable details about the MCE error,
@@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = {
569 .priority = INT_MAX, 582 .priority = INT_MAX,
570}; 583};
571 584
585static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
586 void *data)
587{
588 struct mce *m = (struct mce *)data;
589
590 if (!m)
591 return NOTIFY_DONE;
592
593 /*
594 * Run the default notifier if we have only the SRAO
595 * notifier and us registered.
596 */
597 if (atomic_read(&num_notifiers) > 2)
598 return NOTIFY_DONE;
599
600 __print_mce(m);
601
602 return NOTIFY_DONE;
603}
604
605static struct notifier_block mce_default_nb = {
606 .notifier_call = mce_default_notifier,
607 /* lowest prio, we want it to run last. */
608 .priority = 0,
609};
610
572/* 611/*
573 * Read ADDR and MISC registers. 612 * Read ADDR and MISC registers.
574 */ 613 */
@@ -2138,6 +2177,7 @@ int __init mcheck_init(void)
2138{ 2177{
2139 mcheck_intel_therm_init(); 2178 mcheck_intel_therm_init();
2140 mce_register_decode_chain(&mce_srao_nb); 2179 mce_register_decode_chain(&mce_srao_nb);
2180 mce_register_decode_chain(&mce_default_nb);
2141 mcheck_vendor_init_severity(); 2181 mcheck_vendor_init_severity();
2142 2182
2143 INIT_WORK(&mce_work, mce_process_work); 2183 INIT_WORK(&mce_work, mce_process_work);