diff options
author | Borislav Petkov <bp@suse.de> | 2015-05-18 04:07:17 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-05-18 04:31:22 -0400 |
commit | 17fea54bf0ab34fa09a06bbde2f58ed7bbdf9299 (patch) | |
tree | 619dabf56bbaaaba9677ee7a3ab4421930b3e847 /arch/x86/kernel/cpu | |
parent | c0655fe9b0901a968800f56687be3c62b4cce5d2 (diff) |
x86/mce: Fix MCE severity messages
Derek noticed that a critical MCE gets reported with the wrong
error type description:
[Hardware Error]: CPU 34: Machine Check Exception: 5 Bank 9: f200003f000100b0
[Hardware Error]: RIP !INEXACT! 10:<ffffffff812e14c1> {intel_idle+0xb1/0x170}
[Hardware Error]: TSC 49587b8e321cb
[Hardware Error]: PROCESSOR 0:306e4 TIME 1431561296 SOCKET 1 APIC 29
[Hardware Error]: Some CPUs didn't answer in synchronization
[Hardware Error]: Machine check: Invalid
^^^^^^^
The last line with 'Invalid' should have printed the high level
MCE error type description we get from mce_severity, i.e.
something like:
[Hardware Error]: Machine check: Action required: data load error in a user process
this happens due to the fact that mce_no_way_out() iterates over
all MCA banks and possibly overwrites the @msg argument which is
used in the panic printing later.
Change behavior to take the message of only and the (last)
critical MCE it detects.
Reported-by: Derek <denc716@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1431936437-25286-3-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..20190bdac9d5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, | |||
708 | struct pt_regs *regs) | 708 | struct pt_regs *regs) |
709 | { | 709 | { |
710 | int i, ret = 0; | 710 | int i, ret = 0; |
711 | char *tmp; | ||
711 | 712 | ||
712 | for (i = 0; i < mca_cfg.banks; i++) { | 713 | for (i = 0; i < mca_cfg.banks; i++) { |
713 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 714 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
@@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, | |||
716 | if (quirk_no_way_out) | 717 | if (quirk_no_way_out) |
717 | quirk_no_way_out(i, m, regs); | 718 | quirk_no_way_out(i, m, regs); |
718 | } | 719 | } |
719 | if (mce_severity(m, mca_cfg.tolerant, msg, true) >= | 720 | |
720 | MCE_PANIC_SEVERITY) | 721 | if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { |
722 | *msg = tmp; | ||
721 | ret = 1; | 723 | ret = 1; |
724 | } | ||
722 | } | 725 | } |
723 | return ret; | 726 | return ret; |
724 | } | 727 | } |