diff options
author | Tony Luck <tony.luck@intel.com> | 2015-11-24 02:41:17 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-11-24 03:12:35 -0500 |
commit | 8b38937b7ab55e93065a14c88753b1fe83e93c60 (patch) | |
tree | fa46eec8c5604171b971af1d5afa4807538d8c9d | |
parent | 1ec218373b8ebda821aec00bb156a9c94fad9cd4 (diff) |
x86/mce: Do not enter deferred errors into the generic pool twice
We used to have a special ring buffer for deferred errors that
was used to mark problem pages. We replaced that with a generic
pool. Then later converted mce_log() to also use the same pool.
As a result, we end up adding all deferred errors to the pool
twice.
Rearrange this code. Make sure to set the m.severity and
m.usable_addr fields for deferred errors. Then, if the flags and
mca_cfg.dont_log_ce allow us to call mce_log(), we are done, because
mce_log() will add this entry to the generic pool.
If we skipped mce_log(), then we still want to take action for
the deferred error, so add to the pool.
Change the name of the boolean "error_logged" to "error_seen".
We should set it whether or not we logged an error, because the
return value from machine_check_poll() is used to decide whether
storms have subsided or not.
Reported-by: Gong Chen <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1448350880-5573-2-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c5b0d562dbf5..6531cb46803c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -567,7 +567,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); | |||
567 | */ | 567 | */ |
568 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | 568 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) |
569 | { | 569 | { |
570 | bool error_logged = false; | 570 | bool error_seen = false; |
571 | struct mce m; | 571 | struct mce m; |
572 | int severity; | 572 | int severity; |
573 | int i; | 573 | int i; |
@@ -601,6 +601,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
601 | (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) | 601 | (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) |
602 | continue; | 602 | continue; |
603 | 603 | ||
604 | error_seen = true; | ||
605 | |||
604 | mce_read_aux(&m, i); | 606 | mce_read_aux(&m, i); |
605 | 607 | ||
606 | if (!(flags & MCP_TIMESTAMP)) | 608 | if (!(flags & MCP_TIMESTAMP)) |
@@ -608,17 +610,10 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
608 | 610 | ||
609 | severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); | 611 | severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); |
610 | 612 | ||
611 | /* | ||
612 | * In the cases where we don't have a valid address after all, | ||
613 | * do not add it into the ring buffer. | ||
614 | */ | ||
615 | if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { | 613 | if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { |
616 | if (m.status & MCI_STATUS_ADDRV) { | 614 | if (m.status & MCI_STATUS_ADDRV) { |
617 | m.severity = severity; | 615 | m.severity = severity; |
618 | m.usable_addr = mce_usable_address(&m); | 616 | m.usable_addr = mce_usable_address(&m); |
619 | |||
620 | if (!mce_gen_pool_add(&m)) | ||
621 | mce_schedule_work(); | ||
622 | } | 617 | } |
623 | } | 618 | } |
624 | 619 | ||
@@ -626,9 +621,16 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
626 | * Don't get the IP here because it's unlikely to | 621 | * Don't get the IP here because it's unlikely to |
627 | * have anything to do with the actual error location. | 622 | * have anything to do with the actual error location. |
628 | */ | 623 | */ |
629 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { | 624 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) |
630 | error_logged = true; | ||
631 | mce_log(&m); | 625 | mce_log(&m); |
626 | else if (m.usable_addr) { | ||
627 | /* | ||
628 | * Although we skipped logging this, we still want | ||
629 | * to take action. Add to the pool so the registered | ||
630 | * notifiers will see it. | ||
631 | */ | ||
632 | if (!mce_gen_pool_add(&m)) | ||
633 | mce_schedule_work(); | ||
632 | } | 634 | } |
633 | 635 | ||
634 | /* | 636 | /* |
@@ -644,7 +646,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
644 | 646 | ||
645 | sync_core(); | 647 | sync_core(); |
646 | 648 | ||
647 | return error_logged; | 649 | return error_seen; |
648 | } | 650 | } |
649 | EXPORT_SYMBOL_GPL(machine_check_poll); | 651 | EXPORT_SYMBOL_GPL(machine_check_poll); |
650 | 652 | ||