diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-11 18:07:19 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-11 18:07:19 -0500 |
commit | 4bd20db2c027eab7490e3c0466734738bef2dd24 (patch) | |
tree | cd9c3aa1444728d10938fe271316b392f387f991 | |
parent | 5cb52b5e1654f3f1ed9c32e34456d98559c85aa0 (diff) | |
parent | feab21f8356bde572663e29c9d9e48c964292e05 (diff) |
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
"Various x86 MCE fixes and small enhancements"
* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Make usable address checks Intel-only
x86/mce: Add the missing memory error check on AMD
x86/RAS: Remove mce.usable_addr
x86/mce: Do not enter deferred errors into the generic pool twice
-rw-r--r-- | arch/x86/include/uapi/asm/mce.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 82 |
2 files changed, 43 insertions, 41 deletions
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 03429da2fa80..2184943341bf 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h | |||
@@ -16,7 +16,7 @@ struct mce { | |||
16 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ | 16 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ |
17 | __u8 inject_flags; /* software inject flags */ | 17 | __u8 inject_flags; /* software inject flags */ |
18 | __u8 severity; | 18 | __u8 severity; |
19 | __u8 usable_addr; | 19 | __u8 pad; |
20 | __u32 cpuid; /* CPUID 1 EAX */ | 20 | __u32 cpuid; /* CPUID 1 EAX */ |
21 | __u8 cs; /* code segment */ | 21 | __u8 cs; /* code segment */ |
22 | __u8 bank; /* machine check bank */ | 22 | __u8 bank; /* machine check bank */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7e8a736d09db..a006f4cd792b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -114,7 +114,6 @@ static struct work_struct mce_work; | |||
114 | static struct irq_work mce_irq_work; | 114 | static struct irq_work mce_irq_work; |
115 | 115 | ||
116 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | 116 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); |
117 | static int mce_usable_address(struct mce *m); | ||
118 | 117 | ||
119 | /* | 118 | /* |
120 | * CPU/chipset specific EDAC code can register a notifier call here to print | 119 | * CPU/chipset specific EDAC code can register a notifier call here to print |
@@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs) | |||
475 | irq_work_queue(&mce_irq_work); | 474 | irq_work_queue(&mce_irq_work); |
476 | } | 475 | } |
477 | 476 | ||
477 | /* | ||
478 | * Check if the address reported by the CPU is in a format we can parse. | ||
479 | * It would be possible to add code for most other cases, but all would | ||
480 | * be somewhat complicated (e.g. segment offset would require an instruction | ||
481 | * parser). So only support physical addresses up to page granularity for now. | ||
482 | */ | ||
483 | static int mce_usable_address(struct mce *m) | ||
484 | { | ||
485 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) | ||
486 | return 0; | ||
487 | |||
488 | /* Checks after this one are Intel-specific: */ | ||
489 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
490 | return 1; | ||
491 | |||
492 | if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) | ||
493 | return 0; | ||
494 | if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) | ||
495 | return 0; | ||
496 | return 1; | ||
497 | } | ||
498 | |||
478 | static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, | 499 | static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, |
479 | void *data) | 500 | void *data) |
480 | { | 501 | { |
@@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, | |||
484 | if (!mce) | 505 | if (!mce) |
485 | return NOTIFY_DONE; | 506 | return NOTIFY_DONE; |
486 | 507 | ||
487 | if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { | 508 | if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { |
488 | pfn = mce->addr >> PAGE_SHIFT; | 509 | pfn = mce->addr >> PAGE_SHIFT; |
489 | memory_failure(pfn, MCE_VECTOR, 0); | 510 | memory_failure(pfn, MCE_VECTOR, 0); |
490 | } | 511 | } |
@@ -522,10 +543,10 @@ static bool memory_error(struct mce *m) | |||
522 | struct cpuinfo_x86 *c = &boot_cpu_data; | 543 | struct cpuinfo_x86 *c = &boot_cpu_data; |
523 | 544 | ||
524 | if (c->x86_vendor == X86_VENDOR_AMD) { | 545 | if (c->x86_vendor == X86_VENDOR_AMD) { |
525 | /* | 546 | /* ErrCodeExt[20:16] */ |
526 | * coming soon | 547 | u8 xec = (m->status >> 16) & 0x1f; |
527 | */ | 548 | |
528 | return false; | 549 | return (xec == 0x0 || xec == 0x8); |
529 | } else if (c->x86_vendor == X86_VENDOR_INTEL) { | 550 | } else if (c->x86_vendor == X86_VENDOR_INTEL) { |
530 | /* | 551 | /* |
531 | * Intel SDM Volume 3B - 15.9.2 Compound Error Codes | 552 | * Intel SDM Volume 3B - 15.9.2 Compound Error Codes |
@@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); | |||
567 | */ | 588 | */ |
568 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | 589 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) |
569 | { | 590 | { |
570 | bool error_logged = false; | 591 | bool error_seen = false; |
571 | struct mce m; | 592 | struct mce m; |
572 | int severity; | 593 | int severity; |
573 | int i; | 594 | int i; |
@@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
601 | (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) | 622 | (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) |
602 | continue; | 623 | continue; |
603 | 624 | ||
625 | error_seen = true; | ||
626 | |||
604 | mce_read_aux(&m, i); | 627 | mce_read_aux(&m, i); |
605 | 628 | ||
606 | if (!(flags & MCP_TIMESTAMP)) | 629 | if (!(flags & MCP_TIMESTAMP)) |
@@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
608 | 631 | ||
609 | severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); | 632 | severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); |
610 | 633 | ||
611 | /* | 634 | if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) |
612 | * In the cases where we don't have a valid address after all, | 635 | if (m.status & MCI_STATUS_ADDRV) |
613 | * do not add it into the ring buffer. | ||
614 | */ | ||
615 | if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { | ||
616 | if (m.status & MCI_STATUS_ADDRV) { | ||
617 | m.severity = severity; | 636 | m.severity = severity; |
618 | m.usable_addr = mce_usable_address(&m); | ||
619 | |||
620 | if (!mce_gen_pool_add(&m)) | ||
621 | mce_schedule_work(); | ||
622 | } | ||
623 | } | ||
624 | 637 | ||
625 | /* | 638 | /* |
626 | * Don't get the IP here because it's unlikely to | 639 | * Don't get the IP here because it's unlikely to |
627 | * have anything to do with the actual error location. | 640 | * have anything to do with the actual error location. |
628 | */ | 641 | */ |
629 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { | 642 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) |
630 | error_logged = true; | ||
631 | mce_log(&m); | 643 | mce_log(&m); |
644 | else if (mce_usable_address(&m)) { | ||
645 | /* | ||
646 | * Although we skipped logging this, we still want | ||
647 | * to take action. Add to the pool so the registered | ||
648 | * notifiers will see it. | ||
649 | */ | ||
650 | if (!mce_gen_pool_add(&m)) | ||
651 | mce_schedule_work(); | ||
632 | } | 652 | } |
633 | 653 | ||
634 | /* | 654 | /* |
@@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
644 | 664 | ||
645 | sync_core(); | 665 | sync_core(); |
646 | 666 | ||
647 | return error_logged; | 667 | return error_seen; |
648 | } | 668 | } |
649 | EXPORT_SYMBOL_GPL(machine_check_poll); | 669 | EXPORT_SYMBOL_GPL(machine_check_poll); |
650 | 670 | ||
@@ -931,23 +951,6 @@ reset: | |||
931 | return ret; | 951 | return ret; |
932 | } | 952 | } |
933 | 953 | ||
934 | /* | ||
935 | * Check if the address reported by the CPU is in a format we can parse. | ||
936 | * It would be possible to add code for most other cases, but all would | ||
937 | * be somewhat complicated (e.g. segment offset would require an instruction | ||
938 | * parser). So only support physical addresses up to page granularity for now. | ||
939 | */ | ||
940 | static int mce_usable_address(struct mce *m) | ||
941 | { | ||
942 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) | ||
943 | return 0; | ||
944 | if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) | ||
945 | return 0; | ||
946 | if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) | ||
947 | return 0; | ||
948 | return 1; | ||
949 | } | ||
950 | |||
951 | static void mce_clear_state(unsigned long *toclear) | 954 | static void mce_clear_state(unsigned long *toclear) |
952 | { | 955 | { |
953 | int i; | 956 | int i; |
@@ -1100,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1100 | 1103 | ||
1101 | /* assuming valid severity level != 0 */ | 1104 | /* assuming valid severity level != 0 */ |
1102 | m.severity = severity; | 1105 | m.severity = severity; |
1103 | m.usable_addr = mce_usable_address(&m); | ||
1104 | 1106 | ||
1105 | mce_log(&m); | 1107 | mce_log(&m); |
1106 | 1108 | ||