about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-01-11 18:07:19 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-01-11 18:07:19 -0500
commit 4bd20db2c027eab7490e3c0466734738bef2dd24 (patch)
tree cd9c3aa1444728d10938fe271316b392f387f991
parent 5cb52b5e1654f3f1ed9c32e34456d98559c85aa0 (diff)
parent feab21f8356bde572663e29c9d9e48c964292e05 (diff)
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "Various x86 MCE fixes and small enhancements"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Make usable address checks Intel-only
  x86/mce: Add the missing memory error check on AMD
  x86/RAS: Remove mce.usable_addr
  x86/mce: Do not enter deferred errors into the generic pool twice
-rw-r--r-- arch/x86/include/uapi/asm/mce.h 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 82
2 files changed, 43 insertions, 41 deletions
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 03429da2fa80..2184943341bf 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -16,7 +16,7 @@ struct mce {
16 __u8 cpuvendor; /* cpu vendor as encoded in system.h */ 16 __u8 cpuvendor; /* cpu vendor as encoded in system.h */
17 __u8 inject_flags; /* software inject flags */ 17 __u8 inject_flags; /* software inject flags */
18 __u8 severity; 18 __u8 severity;
19 __u8 usable_addr; 19 __u8 pad;
20 __u32 cpuid; /* CPUID 1 EAX */ 20 __u32 cpuid; /* CPUID 1 EAX */
21 __u8 cs; /* code segment */ 21 __u8 cs; /* code segment */
22 __u8 bank; /* machine check bank */ 22 __u8 bank; /* machine check bank */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7e8a736d09db..a006f4cd792b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -114,7 +114,6 @@ static struct work_struct mce_work;
114static struct irq_work mce_irq_work; 114static struct irq_work mce_irq_work;
115 115
116static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); 116static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
117static int mce_usable_address(struct mce *m);
118 117
119/* 118/*
120 * CPU/chipset specific EDAC code can register a notifier call here to print 119 * CPU/chipset specific EDAC code can register a notifier call here to print
@@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs)
475 irq_work_queue(&mce_irq_work); 474 irq_work_queue(&mce_irq_work);
476} 475}
477 476
477/*
478 * Check if the address reported by the CPU is in a format we can parse.
479 * It would be possible to add code for most other cases, but all would
480 * be somewhat complicated (e.g. segment offset would require an instruction
481 * parser). So only support physical addresses up to page granuality for now.
482 */
483static int mce_usable_address(struct mce *m)
484{
485 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
486 return 0;
487
488 /* Checks after this one are Intel-specific: */
489 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
490 return 1;
491
492 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
493 return 0;
494 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
495 return 0;
496 return 1;
497}
498
478static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, 499static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
479 void *data) 500 void *data)
480{ 501{
@@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
484 if (!mce) 505 if (!mce)
485 return NOTIFY_DONE; 506 return NOTIFY_DONE;
486 507
487 if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { 508 if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
488 pfn = mce->addr >> PAGE_SHIFT; 509 pfn = mce->addr >> PAGE_SHIFT;
489 memory_failure(pfn, MCE_VECTOR, 0); 510 memory_failure(pfn, MCE_VECTOR, 0);
490 } 511 }
@@ -522,10 +543,10 @@ static bool memory_error(struct mce *m)
522 struct cpuinfo_x86 *c = &boot_cpu_data; 543 struct cpuinfo_x86 *c = &boot_cpu_data;
523 544
524 if (c->x86_vendor == X86_VENDOR_AMD) { 545 if (c->x86_vendor == X86_VENDOR_AMD) {
525 /* 546 /* ErrCodeExt[20:16] */
526 * coming soon 547 u8 xec = (m->status >> 16) & 0x1f;
527 */ 548
528 return false; 549 return (xec == 0x0 || xec == 0x8);
529 } else if (c->x86_vendor == X86_VENDOR_INTEL) { 550 } else if (c->x86_vendor == X86_VENDOR_INTEL) {
530 /* 551 /*
531 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes 552 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
567 */ 588 */
568bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 589bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
569{ 590{
570 bool error_logged = false; 591 bool error_seen = false;
571 struct mce m; 592 struct mce m;
572 int severity; 593 int severity;
573 int i; 594 int i;
@@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
601 (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) 622 (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
602 continue; 623 continue;
603 624
625 error_seen = true;
626
604 mce_read_aux(&m, i); 627 mce_read_aux(&m, i);
605 628
606 if (!(flags & MCP_TIMESTAMP)) 629 if (!(flags & MCP_TIMESTAMP))
@@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
608 631
609 severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); 632 severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
610 633
611 /* 634 if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
612 * In the cases where we don't have a valid address after all, 635 if (m.status & MCI_STATUS_ADDRV)
613 * do not add it into the ring buffer.
614 */
615 if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
616 if (m.status & MCI_STATUS_ADDRV) {
617 m.severity = severity; 636 m.severity = severity;
618 m.usable_addr = mce_usable_address(&m);
619
620 if (!mce_gen_pool_add(&m))
621 mce_schedule_work();
622 }
623 }
624 637
625 /* 638 /*
626 * Don't get the IP here because it's unlikely to 639 * Don't get the IP here because it's unlikely to
627 * have anything to do with the actual error location. 640 * have anything to do with the actual error location.
628 */ 641 */
629 if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { 642 if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
630 error_logged = true;
631 mce_log(&m); 643 mce_log(&m);
644 else if (mce_usable_address(&m)) {
645 /*
646 * Although we skipped logging this, we still want
647 * to take action. Add to the pool so the registered
648 * notifiers will see it.
649 */
650 if (!mce_gen_pool_add(&m))
651 mce_schedule_work();
632 } 652 }
633 653
634 /* 654 /*
@@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
644 664
645 sync_core(); 665 sync_core();
646 666
647 return error_logged; 667 return error_seen;
648} 668}
649EXPORT_SYMBOL_GPL(machine_check_poll); 669EXPORT_SYMBOL_GPL(machine_check_poll);
650 670
@@ -931,23 +951,6 @@ reset:
931 return ret; 951 return ret;
932} 952}
933 953
934/*
935 * Check if the address reported by the CPU is in a format we can parse.
936 * It would be possible to add code for most other cases, but all would
937 * be somewhat complicated (e.g. segment offset would require an instruction
938 * parser). So only support physical addresses up to page granuality for now.
939 */
940static int mce_usable_address(struct mce *m)
941{
942 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
943 return 0;
944 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
945 return 0;
946 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
947 return 0;
948 return 1;
949}
950
951static void mce_clear_state(unsigned long *toclear) 954static void mce_clear_state(unsigned long *toclear)
952{ 955{
953 int i; 956 int i;
@@ -1100,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1100 1103
1101 /* assuming valid severity level != 0 */ 1104 /* assuming valid severity level != 0 */
1102 m.severity = severity; 1105 m.severity = severity;
1103 m.usable_addr = mce_usable_address(&m);
1104 1106
1105 mce_log(&m); 1107 mce_log(&m);
1106 1108