diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 22:54:57 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 22:54:57 -0400 |
| commit | ffa6f55eb6188ee73339cab710fabf30d13110a7 (patch) | |
| tree | 75d28b242e5807b88bf3074d23421592b81d6cc6 | |
| parent | 275b103a26e218b3d739e5ab15be6b40303a1428 (diff) | |
| parent | 71a84402b93e5fbd8f817f40059c137e10171788 (diff) | |
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov:
- Support for varying MCA bank numbers per CPU: this is in preparation
for future CPU enablement (Yazen Ghannam)
- MCA banks read race fix (Tony Luck)
- Facility to filter MCEs which should not be logged (Yazen Ghannam)
- The usual round of cleanups and fixes
* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/MCE/AMD: Don't report L1 BTB MCA errors on some family 17h models
x86/MCE: Add an MCE-record filtering function
RAS/CEC: Increment cec_entered under the mutex lock
x86/mce: Fix debugfs_simple_attr.cocci warnings
x86/mce: Remove mce_report_event()
x86/mce: Handle varying MCA bank counts
x86/mce: Fix machine_check_poll() tests for error types
MAINTAINERS: Fix file pattern for X86 MCE INFRASTRUCTURE
x86/MCE: Group AMD function prototypes in <asm/mce.h>
| -rw-r--r-- | MAINTAINERS | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/mce.h | 25 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 52 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 102 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/genpool.c | 3 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/inject.c | 14 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/internal.h | 9 | ||||
| -rw-r--r-- | drivers/edac/mce_amd.c | 4 | ||||
| -rw-r--r-- | drivers/ras/cec.c | 4 |
9 files changed, 132 insertions(+), 83 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS index 58c1b7a35711..6d3c23bc6d15 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -16941,7 +16941,7 @@ M: Tony Luck <tony.luck@intel.com> | |||
| 16941 | M: Borislav Petkov <bp@alien8.de> | 16941 | M: Borislav Petkov <bp@alien8.de> |
| 16942 | L: linux-edac@vger.kernel.org | 16942 | L: linux-edac@vger.kernel.org |
| 16943 | S: Maintained | 16943 | S: Maintained |
| 16944 | F: arch/x86/kernel/cpu/mcheck/* | 16944 | F: arch/x86/kernel/cpu/mce/* |
| 16945 | 16945 | ||
| 16946 | X86 MICROCODE UPDATE SUPPORT | 16946 | X86 MICROCODE UPDATE SUPPORT |
| 16947 | M: Borislav Petkov <bp@alien8.de> | 16947 | M: Borislav Petkov <bp@alien8.de> |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 22d05e3835f0..dc2d4b206ab7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
| @@ -210,16 +210,6 @@ static inline void cmci_rediscover(void) {} | |||
| 210 | static inline void cmci_recheck(void) {} | 210 | static inline void cmci_recheck(void) {} |
| 211 | #endif | 211 | #endif |
| 212 | 212 | ||
| 213 | #ifdef CONFIG_X86_MCE_AMD | ||
| 214 | void mce_amd_feature_init(struct cpuinfo_x86 *c); | ||
| 215 | int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); | ||
| 216 | #else | ||
| 217 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } | ||
| 218 | static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; | ||
| 219 | #endif | ||
| 220 | |||
| 221 | static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } | ||
| 222 | |||
| 223 | int mce_available(struct cpuinfo_x86 *c); | 213 | int mce_available(struct cpuinfo_x86 *c); |
| 224 | bool mce_is_memory_error(struct mce *m); | 214 | bool mce_is_memory_error(struct mce *m); |
| 225 | bool mce_is_correctable(struct mce *m); | 215 | bool mce_is_correctable(struct mce *m); |
| @@ -345,12 +335,19 @@ extern bool amd_mce_is_memory_error(struct mce *m); | |||
| 345 | extern int mce_threshold_create_device(unsigned int cpu); | 335 | extern int mce_threshold_create_device(unsigned int cpu); |
| 346 | extern int mce_threshold_remove_device(unsigned int cpu); | 336 | extern int mce_threshold_remove_device(unsigned int cpu); |
| 347 | 337 | ||
| 348 | #else | 338 | void mce_amd_feature_init(struct cpuinfo_x86 *c); |
| 339 | int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); | ||
| 349 | 340 | ||
| 350 | static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; | 341 | #else |
| 351 | static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; | ||
| 352 | static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; | ||
| 353 | 342 | ||
| 343 | static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; | ||
| 344 | static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; | ||
| 345 | static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; | ||
| 346 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } | ||
| 347 | static inline int | ||
| 348 | umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; | ||
| 354 | #endif | 349 | #endif |
| 355 | 350 | ||
| 351 | static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } | ||
| 352 | |||
| 356 | #endif /* _ASM_X86_MCE_H */ | 353 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e64de5149e50..d904aafe6409 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c | |||
| @@ -563,33 +563,59 @@ out: | |||
| 563 | return offset; | 563 | return offset; |
| 564 | } | 564 | } |
| 565 | 565 | ||
| 566 | bool amd_filter_mce(struct mce *m) | ||
| 567 | { | ||
| 568 | enum smca_bank_types bank_type = smca_get_bank_type(m->bank); | ||
| 569 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
| 570 | u8 xec = (m->status >> 16) & 0x3F; | ||
| 571 | |||
| 572 | /* See Family 17h Models 10h-2Fh Erratum #1114. */ | ||
| 573 | if (c->x86 == 0x17 && | ||
| 574 | c->x86_model >= 0x10 && c->x86_model <= 0x2F && | ||
| 575 | bank_type == SMCA_IF && xec == 10) | ||
| 576 | return true; | ||
| 577 | |||
| 578 | return false; | ||
| 579 | } | ||
| 580 | |||
| 566 | /* | 581 | /* |
| 567 | * Turn off MC4_MISC thresholding banks on all family 0x15 models since | 582 | * Turn off thresholding banks for the following conditions: |
| 568 | * they're not supported there. | 583 | * - MC4_MISC thresholding is not supported on Family 0x15. |
| 584 | * - Prevent possible spurious interrupts from the IF bank on Family 0x17 | ||
| 585 | * Models 0x10-0x2F due to Erratum #1114. | ||
| 569 | */ | 586 | */ |
| 570 | void disable_err_thresholding(struct cpuinfo_x86 *c) | 587 | void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) |
| 571 | { | 588 | { |
| 572 | int i; | 589 | int i, num_msrs; |
| 573 | u64 hwcr; | 590 | u64 hwcr; |
| 574 | bool need_toggle; | 591 | bool need_toggle; |
| 575 | u32 msrs[] = { | 592 | u32 msrs[NR_BLOCKS]; |
| 576 | 0x00000413, /* MC4_MISC0 */ | 593 | |
| 577 | 0xc0000408, /* MC4_MISC1 */ | 594 | if (c->x86 == 0x15 && bank == 4) { |
| 578 | }; | 595 | msrs[0] = 0x00000413; /* MC4_MISC0 */ |
| 596 | msrs[1] = 0xc0000408; /* MC4_MISC1 */ | ||
| 597 | num_msrs = 2; | ||
| 598 | } else if (c->x86 == 0x17 && | ||
| 599 | (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { | ||
| 579 | 600 | ||
| 580 | if (c->x86 != 0x15) | 601 | if (smca_get_bank_type(bank) != SMCA_IF) |
| 602 | return; | ||
| 603 | |||
| 604 | msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); | ||
| 605 | num_msrs = 1; | ||
| 606 | } else { | ||
| 581 | return; | 607 | return; |
| 608 | } | ||
| 582 | 609 | ||
| 583 | rdmsrl(MSR_K7_HWCR, hwcr); | 610 | rdmsrl(MSR_K7_HWCR, hwcr); |
| 584 | 611 | ||
| 585 | /* McStatusWrEn has to be set */ | 612 | /* McStatusWrEn has to be set */ |
| 586 | need_toggle = !(hwcr & BIT(18)); | 613 | need_toggle = !(hwcr & BIT(18)); |
| 587 | |||
| 588 | if (need_toggle) | 614 | if (need_toggle) |
| 589 | wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); | 615 | wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); |
| 590 | 616 | ||
| 591 | /* Clear CntP bit safely */ | 617 | /* Clear CntP bit safely */ |
| 592 | for (i = 0; i < ARRAY_SIZE(msrs); i++) | 618 | for (i = 0; i < num_msrs; i++) |
| 593 | msr_clear_bit(msrs[i], 62); | 619 | msr_clear_bit(msrs[i], 62); |
| 594 | 620 | ||
| 595 | /* restore old settings */ | 621 | /* restore old settings */ |
| @@ -604,12 +630,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 604 | unsigned int bank, block, cpu = smp_processor_id(); | 630 | unsigned int bank, block, cpu = smp_processor_id(); |
| 605 | int offset = -1; | 631 | int offset = -1; |
| 606 | 632 | ||
| 607 | disable_err_thresholding(c); | ||
| 608 | |||
| 609 | for (bank = 0; bank < mca_cfg.banks; ++bank) { | 633 | for (bank = 0; bank < mca_cfg.banks; ++bank) { |
| 610 | if (mce_flags.smca) | 634 | if (mce_flags.smca) |
| 611 | smca_configure(bank, cpu); | 635 | smca_configure(bank, cpu); |
| 612 | 636 | ||
| 637 | disable_err_thresholding(c, bank); | ||
| 638 | |||
| 613 | for (block = 0; block < NR_BLOCKS; ++block) { | 639 | for (block = 0; block < NR_BLOCKS; ++block) { |
| 614 | address = get_block_address(address, low, high, bank, block); | 640 | address = get_block_address(address, low, high, bank, block); |
| 615 | if (!address) | 641 | if (!address) |
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b7fb541a4873..5112a50e6486 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c | |||
| @@ -460,23 +460,6 @@ static void mce_irq_work_cb(struct irq_work *entry) | |||
| 460 | mce_schedule_work(); | 460 | mce_schedule_work(); |
| 461 | } | 461 | } |
| 462 | 462 | ||
| 463 | static void mce_report_event(struct pt_regs *regs) | ||
| 464 | { | ||
| 465 | if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { | ||
| 466 | mce_notify_irq(); | ||
| 467 | /* | ||
| 468 | * Triggering the work queue here is just an insurance | ||
| 469 | * policy in case the syscall exit notify handler | ||
| 470 | * doesn't run soon enough or ends up running on the | ||
| 471 | * wrong CPU (can happen when audit sleeps) | ||
| 472 | */ | ||
| 473 | mce_schedule_work(); | ||
| 474 | return; | ||
| 475 | } | ||
| 476 | |||
| 477 | irq_work_queue(&mce_irq_work); | ||
| 478 | } | ||
| 479 | |||
| 480 | /* | 463 | /* |
| 481 | * Check if the address reported by the CPU is in a format we can parse. | 464 | * Check if the address reported by the CPU is in a format we can parse. |
| 482 | * It would be possible to add code for most other cases, but all would | 465 | * It would be possible to add code for most other cases, but all would |
| @@ -712,19 +695,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
| 712 | 695 | ||
| 713 | barrier(); | 696 | barrier(); |
| 714 | m.status = mce_rdmsrl(msr_ops.status(i)); | 697 | m.status = mce_rdmsrl(msr_ops.status(i)); |
| 698 | |||
| 699 | /* If this entry is not valid, ignore it */ | ||
| 715 | if (!(m.status & MCI_STATUS_VAL)) | 700 | if (!(m.status & MCI_STATUS_VAL)) |
| 716 | continue; | 701 | continue; |
| 717 | 702 | ||
| 718 | /* | 703 | /* |
| 719 | * Uncorrected or signalled events are handled by the exception | 704 | * If we are logging everything (at CPU online) or this |
| 720 | * handler when it is enabled, so don't process those here. | 705 | * is a corrected error, then we must log it. |
| 721 | * | ||
| 722 | * TBD do the same check for MCI_STATUS_EN here? | ||
| 723 | */ | 706 | */ |
| 724 | if (!(flags & MCP_UC) && | 707 | if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC)) |
| 725 | (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) | 708 | goto log_it; |
| 726 | continue; | 709 | |
| 710 | /* | ||
| 711 | * Newer Intel systems that support software error | ||
| 712 | * recovery need to make additional checks. Other | ||
| 713 | * CPUs should skip over uncorrected errors, but log | ||
| 714 | * everything else. | ||
| 715 | */ | ||
| 716 | if (!mca_cfg.ser) { | ||
| 717 | if (m.status & MCI_STATUS_UC) | ||
| 718 | continue; | ||
| 719 | goto log_it; | ||
| 720 | } | ||
| 727 | 721 | ||
| 722 | /* Log "not enabled" (speculative) errors */ | ||
| 723 | if (!(m.status & MCI_STATUS_EN)) | ||
| 724 | goto log_it; | ||
| 725 | |||
| 726 | /* | ||
| 727 | * Log UCNA (SDM: 15.6.3 "UCR Error Classification") | ||
| 728 | * UC == 1 && PCC == 0 && S == 0 | ||
| 729 | */ | ||
| 730 | if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S)) | ||
| 731 | goto log_it; | ||
| 732 | |||
| 733 | /* | ||
| 734 | * Skip anything else. Presumption is that our read of this | ||
| 735 | * bank is racing with a machine check. Leave the log alone | ||
| 736 | * for do_machine_check() to deal with it. | ||
| 737 | */ | ||
| 738 | continue; | ||
| 739 | |||
| 740 | log_it: | ||
| 728 | error_seen = true; | 741 | error_seen = true; |
| 729 | 742 | ||
| 730 | mce_read_aux(&m, i); | 743 | mce_read_aux(&m, i); |
| @@ -1301,7 +1314,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 1301 | mce_panic("Fatal machine check on current CPU", &m, msg); | 1314 | mce_panic("Fatal machine check on current CPU", &m, msg); |
| 1302 | 1315 | ||
| 1303 | if (worst > 0) | 1316 | if (worst > 0) |
| 1304 | mce_report_event(regs); | 1317 | irq_work_queue(&mce_irq_work); |
| 1318 | |||
| 1305 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | 1319 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); |
| 1306 | 1320 | ||
| 1307 | sync_core(); | 1321 | sync_core(); |
| @@ -1451,13 +1465,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); | |||
| 1451 | static int __mcheck_cpu_mce_banks_init(void) | 1465 | static int __mcheck_cpu_mce_banks_init(void) |
| 1452 | { | 1466 | { |
| 1453 | int i; | 1467 | int i; |
| 1454 | u8 num_banks = mca_cfg.banks; | ||
| 1455 | 1468 | ||
| 1456 | mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL); | 1469 | mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); |
| 1457 | if (!mce_banks) | 1470 | if (!mce_banks) |
| 1458 | return -ENOMEM; | 1471 | return -ENOMEM; |
| 1459 | 1472 | ||
| 1460 | for (i = 0; i < num_banks; i++) { | 1473 | for (i = 0; i < MAX_NR_BANKS; i++) { |
| 1461 | struct mce_bank *b = &mce_banks[i]; | 1474 | struct mce_bank *b = &mce_banks[i]; |
| 1462 | 1475 | ||
| 1463 | b->ctl = -1ULL; | 1476 | b->ctl = -1ULL; |
| @@ -1471,28 +1484,19 @@ static int __mcheck_cpu_mce_banks_init(void) | |||
| 1471 | */ | 1484 | */ |
| 1472 | static int __mcheck_cpu_cap_init(void) | 1485 | static int __mcheck_cpu_cap_init(void) |
| 1473 | { | 1486 | { |
| 1474 | unsigned b; | ||
| 1475 | u64 cap; | 1487 | u64 cap; |
| 1488 | u8 b; | ||
| 1476 | 1489 | ||
| 1477 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 1490 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
| 1478 | 1491 | ||
| 1479 | b = cap & MCG_BANKCNT_MASK; | 1492 | b = cap & MCG_BANKCNT_MASK; |
| 1480 | if (!mca_cfg.banks) | 1493 | if (WARN_ON_ONCE(b > MAX_NR_BANKS)) |
| 1481 | pr_info("CPU supports %d MCE banks\n", b); | ||
| 1482 | |||
| 1483 | if (b > MAX_NR_BANKS) { | ||
| 1484 | pr_warn("Using only %u machine check banks out of %u\n", | ||
| 1485 | MAX_NR_BANKS, b); | ||
| 1486 | b = MAX_NR_BANKS; | 1494 | b = MAX_NR_BANKS; |
| 1487 | } | ||
| 1488 | 1495 | ||
| 1489 | /* Don't support asymmetric configurations today */ | 1496 | mca_cfg.banks = max(mca_cfg.banks, b); |
| 1490 | WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); | ||
| 1491 | mca_cfg.banks = b; | ||
| 1492 | 1497 | ||
| 1493 | if (!mce_banks) { | 1498 | if (!mce_banks) { |
| 1494 | int err = __mcheck_cpu_mce_banks_init(); | 1499 | int err = __mcheck_cpu_mce_banks_init(); |
| 1495 | |||
| 1496 | if (err) | 1500 | if (err) |
| 1497 | return err; | 1501 | return err; |
| 1498 | } | 1502 | } |
| @@ -1771,6 +1775,14 @@ static void __mcheck_cpu_init_timer(void) | |||
| 1771 | mce_start_timer(t); | 1775 | mce_start_timer(t); |
| 1772 | } | 1776 | } |
| 1773 | 1777 | ||
| 1778 | bool filter_mce(struct mce *m) | ||
| 1779 | { | ||
| 1780 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
| 1781 | return amd_filter_mce(m); | ||
| 1782 | |||
| 1783 | return false; | ||
| 1784 | } | ||
| 1785 | |||
| 1774 | /* Handle unconfigured int18 (should never happen) */ | 1786 | /* Handle unconfigured int18 (should never happen) */ |
| 1775 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 1787 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
| 1776 | { | 1788 | { |
| @@ -2425,8 +2437,8 @@ static int fake_panic_set(void *data, u64 val) | |||
| 2425 | return 0; | 2437 | return 0; |
| 2426 | } | 2438 | } |
| 2427 | 2439 | ||
| 2428 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, | 2440 | DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, |
| 2429 | fake_panic_set, "%llu\n"); | 2441 | "%llu\n"); |
| 2430 | 2442 | ||
| 2431 | static int __init mcheck_debugfs_init(void) | 2443 | static int __init mcheck_debugfs_init(void) |
| 2432 | { | 2444 | { |
| @@ -2435,8 +2447,8 @@ static int __init mcheck_debugfs_init(void) | |||
| 2435 | dmce = mce_get_debugfs_dir(); | 2447 | dmce = mce_get_debugfs_dir(); |
| 2436 | if (!dmce) | 2448 | if (!dmce) |
| 2437 | return -ENOMEM; | 2449 | return -ENOMEM; |
| 2438 | ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, | 2450 | ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce, |
| 2439 | &fake_panic_fops); | 2451 | NULL, &fake_panic_fops); |
| 2440 | if (!ffake_panic) | 2452 | if (!ffake_panic) |
| 2441 | return -ENOMEM; | 2453 | return -ENOMEM; |
| 2442 | 2454 | ||
| @@ -2451,6 +2463,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); | |||
| 2451 | 2463 | ||
| 2452 | static int __init mcheck_late_init(void) | 2464 | static int __init mcheck_late_init(void) |
| 2453 | { | 2465 | { |
| 2466 | pr_info("Using %d MCE banks\n", mca_cfg.banks); | ||
| 2467 | |||
| 2454 | if (mca_cfg.recovery) | 2468 | if (mca_cfg.recovery) |
| 2455 | static_branch_inc(&mcsafe_key); | 2469 | static_branch_inc(&mcsafe_key); |
| 2456 | 2470 | ||
diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index 3395549c51d3..64d1d5a00f39 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c | |||
| @@ -99,6 +99,9 @@ int mce_gen_pool_add(struct mce *mce) | |||
| 99 | { | 99 | { |
| 100 | struct mce_evt_llist *node; | 100 | struct mce_evt_llist *node; |
| 101 | 101 | ||
| 102 | if (filter_mce(mce)) | ||
| 103 | return -EINVAL; | ||
| 104 | |||
| 102 | if (!mce_evt_pool) | 105 | if (!mce_evt_pool) |
| 103 | return -EINVAL; | 106 | return -EINVAL; |
| 104 | 107 | ||
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 3da9a8823e47..a6026170af92 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c | |||
| @@ -46,8 +46,6 @@ | |||
| 46 | static struct mce i_mce; | 46 | static struct mce i_mce; |
| 47 | static struct dentry *dfs_inj; | 47 | static struct dentry *dfs_inj; |
| 48 | 48 | ||
| 49 | static u8 n_banks; | ||
| 50 | |||
| 51 | #define MAX_FLAG_OPT_SIZE 4 | 49 | #define MAX_FLAG_OPT_SIZE 4 |
| 52 | #define NBCFG 0x44 | 50 | #define NBCFG 0x44 |
| 53 | 51 | ||
| @@ -570,9 +568,15 @@ err: | |||
| 570 | static int inj_bank_set(void *data, u64 val) | 568 | static int inj_bank_set(void *data, u64 val) |
| 571 | { | 569 | { |
| 572 | struct mce *m = (struct mce *)data; | 570 | struct mce *m = (struct mce *)data; |
| 571 | u8 n_banks; | ||
| 572 | u64 cap; | ||
| 573 | |||
| 574 | /* Get bank count on target CPU so we can handle non-uniform values. */ | ||
| 575 | rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); | ||
| 576 | n_banks = cap & MCG_BANKCNT_MASK; | ||
| 573 | 577 | ||
| 574 | if (val >= n_banks) { | 578 | if (val >= n_banks) { |
| 575 | pr_err("Non-existent MCE bank: %llu\n", val); | 579 | pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); |
| 576 | return -EINVAL; | 580 | return -EINVAL; |
| 577 | } | 581 | } |
| 578 | 582 | ||
| @@ -665,10 +669,6 @@ static struct dfs_node { | |||
| 665 | static int __init debugfs_init(void) | 669 | static int __init debugfs_init(void) |
| 666 | { | 670 | { |
| 667 | unsigned int i; | 671 | unsigned int i; |
| 668 | u64 cap; | ||
| 669 | |||
| 670 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 671 | n_banks = cap & MCG_BANKCNT_MASK; | ||
| 672 | 672 | ||
| 673 | dfs_inj = debugfs_create_dir("mce-inject", NULL); | 673 | dfs_inj = debugfs_create_dir("mce-inject", NULL); |
| 674 | if (!dfs_inj) | 674 | if (!dfs_inj) |
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index af5eab1e65e2..a34b55baa7aa 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h | |||
| @@ -173,4 +173,13 @@ struct mca_msr_regs { | |||
| 173 | 173 | ||
| 174 | extern struct mca_msr_regs msr_ops; | 174 | extern struct mca_msr_regs msr_ops; |
| 175 | 175 | ||
| 176 | /* Decide whether to add MCE record to MCE event pool or filter it out. */ | ||
| 177 | extern bool filter_mce(struct mce *m); | ||
| 178 | |||
| 179 | #ifdef CONFIG_X86_MCE_AMD | ||
| 180 | extern bool amd_filter_mce(struct mce *m); | ||
| 181 | #else | ||
| 182 | static inline bool amd_filter_mce(struct mce *m) { return false; }; | ||
| 183 | #endif | ||
| 184 | |||
| 176 | #endif /* __X86_MCE_INTERNAL_H__ */ | 185 | #endif /* __X86_MCE_INTERNAL_H__ */ |
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 0a1814dad6cf..bb0202ad7a13 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c | |||
| @@ -1004,7 +1004,7 @@ static inline void amd_decode_err_code(u16 ec) | |||
| 1004 | /* | 1004 | /* |
| 1005 | * Filter out unwanted MCE signatures here. | 1005 | * Filter out unwanted MCE signatures here. |
| 1006 | */ | 1006 | */ |
| 1007 | static bool amd_filter_mce(struct mce *m) | 1007 | static bool ignore_mce(struct mce *m) |
| 1008 | { | 1008 | { |
| 1009 | /* | 1009 | /* |
| 1010 | * NB GART TLB error reporting is disabled by default. | 1010 | * NB GART TLB error reporting is disabled by default. |
| @@ -1038,7 +1038,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) | |||
| 1038 | unsigned int fam = x86_family(m->cpuid); | 1038 | unsigned int fam = x86_family(m->cpuid); |
| 1039 | int ecc; | 1039 | int ecc; |
| 1040 | 1040 | ||
| 1041 | if (amd_filter_mce(m)) | 1041 | if (ignore_mce(m)) |
| 1042 | return NOTIFY_STOP; | 1042 | return NOTIFY_STOP; |
| 1043 | 1043 | ||
| 1044 | pr_emerg(HW_ERR "%s\n", decode_error_status(m)); | 1044 | pr_emerg(HW_ERR "%s\n", decode_error_status(m)); |
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 2d9ec378a8bc..88e4f3ff0cb8 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c | |||
| @@ -286,10 +286,10 @@ int cec_add_elem(u64 pfn) | |||
| 286 | if (!ce_arr.array || ce_arr.disabled) | 286 | if (!ce_arr.array || ce_arr.disabled) |
| 287 | return -ENODEV; | 287 | return -ENODEV; |
| 288 | 288 | ||
| 289 | ca->ces_entered++; | ||
| 290 | |||
| 291 | mutex_lock(&ce_mutex); | 289 | mutex_lock(&ce_mutex); |
| 292 | 290 | ||
| 291 | ca->ces_entered++; | ||
| 292 | |||
| 293 | if (ca->n == MAX_ELEMS) | 293 | if (ca->n == MAX_ELEMS) |
| 294 | WARN_ON(!del_lru_elem_unlocked(ca)); | 294 | WARN_ON(!del_lru_elem_unlocked(ca)); |
| 295 | 295 | ||
