diff options
| author | Tony Luck <tony.luck@intel.com> | 2012-07-19 14:28:46 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2012-07-26 09:05:47 -0400 |
| commit | 61b0fccd7f114573f973dfe25d864608822dc09e (patch) | |
| tree | 8ba7f8a8d984cba1ae1c1a528e96309e6a73cd2a | |
| parent | 736edce5f395b8309a61aa62c36c4356abc83219 (diff) | |
x86/mce: Add quirk for instruction recovery on Sandy Bridge processors
Sandy Bridge processors follow the SDM (Vol 3B, Table 15-20) and
set both the RIPV and EIPV bits in the MCG_STATUS register to
zero for machine checks during instruction fetch. This is more
than a little counter-intuitive and means that Linux cannot
recover from these errors. Rather than insert special case code
at several places in mce.c and mce-severity.c, we pretend the
EIPV bit was set for just this case early in processing the
machine check.
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Chen Gong <gong.chen@linux.intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Link: http://lkml.kernel.org/r/180a06f3f357cf9f78259ae443a082b14a29535b.1343078495.git.tony.luck@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 43 |
1 file changed, 40 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a5a5dc1ff15..8cf60e29790a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
| @@ -105,6 +105,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
| 105 | 105 | ||
| 106 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 106 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
| 107 | 107 | ||
| 108 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | ||
| 109 | |||
| 108 | /* | 110 | /* |
| 109 | * CPU/chipset specific EDAC code can register a notifier call here to print | 111 | * CPU/chipset specific EDAC code can register a notifier call here to print |
| 110 | * MCE errors in a human-readable form. | 112 | * MCE errors in a human-readable form. |
| @@ -652,14 +654,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); | |||
| 652 | * Do a quick check if any of the events requires a panic. | 654 | * Do a quick check if any of the events requires a panic. |
| 653 | * This decides if we keep the events around or clear them. | 655 | * This decides if we keep the events around or clear them. |
| 654 | */ | 656 | */ |
| 655 | static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp) | 657 | static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, |
| 658 | struct pt_regs *regs) | ||
| 656 | { | 659 | { |
| 657 | int i, ret = 0; | 660 | int i, ret = 0; |
| 658 | 661 | ||
| 659 | for (i = 0; i < banks; i++) { | 662 | for (i = 0; i < banks; i++) { |
| 660 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 663 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
| 661 | if (m->status & MCI_STATUS_VAL) | 664 | if (m->status & MCI_STATUS_VAL) { |
| 662 | __set_bit(i, validp); | 665 | __set_bit(i, validp); |
| 666 | if (quirk_no_way_out) | ||
| 667 | quirk_no_way_out(i, m, regs); | ||
| 668 | } | ||
| 663 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 669 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
| 664 | ret = 1; | 670 | ret = 1; |
| 665 | } | 671 | } |
| @@ -1042,7 +1048,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 1042 | *final = m; | 1048 | *final = m; |
| 1043 | 1049 | ||
| 1044 | memset(valid_banks, 0, sizeof(valid_banks)); | 1050 | memset(valid_banks, 0, sizeof(valid_banks)); |
| 1045 | no_way_out = mce_no_way_out(&m, &msg, valid_banks); | 1051 | no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs); |
| 1046 | 1052 | ||
| 1047 | barrier(); | 1053 | barrier(); |
| 1048 | 1054 | ||
| @@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void) | |||
| 1418 | } | 1424 | } |
| 1419 | } | 1425 | } |
| 1420 | 1426 | ||
| 1427 | /* | ||
| 1428 | * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and | ||
| 1429 | * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM | ||
| 1430 | * Vol 3B Table 15-20). But this confuses both the code that determines | ||
| 1431 | * whether the machine check occurred in kernel or user mode, and also | ||
| 1432 | * the severity assessment code. Pretend that EIPV was set, and take the | ||
| 1433 | * ip/cs values from the pt_regs that mce_gather_info() ignored earlier. | ||
| 1434 | */ | ||
| 1435 | static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) | ||
| 1436 | { | ||
| 1437 | if (bank != 0) | ||
| 1438 | return; | ||
| 1439 | if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0) | ||
| 1440 | return; | ||
| 1441 | if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC| | ||
| 1442 | MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV| | ||
| 1443 | MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR| | ||
| 1444 | MCACOD)) != | ||
| 1445 | (MCI_STATUS_UC|MCI_STATUS_EN| | ||
| 1446 | MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S| | ||
| 1447 | MCI_STATUS_AR|MCACOD_INSTR)) | ||
| 1448 | return; | ||
| 1449 | |||
| 1450 | m->mcgstatus |= MCG_STATUS_EIPV; | ||
| 1451 | m->ip = regs->ip; | ||
| 1452 | m->cs = regs->cs; | ||
| 1453 | } | ||
| 1454 | |||
| 1421 | /* Add per CPU specific workarounds here */ | 1455 | /* Add per CPU specific workarounds here */ |
| 1422 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | 1456 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
| 1423 | { | 1457 | { |
| @@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
| 1515 | */ | 1549 | */ |
| 1516 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) | 1550 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) |
| 1517 | mce_bootlog = 0; | 1551 | mce_bootlog = 0; |
| 1552 | |||
| 1553 | if (c->x86 == 6 && c->x86_model == 45) | ||
| 1554 | quirk_no_way_out = quirk_sandybridge_ifu; | ||
| 1518 | } | 1555 | } |
| 1519 | if (monarch_timeout < 0) | 1556 | if (monarch_timeout < 0) |
| 1520 | monarch_timeout = 0; | 1557 | monarch_timeout = 0; |
