diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-25 19:14:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-25 19:14:12 -0400 |
commit | 786f02b719f2285e2c0350b6a04dab4a1a0871a1 (patch) | |
tree | 0836083fa3bef945a46449c2d6badd5c29d7baad /arch/x86/kernel/cpu/mcheck | |
parent | fa2af6e4fe0c4d2f8875d42625b25675e8584010 (diff) | |
parent | 37c3459b67dd5a396a968e819cf4a86d24ac9ace (diff) |
Merge tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull x86/mce merge window patches from Tony Luck:
"Including two that make error_context() checks less sucky"
* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
x86/mce: Add instruction recovery signatures to mce-severity table
x86/mce: Fix check for processor context when machine check was taken.
MCE: Fix vm86 handling for 32bit mce handler
x86/mce Add validation check before GHES error is recorded
x86/mce: Avoid reading every machine check bank register twice.
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-apei.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 24 |
3 files changed, 41 insertions, 12 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 507ea58688e2..cd8b166a1735 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | |||
42 | struct mce m; | 42 | struct mce m; |
43 | 43 | ||
44 | /* Only corrected MC is reported */ | 44 | /* Only corrected MC is reported */ |
45 | if (!corrected) | 45 | if (!corrected || !(mem_err->validation_bits & |
46 | CPER_MEM_VALID_PHYSICAL_ADDRESS)) | ||
46 | return; | 47 | return; |
47 | 48 | ||
48 | mce_setup(&m); | 49 | mce_setup(&m); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 0c82091b1652..413c2ced887c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -126,6 +126,16 @@ static struct severity { | |||
126 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), | 126 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), |
127 | USER | 127 | USER |
128 | ), | 128 | ), |
129 | MCESEV( | ||
130 | KEEP, "HT thread notices Action required: instruction fetch error", | ||
131 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), | ||
132 | MCGMASK(MCG_STATUS_EIPV, 0) | ||
133 | ), | ||
134 | MCESEV( | ||
135 | AR, "Action required: instruction fetch error", | ||
136 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), | ||
137 | USER | ||
138 | ), | ||
129 | #endif | 139 | #endif |
130 | MCESEV( | 140 | MCESEV( |
131 | PANIC, "Action required: unknown MCACOD", | 141 | PANIC, "Action required: unknown MCACOD", |
@@ -165,15 +175,19 @@ static struct severity { | |||
165 | }; | 175 | }; |
166 | 176 | ||
167 | /* | 177 | /* |
168 | * If the EIPV bit is set, it means the saved IP is the | 178 | * If mcgstatus indicated that ip/cs on the stack were |
169 | * instruction which caused the MCE. | 179 | * no good, then "m->cs" will be zero and we will have |
180 | * to assume the worst case (IN_KERNEL) as we actually | ||
181 | * have no idea what we were executing when the machine | ||
182 | * check hit. | ||
183 | * If we do have a good "m->cs" (or a faked one in the | ||
184 | * case we were executing in VM86 mode) we can use it to | ||
185 | * distinguish an exception taken in user from one | |
186 | * taken in the kernel. | ||
170 | */ | 187 | */ |
171 | static int error_context(struct mce *m) | 188 | static int error_context(struct mce *m) |
172 | { | 189 | { |
173 | if (m->mcgstatus & MCG_STATUS_EIPV) | 190 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; |
174 | return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | ||
175 | /* Unknown, assume kernel */ | ||
176 | return IN_KERNEL; | ||
177 | } | 191 | } |
178 | 192 | ||
179 | int mce_severity(struct mce *m, int tolerant, char **msg) | 193 | int mce_severity(struct mce *m, int tolerant, char **msg) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2afcbd253e1d..b772dd6ad450 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) | |||
437 | if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { | 437 | if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { |
438 | m->ip = regs->ip; | 438 | m->ip = regs->ip; |
439 | m->cs = regs->cs; | 439 | m->cs = regs->cs; |
440 | |||
441 | /* | ||
442 | * When in VM86 mode make the cs look like ring 3 | ||
443 | * always. This is a lie, but it's better than passing | ||
444 | * the additional vm86 bit around everywhere. | ||
445 | */ | ||
446 | if (v8086_mode(regs)) | ||
447 | m->cs |= 3; | ||
440 | } | 448 | } |
441 | /* Use accurate RIP reporting if available. */ | 449 | /* Use accurate RIP reporting if available. */ |
442 | if (rip_msr) | 450 | if (rip_msr) |
@@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); | |||
641 | * Do a quick check if any of the events requires a panic. | 649 | * Do a quick check if any of the events requires a panic. |
642 | * This decides if we keep the events around or clear them. | 650 | * This decides if we keep the events around or clear them. |
643 | */ | 651 | */ |
644 | static int mce_no_way_out(struct mce *m, char **msg) | 652 | static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp) |
645 | { | 653 | { |
646 | int i; | 654 | int i, ret = 0; |
647 | 655 | ||
648 | for (i = 0; i < banks; i++) { | 656 | for (i = 0; i < banks; i++) { |
649 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 657 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
658 | if (m->status & MCI_STATUS_VAL) | ||
659 | __set_bit(i, validp); | ||
650 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 660 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
651 | return 1; | 661 | ret = 1; |
652 | } | 662 | } |
653 | return 0; | 663 | return ret; |
654 | } | 664 | } |
655 | 665 | ||
656 | /* | 666 | /* |
@@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1013 | */ | 1023 | */ |
1014 | int kill_it = 0; | 1024 | int kill_it = 0; |
1015 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1025 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
1026 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | ||
1016 | char *msg = "Unknown"; | 1027 | char *msg = "Unknown"; |
1017 | 1028 | ||
1018 | atomic_inc(&mce_entry); | 1029 | atomic_inc(&mce_entry); |
@@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1027 | final = &__get_cpu_var(mces_seen); | 1038 | final = &__get_cpu_var(mces_seen); |
1028 | *final = m; | 1039 | *final = m; |
1029 | 1040 | ||
1030 | no_way_out = mce_no_way_out(&m, &msg); | 1041 | memset(valid_banks, 0, sizeof(valid_banks)); |
1042 | no_way_out = mce_no_way_out(&m, &msg, valid_banks); | ||
1031 | 1043 | ||
1032 | barrier(); | 1044 | barrier(); |
1033 | 1045 | ||
@@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1047 | order = mce_start(&no_way_out); | 1059 | order = mce_start(&no_way_out); |
1048 | for (i = 0; i < banks; i++) { | 1060 | for (i = 0; i < banks; i++) { |
1049 | __clear_bit(i, toclear); | 1061 | __clear_bit(i, toclear); |
1062 | if (!test_bit(i, valid_banks)) | ||
1063 | continue; | ||
1050 | if (!mce_banks[i].ctl) | 1064 | if (!mce_banks[i].ctl) |
1051 | continue; | 1065 | continue; |
1052 | 1066 | ||