aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-25 19:14:12 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-25 19:14:12 -0400
commit: 786f02b719f2285e2c0350b6a04dab4a1a0871a1 (patch)
tree: 0836083fa3bef945a46449c2d6badd5c29d7baad /arch/x86/kernel/cpu/mcheck
parent: fa2af6e4fe0c4d2f8875d42625b25675e8584010 (diff)
parent: 37c3459b67dd5a396a968e819cf4a86d24ac9ace (diff)
Merge tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull x86/mce merge window patches from Tony Luck:
 "Including two that make error_context() checks less sucky"

* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Add instruction recovery signatures to mce-severity table
  x86/mce: Fix check for processor context when machine check was taken.
  MCE: Fix vm86 handling for 32bit mce handler
  x86/mce Add validation check before GHES error is recorded
  x86/mce: Avoid reading every machine check bank register twice.
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-apei.c 3
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-severity.c 26
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 24
3 files changed, 41 insertions, 12 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 507ea58688e2..cd8b166a1735 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
42 struct mce m; 42 struct mce m;
43 43
44 /* Only corrected MC is reported */ 44 /* Only corrected MC is reported */
45 if (!corrected) 45 if (!corrected || !(mem_err->validation_bits &
46 CPER_MEM_VALID_PHYSICAL_ADDRESS))
46 return; 47 return;
47 48
48 mce_setup(&m); 49 mce_setup(&m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 0c82091b1652..413c2ced887c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -126,6 +126,16 @@ static struct severity {
126 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), 126 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
127 USER 127 USER
128 ), 128 ),
129 MCESEV(
130 KEEP, "HT thread notices Action required: instruction fetch error",
131 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
132 MCGMASK(MCG_STATUS_EIPV, 0)
133 ),
134 MCESEV(
135 AR, "Action required: instruction fetch error",
136 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
137 USER
138 ),
129#endif 139#endif
130 MCESEV( 140 MCESEV(
131 PANIC, "Action required: unknown MCACOD", 141 PANIC, "Action required: unknown MCACOD",
@@ -165,15 +175,19 @@ static struct severity {
165}; 175};
166 176
167/* 177/*
168 * If the EIPV bit is set, it means the saved IP is the 178 * If mcgstatus indicated that ip/cs on the stack were
169 * instruction which caused the MCE. 179 * no good, then "m->cs" will be zero and we will have
180 * to assume the worst case (IN_KERNEL) as we actually
181 * have no idea what we were executing when the machine
182 * check hit.
183 * If we do have a good "m->cs" (or a faked one in the
184 * case we were executing in VM86 mode) we can use it to
185 * distinguish an exception taken in user from one
186 * taken in the kernel.
170 */ 187 */
171static int error_context(struct mce *m) 188static int error_context(struct mce *m)
172{ 189{
173 if (m->mcgstatus & MCG_STATUS_EIPV) 190 return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
174 return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
175 /* Unknown, assume kernel */
176 return IN_KERNEL;
177} 191}
178 192
179int mce_severity(struct mce *m, int tolerant, char **msg) 193int mce_severity(struct mce *m, int tolerant, char **msg)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2afcbd253e1d..b772dd6ad450 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
437 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { 437 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
438 m->ip = regs->ip; 438 m->ip = regs->ip;
439 m->cs = regs->cs; 439 m->cs = regs->cs;
440
441 /*
442 * When in VM86 mode make the cs look like ring 3
443 * always. This is a lie, but it's better than passing
444 * the additional vm86 bit around everywhere.
445 */
446 if (v8086_mode(regs))
447 m->cs |= 3;
440 } 448 }
441 /* Use accurate RIP reporting if available. */ 449 /* Use accurate RIP reporting if available. */
442 if (rip_msr) 450 if (rip_msr)
@@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
641 * Do a quick check if any of the events requires a panic. 649 * Do a quick check if any of the events requires a panic.
642 * This decides if we keep the events around or clear them. 650 * This decides if we keep the events around or clear them.
643 */ 651 */
644static int mce_no_way_out(struct mce *m, char **msg) 652static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
645{ 653{
646 int i; 654 int i, ret = 0;
647 655
648 for (i = 0; i < banks; i++) { 656 for (i = 0; i < banks; i++) {
649 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 657 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
658 if (m->status & MCI_STATUS_VAL)
659 __set_bit(i, validp);
650 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 660 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
651 return 1; 661 ret = 1;
652 } 662 }
653 return 0; 663 return ret;
654} 664}
655 665
656/* 666/*
@@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1013 */ 1023 */
1014 int kill_it = 0; 1024 int kill_it = 0;
1015 DECLARE_BITMAP(toclear, MAX_NR_BANKS); 1025 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
1026 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1016 char *msg = "Unknown"; 1027 char *msg = "Unknown";
1017 1028
1018 atomic_inc(&mce_entry); 1029 atomic_inc(&mce_entry);
@@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1027 final = &__get_cpu_var(mces_seen); 1038 final = &__get_cpu_var(mces_seen);
1028 *final = m; 1039 *final = m;
1029 1040
1030 no_way_out = mce_no_way_out(&m, &msg); 1041 memset(valid_banks, 0, sizeof(valid_banks));
1042 no_way_out = mce_no_way_out(&m, &msg, valid_banks);
1031 1043
1032 barrier(); 1044 barrier();
1033 1045
@@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1047 order = mce_start(&no_way_out); 1059 order = mce_start(&no_way_out);
1048 for (i = 0; i < banks; i++) { 1060 for (i = 0; i < banks; i++) {
1049 __clear_bit(i, toclear); 1061 __clear_bit(i, toclear);
1062 if (!test_bit(i, valid_banks))
1063 continue;
1050 if (!mce_banks[i].ctl) 1064 if (!mce_banks[i].ctl)
1051 continue; 1065 continue;
1052 1066