aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/mce.h10
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c82
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c84
4 files changed, 137 insertions, 44 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ba1f8890cf51..afd3cdf6f8ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -15,6 +15,7 @@
15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ 15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
16#define MCG_EXT_CNT_SHIFT 16 16#define MCG_EXT_CNT_SHIFT 16
17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) 17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
18 19
19#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ 20#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
20#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ 21#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
@@ -27,6 +28,15 @@
27#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ 28#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
28#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ 29#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
29#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ 30#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
31#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
32#define MCI_STATUS_AR (1ULL<<55) /* Action required */
33
34/* MISC register defines */
35#define MCM_ADDR_SEGOFF 0 /* segment offset */
36#define MCM_ADDR_LINEAR 1 /* linear address */
37#define MCM_ADDR_PHYS 2 /* physical address */
38#define MCM_ADDR_MEM 3 /* memory address */
39#define MCM_ADDR_GENERIC 7 /* generic */
30 40
31/* Fields are zero when not available */ 41/* Fields are zero when not available */
32struct mce { 42struct mce {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index f126b4ae7a25..54dcb8ff12e5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -2,9 +2,14 @@
2 2
3enum severity_level { 3enum severity_level {
4 MCE_NO_SEVERITY, 4 MCE_NO_SEVERITY,
5 MCE_KEEP_SEVERITY,
5 MCE_SOME_SEVERITY, 6 MCE_SOME_SEVERITY,
7 MCE_AO_SEVERITY,
6 MCE_UC_SEVERITY, 8 MCE_UC_SEVERITY,
9 MCE_AR_SEVERITY,
7 MCE_PANIC_SEVERITY, 10 MCE_PANIC_SEVERITY,
8}; 11};
9 12
10int mce_severity(struct mce *a, int tolerant, char **msg); 13int mce_severity(struct mce *a, int tolerant, char **msg);
14
15extern int mce_ser;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c189e89a89ae..4f4d2caf4043 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -19,43 +19,117 @@
19 * first. Since there are quite a lot of combinations test the bits in a 19 * first. Since there are quite a lot of combinations test the bits in a
20 * table-driven way. The rules are simply processed in order, first 20 * table-driven way. The rules are simply processed in order, first
21 * match wins. 21 * match wins.
22 *
23 * Note this is only used for machine check exceptions, the corrected
24 * errors use much simpler rules. The exceptions still check for the corrected
25 * errors, but only to leave them alone for the CMCI handler (except for
26 * panic situations)
22 */ 27 */
23 28
29enum context { IN_KERNEL = 1, IN_USER = 2 };
30enum ser { SER_REQUIRED = 1, NO_SER = 2 };
31
24static struct severity { 32static struct severity {
25 u64 mask; 33 u64 mask;
26 u64 result; 34 u64 result;
27 unsigned char sev; 35 unsigned char sev;
28 unsigned char mcgmask; 36 unsigned char mcgmask;
29 unsigned char mcgres; 37 unsigned char mcgres;
38 unsigned char ser;
39 unsigned char context;
30 char *msg; 40 char *msg;
31} severities[] = { 41} severities[] = {
42#define KERNEL .context = IN_KERNEL
43#define USER .context = IN_USER
44#define SER .ser = SER_REQUIRED
45#define NOSER .ser = NO_SER
32#define SEV(s) .sev = MCE_ ## s ## _SEVERITY 46#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
33#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } 47#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
34#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } 48#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
35#define MCGMASK(x, res, s, m, r...) \ 49#define MCGMASK(x, res, s, m, r...) \
36 { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } 50 { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
51#define MASK(x, y, s, m, r...) \
52 { .mask = x, .result = y, SEV(s), .msg = m, ## r }
53#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
54#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
55#define MCACOD 0xffff
56
37 BITCLR(MCI_STATUS_VAL, NO, "Invalid"), 57 BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
38 BITCLR(MCI_STATUS_EN, NO, "Not enabled"), 58 BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
39 BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), 59 BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
40 MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"), 60 /* When MCIP is not set something is very confused */
61 MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
62 /* Neither restart nor error IP -- no chance to recover -> PANIC */
63 MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
64 "Neither restart nor error IP"),
65 MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
66 KERNEL),
67 BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
68 MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
69 "Spurious not enabled", SER),
70
71 /* ignore OVER for UCNA */
72 MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
73 "Uncorrected no action required", SER),
74 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
75 "Illegal combination (UCNA with AR=1)", SER),
76 MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
77
78 /* AR add known MCACODs here */
79 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
80 "Action required with lost events", SER),
81 MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
82 "Action required; unknown MCACOD", SER),
83
84 /* known AO MCACODs: */
85 MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
86 "Action optional: memory scrubbing error", SER),
87 MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
88 "Action optional: last level cache writeback error", SER),
89
90 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
91 "Action optional unknown MCACOD", SER),
92 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
93 "Action optional with lost events", SER),
41 BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), 94 BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
42 BITSET(MCI_STATUS_UC, UC, "Uncorrected"), 95 BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
43 BITSET(0, SOME, "No match") /* always matches. keep at end */ 96 BITSET(0, SOME, "No match") /* always matches. keep at end */
44}; 97};
45 98
99/*
100 * If the EIPV bit is set, it means the saved IP is the
101 * instruction which caused the MCE.
102 */
103static int error_context(struct mce *m)
104{
105 if (m->mcgstatus & MCG_STATUS_EIPV)
106 return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
107 /* Unknown, assume kernel */
108 return IN_KERNEL;
109}
110
46int mce_severity(struct mce *a, int tolerant, char **msg) 111int mce_severity(struct mce *a, int tolerant, char **msg)
47{ 112{
113 enum context ctx = error_context(a);
48 struct severity *s; 114 struct severity *s;
115
49 for (s = severities;; s++) { 116 for (s = severities;; s++) {
50 if ((a->status & s->mask) != s->result) 117 if ((a->status & s->mask) != s->result)
51 continue; 118 continue;
52 if ((a->mcgstatus & s->mcgmask) != s->mcgres) 119 if ((a->mcgstatus & s->mcgmask) != s->mcgres)
53 continue; 120 continue;
54 if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) && 121 if (s->ser == SER_REQUIRED && !mce_ser)
55 tolerant < 1) 122 continue;
56 return MCE_PANIC_SEVERITY; 123 if (s->ser == NO_SER && mce_ser)
124 continue;
125 if (s->context && ctx != s->context)
126 continue;
57 if (msg) 127 if (msg)
58 *msg = s->msg; 128 *msg = s->msg;
129 if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
130 if (panic_on_oops || tolerant < 1)
131 return MCE_PANIC_SEVERITY;
132 }
59 return s->sev; 133 return s->sev;
60 } 134 }
61} 135}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff9c732989de..f051a7807ab4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int rip_msr;
83static int mce_bootlog = -1; 83static int mce_bootlog = -1;
84static int monarch_timeout = -1; 84static int monarch_timeout = -1;
85static int mce_panic_timeout; 85static int mce_panic_timeout;
86int mce_ser;
86 87
87static char trigger[128]; 88static char trigger[128];
88static char *trigger_argv[2] = { trigger, NULL }; 89static char *trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
391 * Those are just logged through /dev/mcelog. 392 * Those are just logged through /dev/mcelog.
392 * 393 *
393 * This is executed in standard interrupt context. 394 * This is executed in standard interrupt context.
395 *
396 * Note: spec recommends to panic for fatal unsignalled
397 * errors here. However this would be quite problematic --
398 * we would need to reimplement the Monarch handling and
399 * it would mess up the exclusion between exception handler
400 * and poll hander -- * so we skip this for now.
401 * These cases should not happen anyways, or only when the CPU
402 * is already totally * confused. In this case it's likely it will
403 * not fully execute the machine check handler either.
394 */ 404 */
395void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 405void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
396{ 406{
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
417 continue; 427 continue;
418 428
419 /* 429 /*
420 * Uncorrected events are handled by the exception handler 430 * Uncorrected or signalled events are handled by the exception
421 * when it is enabled. But when the exception is disabled log 431 * handler when it is enabled, so don't process those here.
422 * everything.
423 * 432 *
424 * TBD do the same check for MCI_STATUS_EN here? 433 * TBD do the same check for MCI_STATUS_EN here?
425 */ 434 */
426 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) 435 if (!(flags & MCP_UC) &&
436 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
427 continue; 437 continue;
428 438
429 if (m.status & MCI_STATUS_MISCV) 439 if (m.status & MCI_STATUS_MISCV)
@@ -790,6 +800,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
790 barrier(); 800 barrier();
791 801
792 /* 802 /*
803 * When there is no restart IP we must always kill or panic.
804 */
805 if (!(m.mcgstatus & MCG_STATUS_RIPV))
806 kill_it = 1;
807
808 /*
793 * Go through all the banks in exclusion of the other CPUs. 809 * Go through all the banks in exclusion of the other CPUs.
794 * This way we don't report duplicated events on shared banks 810 * This way we don't report duplicated events on shared banks
795 * because the first one to see it will clear it. 811 * because the first one to see it will clear it.
@@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
809 continue; 825 continue;
810 826
811 /* 827 /*
812 * Non uncorrected errors are handled by machine_check_poll 828 * Non uncorrected or non signaled errors are handled by
813 * Leave them alone, unless this panics. 829 * machine_check_poll. Leave them alone, unless this panics.
814 */ 830 */
815 if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out) 831 if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
832 !no_way_out)
816 continue; 833 continue;
817 834
818 /* 835 /*
@@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
820 */ 837 */
821 add_taint(TAINT_MACHINE_CHECK); 838 add_taint(TAINT_MACHINE_CHECK);
822 839
823 __set_bit(i, toclear); 840 severity = mce_severity(&m, tolerant, NULL);
824 841
825 if (m.status & MCI_STATUS_EN) { 842 /*
826 /* 843 * When the machine check was meant for the corrected-error handler,
827 * If this error was uncorrectable and there was 844 * don't touch it unless we're panicking.
828 * an overflow, we're in trouble. If no overflow, 845 */
829 * we might get away with just killing a task. 846 if (severity == MCE_KEEP_SEVERITY && !no_way_out)
830 */ 847 continue;
831 if (m.status & MCI_STATUS_UC) 848 __set_bit(i, toclear);
832 kill_it = 1; 849 if (severity == MCE_NO_SEVERITY) {
833 } else {
834 /* 850 /*
835 * Machine check event was not enabled. Clear, but 851 * Machine check event was not enabled. Clear, but
836 * ignore. 852 * ignore.
@@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
838 continue; 854 continue;
839 } 855 }
840 856
857 /*
858 * Kill on action required.
859 */
860 if (severity == MCE_AR_SEVERITY)
861 kill_it = 1;
862
841 if (m.status & MCI_STATUS_MISCV) 863 if (m.status & MCI_STATUS_MISCV)
842 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 864 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
843 if (m.status & MCI_STATUS_ADDRV) 865 if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
846 mce_get_rip(&m, regs); 868 mce_get_rip(&m, regs);
847 mce_log(&m); 869 mce_log(&m);
848 870
849 severity = mce_severity(&m, tolerant, NULL);
850 if (severity > worst) { 871 if (severity > worst) {
851 *final = m; 872 *final = m;
852 worst = severity; 873 worst = severity;
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
879 * one task, do that. If the user has set the tolerance very 900 * one task, do that. If the user has set the tolerance very
880 * high, don't try to do anything at all. 901 * high, don't try to do anything at all.
881 */ 902 */
882 if (kill_it && tolerant < 3) {
883 int user_space = 0;
884
885 /*
886 * If the EIPV bit is set, it means the saved IP is the
887 * instruction which caused the MCE.
888 */
889 if (m.mcgstatus & MCG_STATUS_EIPV)
890 user_space = final->ip && (final->cs & 3);
891 903
892 /* 904 if (kill_it && tolerant < 3)
893 * If we know that the error was in user space, send a 905 force_sig(SIGBUS, current);
894 * SIGBUS. Otherwise, panic if tolerance is low.
895 *
896 * force_sig() takes an awful lot of locks and has a slight
897 * risk of deadlocking.
898 */
899 if (user_space) {
900 force_sig(SIGBUS, current);
901 } else if (panic_on_oops || tolerant < 2) {
902 mce_panic("Uncorrected machine check", final, msg);
903 }
904 }
905 906
906 /* notify userspace ASAP */ 907 /* notify userspace ASAP */
907 set_thread_flag(TIF_MCE_NOTIFY); 908 set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void)
1049 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1050 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
1050 rip_msr = MSR_IA32_MCG_EIP; 1051 rip_msr = MSR_IA32_MCG_EIP;
1051 1052
1053 if (cap & MCG_SER_P)
1054 mce_ser = 1;
1055
1052 return 0; 1056 return 0;
1053} 1057}
1054 1058