aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-05-27 15:56:57 -0400
committerH. Peter Anvin <hpa@zytor.com>2009-06-03 17:45:34 -0400
commited7290d0ee8f81aa78bfe816f01b012f208cafc5 (patch)
treec73c44c14ff9f43147422df00dcef830cd952530 /arch/x86
parent86503560e48153aba539ff117450d31ab2ef76d7 (diff)
x86, mce: implement new status bits
The x86 architecture recently added some new machine check status bits: S(ignalled) and AR (Action-Required). Signalled allows checking whether a specific event caused an exception or was just logged through CMCI. AR allows the kernel to decide whether an event needs immediate action or can be delayed or ignored.

Implement support for these new status bits. mce_severity() uses the new bits to grade the machine check correctly and decide what to do. The exception handler uses AR to decide whether to kill or not. The S bit is used to separate events between the poll/CMCI handler and the exception handler.

Classical UC always leads to panic. That was true before anyway, because the existing CPUs always passed a PCC with it.

Also corrects the rules for whether to kill in user or kernel context and how to handle a missing RIPV.

The machine check handler now largely uses the mce-severity grading engine instead of making its own decisions. This means the logic is centralized in one place. This is useful because it has to be evaluated multiple times.

v2: Some rule fixes; Add AO events.
Fix RIPV, RIPV|EIPV order (Ying Huang).
Fix UCNA with AR=1 message (Ying Huang).
Add comment about panicking in m_c_p.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/mce.h10
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c82
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c84
4 files changed, 137 insertions, 44 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ba1f8890cf51..afd3cdf6f8ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -15,6 +15,7 @@
15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ 15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
16#define MCG_EXT_CNT_SHIFT 16 16#define MCG_EXT_CNT_SHIFT 16
17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) 17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
18 19
19#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ 20#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
20#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ 21#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
@@ -27,6 +28,15 @@
27#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ 28#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
28#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ 29#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
29#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ 30#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
31#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
32#define MCI_STATUS_AR (1ULL<<55) /* Action required */
33
34/* MISC register defines */
35#define MCM_ADDR_SEGOFF 0 /* segment offset */
36#define MCM_ADDR_LINEAR 1 /* linear address */
37#define MCM_ADDR_PHYS 2 /* physical address */
38#define MCM_ADDR_MEM 3 /* memory address */
39#define MCM_ADDR_GENERIC 7 /* generic */
30 40
31/* Fields are zero when not available */ 41/* Fields are zero when not available */
32struct mce { 42struct mce {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index f126b4ae7a25..54dcb8ff12e5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -2,9 +2,14 @@
2 2
3enum severity_level { 3enum severity_level {
4 MCE_NO_SEVERITY, 4 MCE_NO_SEVERITY,
5 MCE_KEEP_SEVERITY,
5 MCE_SOME_SEVERITY, 6 MCE_SOME_SEVERITY,
7 MCE_AO_SEVERITY,
6 MCE_UC_SEVERITY, 8 MCE_UC_SEVERITY,
9 MCE_AR_SEVERITY,
7 MCE_PANIC_SEVERITY, 10 MCE_PANIC_SEVERITY,
8}; 11};
9 12
10int mce_severity(struct mce *a, int tolerant, char **msg); 13int mce_severity(struct mce *a, int tolerant, char **msg);
14
15extern int mce_ser;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c189e89a89ae..4f4d2caf4043 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -19,43 +19,117 @@
19 * first. Since there are quite a lot of combinations test the bits in a 19 * first. Since there are quite a lot of combinations test the bits in a
20 * table-driven way. The rules are simply processed in order, first 20 * table-driven way. The rules are simply processed in order, first
21 * match wins. 21 * match wins.
22 *
23 * Note this is only used for machine check exceptions, the corrected
24 * errors use much simpler rules. The exceptions still check for the corrected
25 * errors, but only to leave them alone for the CMCI handler (except for
26 * panic situations)
22 */ 27 */
23 28
29enum context { IN_KERNEL = 1, IN_USER = 2 };
30enum ser { SER_REQUIRED = 1, NO_SER = 2 };
31
24static struct severity { 32static struct severity {
25 u64 mask; 33 u64 mask;
26 u64 result; 34 u64 result;
27 unsigned char sev; 35 unsigned char sev;
28 unsigned char mcgmask; 36 unsigned char mcgmask;
29 unsigned char mcgres; 37 unsigned char mcgres;
38 unsigned char ser;
39 unsigned char context;
30 char *msg; 40 char *msg;
31} severities[] = { 41} severities[] = {
42#define KERNEL .context = IN_KERNEL
43#define USER .context = IN_USER
44#define SER .ser = SER_REQUIRED
45#define NOSER .ser = NO_SER
32#define SEV(s) .sev = MCE_ ## s ## _SEVERITY 46#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
33#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } 47#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
34#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } 48#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
35#define MCGMASK(x, res, s, m, r...) \ 49#define MCGMASK(x, res, s, m, r...) \
36 { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } 50 { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
51#define MASK(x, y, s, m, r...) \
52 { .mask = x, .result = y, SEV(s), .msg = m, ## r }
53#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
54#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
55#define MCACOD 0xffff
56
37 BITCLR(MCI_STATUS_VAL, NO, "Invalid"), 57 BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
38 BITCLR(MCI_STATUS_EN, NO, "Not enabled"), 58 BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
39 BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), 59 BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
40 MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"), 60 /* When MCIP is not set something is very confused */
61 MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
62 /* Neither return nor error IP -- no chance to recover -> PANIC */
63 MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
64 "Neither restart nor error IP"),
65 MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
66 KERNEL),
67 BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
68 MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
69 "Spurious not enabled", SER),
70
71 /* ignore OVER for UCNA */
72 MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
73 "Uncorrected no action required", SER),
74 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
75 "Illegal combination (UCNA with AR=1)", SER),
76 MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
77
78 /* AR add known MCACODs here */
79 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
80 "Action required with lost events", SER),
81 MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
82 "Action required; unknown MCACOD", SER),
83
84 /* known AO MCACODs: */
85 MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
86 "Action optional: memory scrubbing error", SER),
87 MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
88 "Action optional: last level cache writeback error", SER),
89
90 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
91 "Action optional unknown MCACOD", SER),
92 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
93 "Action optional with lost events", SER),
41 BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), 94 BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
42 BITSET(MCI_STATUS_UC, UC, "Uncorrected"), 95 BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
43 BITSET(0, SOME, "No match") /* always matches. keep at end */ 96 BITSET(0, SOME, "No match") /* always matches. keep at end */
44}; 97};
45 98
99/*
100 * If the EIPV bit is set, it means the saved IP is the
101 * instruction which caused the MCE.
102 */
103static int error_context(struct mce *m)
104{
105 if (m->mcgstatus & MCG_STATUS_EIPV)
106 return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
107 /* Unknown, assume kernel */
108 return IN_KERNEL;
109}
110
46int mce_severity(struct mce *a, int tolerant, char **msg) 111int mce_severity(struct mce *a, int tolerant, char **msg)
47{ 112{
113 enum context ctx = error_context(a);
48 struct severity *s; 114 struct severity *s;
115
49 for (s = severities;; s++) { 116 for (s = severities;; s++) {
50 if ((a->status & s->mask) != s->result) 117 if ((a->status & s->mask) != s->result)
51 continue; 118 continue;
52 if ((a->mcgstatus & s->mcgmask) != s->mcgres) 119 if ((a->mcgstatus & s->mcgmask) != s->mcgres)
53 continue; 120 continue;
54 if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) && 121 if (s->ser == SER_REQUIRED && !mce_ser)
55 tolerant < 1) 122 continue;
56 return MCE_PANIC_SEVERITY; 123 if (s->ser == NO_SER && mce_ser)
124 continue;
125 if (s->context && ctx != s->context)
126 continue;
57 if (msg) 127 if (msg)
58 *msg = s->msg; 128 *msg = s->msg;
129 if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
130 if (panic_on_oops || tolerant < 1)
131 return MCE_PANIC_SEVERITY;
132 }
59 return s->sev; 133 return s->sev;
60 } 134 }
61} 135}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff9c732989de..f051a7807ab4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int rip_msr;
83static int mce_bootlog = -1; 83static int mce_bootlog = -1;
84static int monarch_timeout = -1; 84static int monarch_timeout = -1;
85static int mce_panic_timeout; 85static int mce_panic_timeout;
86int mce_ser;
86 87
87static char trigger[128]; 88static char trigger[128];
88static char *trigger_argv[2] = { trigger, NULL }; 89static char *trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
391 * Those are just logged through /dev/mcelog. 392 * Those are just logged through /dev/mcelog.
392 * 393 *
393 * This is executed in standard interrupt context. 394 * This is executed in standard interrupt context.
395 *
396 * Note: spec recommends to panic for fatal unsignalled
397 * errors here. However this would be quite problematic --
398 * we would need to reimplement the Monarch handling and
399 * it would mess up the exclusion between exception handler
400 * and poll handler -- so we skip this for now.
401 * These cases should not happen anyway, or only when the CPU
402 * is already totally confused. In this case it's likely it will
403 * not fully execute the machine check handler either.
394 */ 404 */
395void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 405void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
396{ 406{
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
417 continue; 427 continue;
418 428
419 /* 429 /*
420 * Uncorrected events are handled by the exception handler 430 * Uncorrected or signalled events are handled by the exception
421 * when it is enabled. But when the exception is disabled log 431 * handler when it is enabled, so don't process those here.
422 * everything.
423 * 432 *
424 * TBD do the same check for MCI_STATUS_EN here? 433 * TBD do the same check for MCI_STATUS_EN here?
425 */ 434 */
426 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) 435 if (!(flags & MCP_UC) &&
436 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
427 continue; 437 continue;
428 438
429 if (m.status & MCI_STATUS_MISCV) 439 if (m.status & MCI_STATUS_MISCV)
@@ -790,6 +800,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
790 barrier(); 800 barrier();
791 801
792 /* 802 /*
803 * When there is no restart IP we must always kill or panic.
804 */
805 if (!(m.mcgstatus & MCG_STATUS_RIPV))
806 kill_it = 1;
807
808 /*
793 * Go through all the banks in exclusion of the other CPUs. 809 * Go through all the banks in exclusion of the other CPUs.
794 * This way we don't report duplicated events on shared banks 810 * This way we don't report duplicated events on shared banks
795 * because the first one to see it will clear it. 811 * because the first one to see it will clear it.
@@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
809 continue; 825 continue;
810 826
811 /* 827 /*
812 * Non uncorrected errors are handled by machine_check_poll 828 * Non uncorrected or non signaled errors are handled by
813 * Leave them alone, unless this panics. 829 * machine_check_poll. Leave them alone, unless this panics.
814 */ 830 */
815 if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out) 831 if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
832 !no_way_out)
816 continue; 833 continue;
817 834
818 /* 835 /*
@@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
820 */ 837 */
821 add_taint(TAINT_MACHINE_CHECK); 838 add_taint(TAINT_MACHINE_CHECK);
822 839
823 __set_bit(i, toclear); 840 severity = mce_severity(&m, tolerant, NULL);
824 841
825 if (m.status & MCI_STATUS_EN) { 842 /*
826 /* 843 * When machine check was for corrected handler don't touch,
827 * If this error was uncorrectable and there was 844 * unless we're panicing.
828 * an overflow, we're in trouble. If no overflow, 845 */
829 * we might get away with just killing a task. 846 if (severity == MCE_KEEP_SEVERITY && !no_way_out)
830 */ 847 continue;
831 if (m.status & MCI_STATUS_UC) 848 __set_bit(i, toclear);
832 kill_it = 1; 849 if (severity == MCE_NO_SEVERITY) {
833 } else {
834 /* 850 /*
835 * Machine check event was not enabled. Clear, but 851 * Machine check event was not enabled. Clear, but
836 * ignore. 852 * ignore.
@@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
838 continue; 854 continue;
839 } 855 }
840 856
857 /*
858 * Kill on action required.
859 */
860 if (severity == MCE_AR_SEVERITY)
861 kill_it = 1;
862
841 if (m.status & MCI_STATUS_MISCV) 863 if (m.status & MCI_STATUS_MISCV)
842 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 864 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
843 if (m.status & MCI_STATUS_ADDRV) 865 if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
846 mce_get_rip(&m, regs); 868 mce_get_rip(&m, regs);
847 mce_log(&m); 869 mce_log(&m);
848 870
849 severity = mce_severity(&m, tolerant, NULL);
850 if (severity > worst) { 871 if (severity > worst) {
851 *final = m; 872 *final = m;
852 worst = severity; 873 worst = severity;
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
879 * one task, do that. If the user has set the tolerance very 900 * one task, do that. If the user has set the tolerance very
880 * high, don't try to do anything at all. 901 * high, don't try to do anything at all.
881 */ 902 */
882 if (kill_it && tolerant < 3) {
883 int user_space = 0;
884
885 /*
886 * If the EIPV bit is set, it means the saved IP is the
887 * instruction which caused the MCE.
888 */
889 if (m.mcgstatus & MCG_STATUS_EIPV)
890 user_space = final->ip && (final->cs & 3);
891 903
892 /* 904 if (kill_it && tolerant < 3)
893 * If we know that the error was in user space, send a 905 force_sig(SIGBUS, current);
894 * SIGBUS. Otherwise, panic if tolerance is low.
895 *
896 * force_sig() takes an awful lot of locks and has a slight
897 * risk of deadlocking.
898 */
899 if (user_space) {
900 force_sig(SIGBUS, current);
901 } else if (panic_on_oops || tolerant < 2) {
902 mce_panic("Uncorrected machine check", final, msg);
903 }
904 }
905 906
906 /* notify userspace ASAP */ 907 /* notify userspace ASAP */
907 set_thread_flag(TIF_MCE_NOTIFY); 908 set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void)
1049 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1050 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
1050 rip_msr = MSR_IA32_MCG_EIP; 1051 rip_msr = MSR_IA32_MCG_EIP;
1051 1052
1053 if (cap & MCG_SER_P)
1054 mce_ser = 1;
1055
1052 return 0; 1056 return 0;
1053} 1057}
1054 1058