diff options
-rw-r--r-- | arch/x86/include/asm/mce.h | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 82 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 84 |
4 files changed, 137 insertions, 44 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ba1f8890cf51..afd3cdf6f8ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ | 15 | #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ |
16 | #define MCG_EXT_CNT_SHIFT 16 | 16 | #define MCG_EXT_CNT_SHIFT 16 |
17 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) | 17 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) |
18 | #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ | ||
18 | 19 | ||
19 | #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ | 20 | #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ |
20 | #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ | 21 | #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ |
@@ -27,6 +28,15 @@ | |||
27 | #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ | 28 | #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ |
28 | #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ | 29 | #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ |
29 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ | 30 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ |
31 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | ||
32 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | ||
33 | |||
34 | /* MISC register defines */ | ||
35 | #define MCM_ADDR_SEGOFF 0 /* segment offset */ | ||
36 | #define MCM_ADDR_LINEAR 1 /* linear address */ | ||
37 | #define MCM_ADDR_PHYS 2 /* physical address */ | ||
38 | #define MCM_ADDR_MEM 3 /* memory address */ | ||
39 | #define MCM_ADDR_GENERIC 7 /* generic */ | ||
30 | 40 | ||
31 | /* Fields are zero when not available */ | 41 | /* Fields are zero when not available */ |
32 | struct mce { | 42 | struct mce { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index f126b4ae7a25..54dcb8ff12e5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -2,9 +2,14 @@ | |||
2 | 2 | ||
3 | enum severity_level { | 3 | enum severity_level { |
4 | MCE_NO_SEVERITY, | 4 | MCE_NO_SEVERITY, |
5 | MCE_KEEP_SEVERITY, | ||
5 | MCE_SOME_SEVERITY, | 6 | MCE_SOME_SEVERITY, |
7 | MCE_AO_SEVERITY, | ||
6 | MCE_UC_SEVERITY, | 8 | MCE_UC_SEVERITY, |
9 | MCE_AR_SEVERITY, | ||
7 | MCE_PANIC_SEVERITY, | 10 | MCE_PANIC_SEVERITY, |
8 | }; | 11 | }; |
9 | 12 | ||
10 | int mce_severity(struct mce *a, int tolerant, char **msg); | 13 | int mce_severity(struct mce *a, int tolerant, char **msg); |
14 | |||
15 | extern int mce_ser; | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c189e89a89ae..4f4d2caf4043 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -19,43 +19,117 @@ | |||
19 | * first. Since there are quite a lot of combinations test the bits in a | 19 | * first. Since there are quite a lot of combinations test the bits in a |
20 | * table-driven way. The rules are simply processed in order, first | 20 | * table-driven way. The rules are simply processed in order, first |
21 | * match wins. | 21 | * match wins. |
22 | * | ||
23 | * Note this is only used for machine check exceptions, the corrected | ||
24 | * errors use much simpler rules. The exceptions still check for the corrected | ||
25 | * errors, but only to leave them alone for the CMCI handler (except for | ||
26 | * panic situations) | ||
22 | */ | 27 | */ |
23 | 28 | ||
29 | enum context { IN_KERNEL = 1, IN_USER = 2 }; | ||
30 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; | ||
31 | |||
24 | static struct severity { | 32 | static struct severity { |
25 | u64 mask; | 33 | u64 mask; |
26 | u64 result; | 34 | u64 result; |
27 | unsigned char sev; | 35 | unsigned char sev; |
28 | unsigned char mcgmask; | 36 | unsigned char mcgmask; |
29 | unsigned char mcgres; | 37 | unsigned char mcgres; |
38 | unsigned char ser; | ||
39 | unsigned char context; | ||
30 | char *msg; | 40 | char *msg; |
31 | } severities[] = { | 41 | } severities[] = { |
42 | #define KERNEL .context = IN_KERNEL | ||
43 | #define USER .context = IN_USER | ||
44 | #define SER .ser = SER_REQUIRED | ||
45 | #define NOSER .ser = NO_SER | ||
32 | #define SEV(s) .sev = MCE_ ## s ## _SEVERITY | 46 | #define SEV(s) .sev = MCE_ ## s ## _SEVERITY |
33 | #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } | 47 | #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } |
34 | #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } | 48 | #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } |
35 | #define MCGMASK(x, res, s, m, r...) \ | 49 | #define MCGMASK(x, res, s, m, r...) \ |
36 | { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } | 50 | { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } |
51 | #define MASK(x, y, s, m, r...) \ | ||
52 | { .mask = x, .result = y, SEV(s), .msg = m, ## r } | ||
53 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) | ||
54 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) | ||
55 | #define MCACOD 0xffff | ||
56 | |||
37 | BITCLR(MCI_STATUS_VAL, NO, "Invalid"), | 57 | BITCLR(MCI_STATUS_VAL, NO, "Invalid"), |
38 | BITCLR(MCI_STATUS_EN, NO, "Not enabled"), | 58 | BITCLR(MCI_STATUS_EN, NO, "Not enabled"), |
39 | BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), | 59 | BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), |
40 | MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"), | 60 | /* When MCIP is not set something is very confused */ |
61 | MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), | ||
62 | /* Neither return nor error IP -- no chance to recover -> PANIC */ | ||
63 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, | ||
64 | "Neither restart nor error IP"), | ||
65 | MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", | ||
66 | KERNEL), | ||
67 | BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), | ||
68 | MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, | ||
69 | "Spurious not enabled", SER), | ||
70 | |||
71 | /* ignore OVER for UCNA */ | ||
72 | MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, | ||
73 | "Uncorrected no action required", SER), | ||
74 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, | ||
75 | "Illegal combination (UCNA with AR=1)", SER), | ||
76 | MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), | ||
77 | |||
78 | /* AR add known MCACODs here */ | ||
79 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, | ||
80 | "Action required with lost events", SER), | ||
81 | MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, | ||
82 | "Action required; unknown MCACOD", SER), | ||
83 | |||
84 | /* known AO MCACODs: */ | ||
85 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, | ||
86 | "Action optional: memory scrubbing error", SER), | ||
87 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, | ||
88 | "Action optional: last level cache writeback error", SER), | ||
89 | |||
90 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, | ||
91 | "Action optional unknown MCACOD", SER), | ||
92 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, | ||
93 | "Action optional with lost events", SER), | ||
41 | BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), | 94 | BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), |
42 | BITSET(MCI_STATUS_UC, UC, "Uncorrected"), | 95 | BITSET(MCI_STATUS_UC, UC, "Uncorrected"), |
43 | BITSET(0, SOME, "No match") /* always matches. keep at end */ | 96 | BITSET(0, SOME, "No match") /* always matches. keep at end */ |
44 | }; | 97 | }; |
45 | 98 | ||
99 | /* | ||
100 | * If the EIPV bit is set, it means the saved IP is the | ||
101 | * instruction which caused the MCE. | ||
102 | */ | ||
103 | static int error_context(struct mce *m) | ||
104 | { | ||
105 | if (m->mcgstatus & MCG_STATUS_EIPV) | ||
106 | return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | ||
107 | /* Unknown, assume kernel */ | ||
108 | return IN_KERNEL; | ||
109 | } | ||
110 | |||
46 | int mce_severity(struct mce *a, int tolerant, char **msg) | 111 | int mce_severity(struct mce *a, int tolerant, char **msg) |
47 | { | 112 | { |
113 | enum context ctx = error_context(a); | ||
48 | struct severity *s; | 114 | struct severity *s; |
115 | |||
49 | for (s = severities;; s++) { | 116 | for (s = severities;; s++) { |
50 | if ((a->status & s->mask) != s->result) | 117 | if ((a->status & s->mask) != s->result) |
51 | continue; | 118 | continue; |
52 | if ((a->mcgstatus & s->mcgmask) != s->mcgres) | 119 | if ((a->mcgstatus & s->mcgmask) != s->mcgres) |
53 | continue; | 120 | continue; |
54 | if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) && | 121 | if (s->ser == SER_REQUIRED && !mce_ser) |
55 | tolerant < 1) | 122 | continue; |
56 | return MCE_PANIC_SEVERITY; | 123 | if (s->ser == NO_SER && mce_ser) |
124 | continue; | ||
125 | if (s->context && ctx != s->context) | ||
126 | continue; | ||
57 | if (msg) | 127 | if (msg) |
58 | *msg = s->msg; | 128 | *msg = s->msg; |
129 | if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { | ||
130 | if (panic_on_oops || tolerant < 1) | ||
131 | return MCE_PANIC_SEVERITY; | ||
132 | } | ||
59 | return s->sev; | 133 | return s->sev; |
60 | } | 134 | } |
61 | } | 135 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff9c732989de..f051a7807ab4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -83,6 +83,7 @@ static int rip_msr; | |||
83 | static int mce_bootlog = -1; | 83 | static int mce_bootlog = -1; |
84 | static int monarch_timeout = -1; | 84 | static int monarch_timeout = -1; |
85 | static int mce_panic_timeout; | 85 | static int mce_panic_timeout; |
86 | int mce_ser; | ||
86 | 87 | ||
87 | static char trigger[128]; | 88 | static char trigger[128]; |
88 | static char *trigger_argv[2] = { trigger, NULL }; | 89 | static char *trigger_argv[2] = { trigger, NULL }; |
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); | |||
391 | * Those are just logged through /dev/mcelog. | 392 | * Those are just logged through /dev/mcelog. |
392 | * | 393 | * |
393 | * This is executed in standard interrupt context. | 394 | * This is executed in standard interrupt context. |
395 | * | ||
396 | * Note: spec recommends to panic for fatal unsignalled | ||
397 | * errors here. However this would be quite problematic -- | ||
398 | * we would need to reimplement the Monarch handling and | ||
399 | * it would mess up the exclusion between exception handler | ||
400 | * and poll handler -- so we skip this for now. | ||
401 | * These cases should not happen anyways, or only when the CPU | ||
402 | * is already totally confused. In this case it's likely it will | ||
403 | * not fully execute the machine check handler either. | ||
394 | */ | 404 | */ |
395 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | 405 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) |
396 | { | 406 | { |
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
417 | continue; | 427 | continue; |
418 | 428 | ||
419 | /* | 429 | /* |
420 | * Uncorrected events are handled by the exception handler | 430 | * Uncorrected or signalled events are handled by the exception |
421 | * when it is enabled. But when the exception is disabled log | 431 | * handler when it is enabled, so don't process those here. |
422 | * everything. | ||
423 | * | 432 | * |
424 | * TBD do the same check for MCI_STATUS_EN here? | 433 | * TBD do the same check for MCI_STATUS_EN here? |
425 | */ | 434 | */ |
426 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) | 435 | if (!(flags & MCP_UC) && |
436 | (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) | ||
427 | continue; | 437 | continue; |
428 | 438 | ||
429 | if (m.status & MCI_STATUS_MISCV) | 439 | if (m.status & MCI_STATUS_MISCV) |
@@ -790,6 +800,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
790 | barrier(); | 800 | barrier(); |
791 | 801 | ||
792 | /* | 802 | /* |
803 | * When there is no restart IP we must always kill or panic. | ||
804 | */ | ||
805 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
806 | kill_it = 1; | ||
807 | |||
808 | /* | ||
793 | * Go through all the banks in exclusion of the other CPUs. | 809 | * Go through all the banks in exclusion of the other CPUs. |
794 | * This way we don't report duplicated events on shared banks | 810 | * This way we don't report duplicated events on shared banks |
795 | * because the first one to see it will clear it. | 811 | * because the first one to see it will clear it. |
@@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
809 | continue; | 825 | continue; |
810 | 826 | ||
811 | /* | 827 | /* |
812 | * Non uncorrected errors are handled by machine_check_poll | 828 | * Non uncorrected or non signaled errors are handled by |
813 | * Leave them alone, unless this panics. | 829 | * machine_check_poll. Leave them alone, unless this panics. |
814 | */ | 830 | */ |
815 | if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out) | 831 | if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && |
832 | !no_way_out) | ||
816 | continue; | 833 | continue; |
817 | 834 | ||
818 | /* | 835 | /* |
@@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
820 | */ | 837 | */ |
821 | add_taint(TAINT_MACHINE_CHECK); | 838 | add_taint(TAINT_MACHINE_CHECK); |
822 | 839 | ||
823 | __set_bit(i, toclear); | 840 | severity = mce_severity(&m, tolerant, NULL); |
824 | 841 | ||
825 | if (m.status & MCI_STATUS_EN) { | 842 | /* |
826 | /* | 843 | * When the machine check was for the corrected handler, don't touch it, |
827 | * If this error was uncorrectable and there was | 844 | * unless we're panicking. |
828 | * an overflow, we're in trouble. If no overflow, | 845 | */ |
829 | * we might get away with just killing a task. | 846 | if (severity == MCE_KEEP_SEVERITY && !no_way_out) |
830 | */ | 847 | continue; |
831 | if (m.status & MCI_STATUS_UC) | 848 | __set_bit(i, toclear); |
832 | kill_it = 1; | 849 | if (severity == MCE_NO_SEVERITY) { |
833 | } else { | ||
834 | /* | 850 | /* |
835 | * Machine check event was not enabled. Clear, but | 851 | * Machine check event was not enabled. Clear, but |
836 | * ignore. | 852 | * ignore. |
@@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
838 | continue; | 854 | continue; |
839 | } | 855 | } |
840 | 856 | ||
857 | /* | ||
858 | * Kill on action required. | ||
859 | */ | ||
860 | if (severity == MCE_AR_SEVERITY) | ||
861 | kill_it = 1; | ||
862 | |||
841 | if (m.status & MCI_STATUS_MISCV) | 863 | if (m.status & MCI_STATUS_MISCV) |
842 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 864 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); |
843 | if (m.status & MCI_STATUS_ADDRV) | 865 | if (m.status & MCI_STATUS_ADDRV) |
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
846 | mce_get_rip(&m, regs); | 868 | mce_get_rip(&m, regs); |
847 | mce_log(&m); | 869 | mce_log(&m); |
848 | 870 | ||
849 | severity = mce_severity(&m, tolerant, NULL); | ||
850 | if (severity > worst) { | 871 | if (severity > worst) { |
851 | *final = m; | 872 | *final = m; |
852 | worst = severity; | 873 | worst = severity; |
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
879 | * one task, do that. If the user has set the tolerance very | 900 | * one task, do that. If the user has set the tolerance very |
880 | * high, don't try to do anything at all. | 901 | * high, don't try to do anything at all. |
881 | */ | 902 | */ |
882 | if (kill_it && tolerant < 3) { | ||
883 | int user_space = 0; | ||
884 | |||
885 | /* | ||
886 | * If the EIPV bit is set, it means the saved IP is the | ||
887 | * instruction which caused the MCE. | ||
888 | */ | ||
889 | if (m.mcgstatus & MCG_STATUS_EIPV) | ||
890 | user_space = final->ip && (final->cs & 3); | ||
891 | 903 | ||
892 | /* | 904 | if (kill_it && tolerant < 3) |
893 | * If we know that the error was in user space, send a | 905 | force_sig(SIGBUS, current); |
894 | * SIGBUS. Otherwise, panic if tolerance is low. | ||
895 | * | ||
896 | * force_sig() takes an awful lot of locks and has a slight | ||
897 | * risk of deadlocking. | ||
898 | */ | ||
899 | if (user_space) { | ||
900 | force_sig(SIGBUS, current); | ||
901 | } else if (panic_on_oops || tolerant < 2) { | ||
902 | mce_panic("Uncorrected machine check", final, msg); | ||
903 | } | ||
904 | } | ||
905 | 906 | ||
906 | /* notify userspace ASAP */ | 907 | /* notify userspace ASAP */ |
907 | set_thread_flag(TIF_MCE_NOTIFY); | 908 | set_thread_flag(TIF_MCE_NOTIFY); |
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void) | |||
1049 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) | 1050 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) |
1050 | rip_msr = MSR_IA32_MCG_EIP; | 1051 | rip_msr = MSR_IA32_MCG_EIP; |
1051 | 1052 | ||
1053 | if (cap & MCG_SER_P) | ||
1054 | mce_ser = 1; | ||
1055 | |||
1052 | return 0; | 1056 | return 0; |
1053 | } | 1057 | } |
1054 | 1058 | ||