diff options
-rw-r--r-- | arch/x86/include/asm/mce.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 14 | ||||
-rw-r--r-- | drivers/edac/mce_amd.h | 3 |
5 files changed, 32 insertions, 16 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 276392f121fb..51b26e895933 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -34,6 +34,10 @@ | |||
34 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | 34 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ |
35 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | 35 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ |
36 | 36 | ||
37 | /* AMD-specific bits */ | ||
38 | #define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ | ||
39 | #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ | ||
40 | |||
37 | /* | 41 | /* |
38 | * Note that the full MCACOD field of IA32_MCi_STATUS MSR is | 42 | * Note that the full MCACOD field of IA32_MCi_STATUS MSR is |
39 | * bits 15:0. But bit 12 is the 'F' bit, defined for corrected | 43 | * bits 15:0. But bit 12 is the 'F' bit, defined for corrected |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 09edd0b65fef..10b46906767f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | enum severity_level { | 4 | enum severity_level { |
5 | MCE_NO_SEVERITY, | 5 | MCE_NO_SEVERITY, |
6 | MCE_DEFERRED_SEVERITY, | ||
7 | MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY, | ||
6 | MCE_KEEP_SEVERITY, | 8 | MCE_KEEP_SEVERITY, |
7 | MCE_SOME_SEVERITY, | 9 | MCE_SOME_SEVERITY, |
8 | MCE_AO_SEVERITY, | 10 | MCE_AO_SEVERITY, |
@@ -21,7 +23,7 @@ struct mce_bank { | |||
21 | char attrname[ATTR_LEN]; /* attribute name */ | 23 | char attrname[ATTR_LEN]; /* attribute name */ |
22 | }; | 24 | }; |
23 | 25 | ||
24 | int mce_severity(struct mce *a, int tolerant, char **msg); | 26 | int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp); |
25 | struct dentry *mce_get_debugfs_dir(void); | 27 | struct dentry *mce_get_debugfs_dir(void); |
26 | 28 | ||
27 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c370e1c4468b..8bb433043a7f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -31,6 +31,7 @@ | |||
31 | 31 | ||
32 | enum context { IN_KERNEL = 1, IN_USER = 2 }; | 32 | enum context { IN_KERNEL = 1, IN_USER = 2 }; |
33 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; | 33 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; |
34 | enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; | ||
34 | 35 | ||
35 | static struct severity { | 36 | static struct severity { |
36 | u64 mask; | 37 | u64 mask; |
@@ -40,6 +41,7 @@ static struct severity { | |||
40 | unsigned char mcgres; | 41 | unsigned char mcgres; |
41 | unsigned char ser; | 42 | unsigned char ser; |
42 | unsigned char context; | 43 | unsigned char context; |
44 | unsigned char excp; | ||
43 | unsigned char covered; | 45 | unsigned char covered; |
44 | char *msg; | 46 | char *msg; |
45 | } severities[] = { | 47 | } severities[] = { |
@@ -48,6 +50,8 @@ static struct severity { | |||
48 | #define USER .context = IN_USER | 50 | #define USER .context = IN_USER |
49 | #define SER .ser = SER_REQUIRED | 51 | #define SER .ser = SER_REQUIRED |
50 | #define NOSER .ser = NO_SER | 52 | #define NOSER .ser = NO_SER |
53 | #define EXCP .excp = EXCP_CONTEXT | ||
54 | #define NOEXCP .excp = NO_EXCP | ||
51 | #define BITCLR(x) .mask = x, .result = 0 | 55 | #define BITCLR(x) .mask = x, .result = 0 |
52 | #define BITSET(x) .mask = x, .result = x | 56 | #define BITSET(x) .mask = x, .result = x |
53 | #define MCGMASK(x, y) .mcgmask = x, .mcgres = y | 57 | #define MCGMASK(x, y) .mcgmask = x, .mcgres = y |
@@ -62,7 +66,7 @@ static struct severity { | |||
62 | ), | 66 | ), |
63 | MCESEV( | 67 | MCESEV( |
64 | NO, "Not enabled", | 68 | NO, "Not enabled", |
65 | BITCLR(MCI_STATUS_EN) | 69 | EXCP, BITCLR(MCI_STATUS_EN) |
66 | ), | 70 | ), |
67 | MCESEV( | 71 | MCESEV( |
68 | PANIC, "Processor context corrupt", | 72 | PANIC, "Processor context corrupt", |
@@ -71,16 +75,20 @@ static struct severity { | |||
71 | /* When MCIP is not set something is very confused */ | 75 | /* When MCIP is not set something is very confused */ |
72 | MCESEV( | 76 | MCESEV( |
73 | PANIC, "MCIP not set in MCA handler", | 77 | PANIC, "MCIP not set in MCA handler", |
74 | MCGMASK(MCG_STATUS_MCIP, 0) | 78 | EXCP, MCGMASK(MCG_STATUS_MCIP, 0) |
75 | ), | 79 | ), |
76 | /* Neither return nor error IP -- no chance to recover -> PANIC */ | 80 | /* Neither return nor error IP -- no chance to recover -> PANIC */ |
77 | MCESEV( | 81 | MCESEV( |
78 | PANIC, "Neither restart nor error IP", | 82 | PANIC, "Neither restart nor error IP", |
79 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) | 83 | EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) |
80 | ), | 84 | ), |
81 | MCESEV( | 85 | MCESEV( |
82 | PANIC, "In kernel and no restart IP", | 86 | PANIC, "In kernel and no restart IP", |
83 | KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) | 87 | EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) |
88 | ), | ||
89 | MCESEV( | ||
90 | DEFERRED, "Deferred error", | ||
91 | NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) | ||
84 | ), | 92 | ), |
85 | MCESEV( | 93 | MCESEV( |
86 | KEEP, "Corrected error", | 94 | KEEP, "Corrected error", |
@@ -89,7 +97,7 @@ static struct severity { | |||
89 | 97 | ||
90 | /* ignore OVER for UCNA */ | 98 | /* ignore OVER for UCNA */ |
91 | MCESEV( | 99 | MCESEV( |
92 | KEEP, "Uncorrected no action required", | 100 | UCNA, "Uncorrected no action required", |
93 | SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) | 101 | SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) |
94 | ), | 102 | ), |
95 | MCESEV( | 103 | MCESEV( |
@@ -178,8 +186,9 @@ static int error_context(struct mce *m) | |||
178 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | 186 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; |
179 | } | 187 | } |
180 | 188 | ||
181 | int mce_severity(struct mce *m, int tolerant, char **msg) | 189 | int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp) |
182 | { | 190 | { |
191 | enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); | ||
183 | enum context ctx = error_context(m); | 192 | enum context ctx = error_context(m); |
184 | struct severity *s; | 193 | struct severity *s; |
185 | 194 | ||
@@ -194,6 +203,8 @@ int mce_severity(struct mce *m, int tolerant, char **msg) | |||
194 | continue; | 203 | continue; |
195 | if (s->context && ctx != s->context) | 204 | if (s->context && ctx != s->context) |
196 | continue; | 205 | continue; |
206 | if (s->excp && excp != s->excp) | ||
207 | continue; | ||
197 | if (msg) | 208 | if (msg) |
198 | *msg = s->msg; | 209 | *msg = s->msg; |
199 | s->covered = 1; | 210 | s->covered = 1; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 61a9668cebfd..453e9bf90968 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -668,7 +668,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, | |||
668 | if (quirk_no_way_out) | 668 | if (quirk_no_way_out) |
669 | quirk_no_way_out(i, m, regs); | 669 | quirk_no_way_out(i, m, regs); |
670 | } | 670 | } |
671 | if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY) | 671 | if (mce_severity(m, mca_cfg.tolerant, msg, true) >= |
672 | MCE_PANIC_SEVERITY) | ||
672 | ret = 1; | 673 | ret = 1; |
673 | } | 674 | } |
674 | return ret; | 675 | return ret; |
@@ -754,7 +755,7 @@ static void mce_reign(void) | |||
754 | for_each_possible_cpu(cpu) { | 755 | for_each_possible_cpu(cpu) { |
755 | int severity = mce_severity(&per_cpu(mces_seen, cpu), | 756 | int severity = mce_severity(&per_cpu(mces_seen, cpu), |
756 | mca_cfg.tolerant, | 757 | mca_cfg.tolerant, |
757 | &nmsg); | 758 | &nmsg, true); |
758 | if (severity > global_worst) { | 759 | if (severity > global_worst) { |
759 | msg = nmsg; | 760 | msg = nmsg; |
760 | global_worst = severity; | 761 | global_worst = severity; |
@@ -1095,13 +1096,14 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1095 | */ | 1096 | */ |
1096 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 1097 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
1097 | 1098 | ||
1098 | severity = mce_severity(&m, cfg->tolerant, NULL); | 1099 | severity = mce_severity(&m, cfg->tolerant, NULL, true); |
1099 | 1100 | ||
1100 | /* | 1101 | /* |
1101 | * When machine check was for corrected handler don't touch, | 1102 | * When machine check was for corrected/deferred handler don't |
1102 | * unless we're panicking. | 1103 | * touch, unless we're panicking. |
1103 | */ | 1104 | */ |
1104 | if (severity == MCE_KEEP_SEVERITY && !no_way_out) | 1105 | if ((severity == MCE_KEEP_SEVERITY || |
1106 | severity == MCE_UCNA_SEVERITY) && !no_way_out) | ||
1105 | continue; | 1107 | continue; |
1106 | __set_bit(i, toclear); | 1108 | __set_bit(i, toclear); |
1107 | if (severity == MCE_NO_SEVERITY) { | 1109 | if (severity == MCE_NO_SEVERITY) { |
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 51b7e3a36e37..c2359a1ea6b3 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h | |||
@@ -32,9 +32,6 @@ | |||
32 | #define R4(x) (((x) >> 4) & 0xf) | 32 | #define R4(x) (((x) >> 4) & 0xf) |
33 | #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") | 33 | #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") |
34 | 34 | ||
35 | #define MCI_STATUS_DEFERRED BIT_64(44) | ||
36 | #define MCI_STATUS_POISON BIT_64(43) | ||
37 | |||
38 | extern const char * const pp_msgs[]; | 35 | extern const char * const pp_msgs[]; |
39 | 36 | ||
40 | enum tt_ids { | 37 | enum tt_ids { |