Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/mce.h                 |  16
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h  |  11
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c  |  66
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c           | 154
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c       |  11
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c     |  63
6 files changed, 214 insertions(+), 107 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9b3de99dc004..1f5a86d518db 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,6 +116,12 @@ struct mca_config {
 	u32 rip_msr;
 };
 
+struct mce_vendor_flags {
+	__u64		overflow_recov	: 1, /* cpuid_ebx(80000007) */
+			__reserved_0	: 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
 extern struct mca_config mca_cfg;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -128,9 +134,11 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -183,11 +191,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-	MCP_TIMESTAMP	= (1 << 0),	/* log time stamp */
-	MCP_UC		= (1 << 1),	/* log uncorrected errors */
-	MCP_DONTLOG	= (1 << 2),	/* only clear, don't log */
+	MCP_TIMESTAMP	= BIT(0),	/* log time stamp */
+	MCP_UC		= BIT(1),	/* log uncorrected errors */
+	MCP_DONTLOG	= BIT(2),	/* only clear, don't log */
 };
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
 
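The new mce_flags.overflow_recov bit mirrors CPUID leaf 0x80000007, EBX bit 0 (MCA overflow recovery). As a reference only, here is a minimal user-space sketch of reading that same bit; the kernel fills the flag via cpuid_ebx(0x80000007) in __mcheck_cpu_init_vendor() (see the mce.c hunks below), and the main() wrapper here is purely illustrative.

/* Illustrative only: query the MCA overflow-recovery CPUID bit from user
 * space; the kernel reads the same bit via cpuid_ebx(0x80000007) & 0x1.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
		return 1;

	/* EBX bit 0: MCA overflow recovery support (overflow_recov) */
	printf("overflow_recov: %u\n", ebx & 0x1);
	return 0;
}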
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 10b46906767f..fe32074b865b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -14,6 +14,7 @@ enum severity_level {
 };
 
 #define ATTR_LEN		16
+#define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -23,20 +24,20 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8bb433043a7f..9c682c222071 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -186,7 +186,61 @@ static int error_context(struct mce *m)
 	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
+/*
+ * See AMD Error Scope Hierarchy table in a newer BKDG. For example
+ * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
+ */
+static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+{
+	enum context ctx = error_context(m);
+
+	/* Processor Context Corrupt, no need to fumble too much, die! */
+	if (m->status & MCI_STATUS_PCC)
+		return MCE_PANIC_SEVERITY;
+
+	if (m->status & MCI_STATUS_UC) {
+
+		/*
+		 * On older systems where overflow_recov flag is not present, we
+		 * should simply panic if an error overflow occurs. If
+		 * overflow_recov flag is present and set, then software can try
+		 * to at least kill process to prolong system operation.
+		 */
+		if (mce_flags.overflow_recov) {
+			/* software can try to contain */
+			if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
+				return MCE_PANIC_SEVERITY;
+
+			/* kill current process */
+			return MCE_AR_SEVERITY;
+		} else {
+			/* at least one error was not logged */
+			if (m->status & MCI_STATUS_OVER)
+				return MCE_PANIC_SEVERITY;
+		}
+
+		/*
+		 * For any other case, return MCE_UC_SEVERITY so that we log the
+		 * error and exit #MC handler.
+		 */
+		return MCE_UC_SEVERITY;
+	}
+
+	/*
+	 * deferred error: poll handler catches these and adds to mce_ring so
+	 * memory-failure can take recovery actions.
+	 */
+	if (m->status & MCI_STATUS_DEFERRED)
+		return MCE_DEFERRED_SEVERITY;
+
+	/*
+	 * corrected error: poll handler catches these and passes responsibility
+	 * of decoding the error to EDAC
+	 */
+	return MCE_KEEP_SEVERITY;
+}
+
+static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m);
@@ -216,6 +270,16 @@ int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
 	}
 }
 
+/* Default to mce_severity_intel */
+int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+		    mce_severity_intel;
+
+void __init mcheck_vendor_init_severity(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		mce_severity = mce_severity_amd;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
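The grader is now selected once at boot through the mce_severity function pointer instead of being a fixed function. Below is a minimal sketch of that dispatch pattern in isolation, with made-up names and a trivial stand-in for struct mce, purely for illustration of the design choice (install a default statically, swap it once during init, call through the pointer everywhere else).

/* Sketch of the boot-time dispatch pattern used above. Names are invented. */
#include <stdio.h>

struct fake_mce { unsigned long long status; };

static int severity_intel(struct fake_mce *m) { return 1; }
static int severity_amd(struct fake_mce *m)   { return 2; }

/* Default to the Intel grader, as mce_severity does. */
static int (*severity)(struct fake_mce *m) = severity_intel;

static void vendor_init_severity(int is_amd)
{
	if (is_amd)
		severity = severity_amd;
}

int main(void)
{
	struct fake_mce m = { .status = 0 };

	vendor_init_severity(1);
	printf("severity: %d\n", severity(&m));
	return 0;
}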
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3c036cb4a370..e535533d5ab8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,11 +60,12 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
 #define SPINUNIT 100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
 	.bootlog  = -1,
@@ -89,9 +90,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -622,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -646,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
+
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -679,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -694,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -813,7 +816,7 @@ static void mce_reign(void)
 	 * other CPUs.
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Fatal Machine check", m, msg);
+		mce_panic("Fatal machine check", m, msg);
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.
@@ -826,7 +829,7 @@ static void mce_reign(void)
 	 * source or one CPU is hung. Panic.
 	 */
 	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Machine check from unknown source", NULL, NULL);
+		mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
 	/*
 	 * Now clear all the mces_seen so that they don't reappear on
@@ -1258,7 +1261,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1268,49 +1271,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
+	unsigned long when = jiffies + interval;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
-	return test_and_clear_bit(0, v);
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
 	unsigned long iv;
-	int notify;
 
-	WARN_ON(smp_processor_id() != data);
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
 	}
 
 	/*
-	 * Alert userspace if needed. If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
 	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
+	if (mce_notify_irq())
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
+	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
+
+done:
 	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, iv);
 }
 
 /*
@@ -1319,16 +1330,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long when = jiffies + interval;
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer_pinned(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, interval);
+
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1525,45 +1530,46 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 * Various K7s with broken bank 0 around. Always disable
 		 * by default.
 		 */
 		if (c->x86 == 6 && cfg->banks > 0)
 			mce_banks[0].ctl = 0;
 
 		/*
-		 * Turn off MC4_MISC thresholding banks on those models since
-		 * they're not supported there.
+		 * overflow_recov is supported for F15h Models 00h-0fh
+		 * even though we don't have a CPUID bit for it.
 		 */
-		if (c->x86 == 0x15 &&
-		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
-			int i;
-			u64 val, hwcr;
-			bool need_toggle;
-			u32 msrs[] = {
+		if (c->x86 == 0x15 && c->x86_model <= 0xf)
+			mce_flags.overflow_recov = 1;
+
+		/*
+		 * Turn off MC4_MISC thresholding banks on those models since
+		 * they're not supported there.
+		 */
+		if (c->x86 == 0x15 &&
+		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+			int i;
+			u64 hwcr;
+			bool need_toggle;
+			u32 msrs[] = {
 				0x00000413, /* MC4_MISC0 */
 				0xc0000408, /* MC4_MISC1 */
 			};
 
 			rdmsrl(MSR_K7_HWCR, hwcr);
 
 			/* McStatusWrEn has to be set */
 			need_toggle = !(hwcr & BIT(18));
 
 			if (need_toggle)
 				wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 
-			for (i = 0; i < ARRAY_SIZE(msrs); i++) {
-				rdmsrl(msrs[i], val);
+			/* Clear CntP bit safely */
+			for (i = 0; i < ARRAY_SIZE(msrs); i++)
+				msr_clear_bit(msrs[i], 62);
 
-				/* CntP bit set? */
-				if (val & BIT_64(62)) {
-					val &= ~BIT_64(62);
-					wrmsrl(msrs[i], val);
-				}
-			}
-
-			/* restore old settings */
-			if (need_toggle)
-				wrmsrl(MSR_K7_HWCR, hwcr);
-		}
+			/* restore old settings */
+			if (need_toggle)
+				wrmsrl(MSR_K7_HWCR, hwcr);
+		}
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1629,10 +1635,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
+		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
 		break;
 	default:
 		break;
@@ -2017,6 +2024,7 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mcheck_vendor_init_severity();
 
 	return 0;
 }
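The polling cadence in mce_timer_fn() now halves the interval whenever something was reported and doubles it (capped at check_interval) when the machine stays quiet, with the timer re-armed through __restart_timer(). A small sketch of just that backoff arithmetic follows; HZ and CHECK_INTERVAL here are stand-ins, since the real code works in jiffies inside the kernel.

/* Sketch of the adaptive polling interval from mce_timer_fn().
 * HZ and CHECK_INTERVAL below are illustrative constants.
 */
#include <stdio.h>

#define HZ		1000UL
#define CHECK_INTERVAL	(5 * 60 * HZ)	/* 5 minutes, in pretend jiffies */

static unsigned long next_interval(unsigned long iv, int logged)
{
	if (logged)
		return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;	  /* max(iv/2, HZ/100) */

	return iv * 2 < CHECK_INTERVAL ? iv * 2 : CHECK_INTERVAL; /* min(iv*2, cap) */
}

int main(void)
{
	unsigned long iv = CHECK_INTERVAL;

	iv = next_interval(iv, 1);	/* an event was logged: poll 2x faster */
	iv = next_interval(iv, 0);	/* quiet again: back off 2x, up to the cap */
	printf("interval: %lu jiffies\n", iv);
	return 0;
}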
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f1c3769bbd64..55ad9b37cae8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank)
 	return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
 	switch (b->address) {
 	/* MSR4_MISC0 */
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		if (!b.interrupt_capable)
 			goto init;
 
+		b.interrupt_enable = 1;
 		new	= (high & MASK_LVTOFF_HI) >> 20;
 		offset  = setup_APIC_mce(offset, new);
 
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void)
 log:
 	mce_setup(&m);
 	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+	if (!(m.status & MCI_STATUS_VAL))
+		return;
 	m.misc = ((u64)high << 32) | low;
 	m.bank = bank;
 	mce_log(&m);
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
 	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
 	b->threshold_limit	= THRESHOLD_MAX;
 
-	if (b->interrupt_capable)
+	if (b->interrupt_capable) {
 		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-	else
+		b->interrupt_enable = 1;
+	} else {
 		threshold_ktype.default_attrs[2] = NULL;
+	}
 
 	INIT_LIST_HEAD(&b->miscj);
 
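The new early return in amd_threshold_interrupt() bails out when the bank's status register carries no valid error, i.e. MCI_STATUS_VAL (bit 63 of MCi_STATUS) is clear. A tiny sketch of that bit test on a raw 64-bit status value, with the constant spelled out because the kernel macro is not available outside the tree:

/* Sketch of the MCI_STATUS_VAL test added above: bit 63 of MCi_STATUS
 * flags a valid error; a clear bit means there is nothing to log.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MCI_STATUS_VAL	(1ULL << 63)

static bool mca_status_valid(uint64_t status)
{
	return status & MCI_STATUS_VAL;
}

int main(void)
{
	printf("%d %d\n", mca_status_valid(0), mca_status_valid(MCI_STATUS_VAL | 0x5));
	return 0;
}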
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b3c97bafc123..b4a41cf030ed 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -39,6 +39,15 @@
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
+ * CMCI storm detection backoff counter
+ *
+ * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
+ * encountered an error. If not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
+/*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
  */
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
-#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_INTERVAL	(HZ)
 #define CMCI_STORM_THRESHOLD	15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-		return;
-	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+		return false;
+
+	/*
+	 * Reset the counter if we've logged an error in the last poll
+	 * during the storm.
+	 */
+	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+	else
+		this_cpu_dec(cmci_backoff_cnt);
+
+	return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-	int r;
-
-	if (interval < CMCI_POLL_INTERVAL)
-		return interval;
+	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+		mce_notify_irq();
+		return CMCI_STORM_INTERVAL;
+	}
 
 	switch (__this_cpu_read(cmci_storm_state)) {
 	case CMCI_STORM_ACTIVE:
+
 		/*
 		 * We switch back to interrupt mode once the poll timer has
-		 * silenced itself. That means no events recorded and the
-		 * timer interval is back to our poll interval.
+		 * silenced itself. That means no events recorded and the timer
+		 * interval is back to our poll interval.
 		 */
 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-		r = atomic_sub_return(1, &cmci_storm_on_cpus);
-		if (r == 0)
+		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+
 		/* FALLTHROUGH */
 
 	case CMCI_STORM_SUBSIDED:
 		/*
-		 * We wait for all cpus to go back to SUBSIDED
-		 * state. When that happens we switch back to
-		 * interrupt mode.
+		 * We wait for all CPUs to go back to SUBSIDED state. When that
+		 * happens we switch back to interrupt mode.
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
 		}
 		return CMCI_POLL_INTERVAL;
 	default:
-		/*
-		 * We have shiny weather. Let the poll do whatever it
-		 * thinks.
-		 */
+
+		/* We have shiny weather. Let the poll do whatever it thinks. */
 		return interval;
 	}
 }
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void)
 	cmci_storm_disable_banks();
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
-	mce_timer_kick(CMCI_POLL_INTERVAL);
+	mce_timer_kick(CMCI_STORM_INTERVAL);
+	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
 	if (r == 1)
 		pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void)
 {
 	if (cmci_storm_detect())
 		return;
+
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	mce_notify_irq();
 }
@@ -286,6 +306,7 @@ void cmci_recheck(void)
 
 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
 		return;
+
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	local_irq_restore(flags);
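The storm exit condition is now driven by cmci_backoff_cnt: every poll during a storm either reloads it (an error was logged) or decrements it, and cmci_intel_adjust_timer() keeps the fast CMCI_STORM_INTERVAL only while the counter stays above zero. A compact single-CPU sketch of that counter's life cycle follows; the names and types here are simplified stand-ins, not the kernel's per-CPU variables.

/* Sketch of the cmci_backoff_cnt behaviour added in this patch: reset on a
 * logged error, decrement on a quiet poll, storm ends when it hits zero.
 */
#include <stdbool.h>
#include <stdio.h>

#define INITIAL_CHECK_INTERVAL	(5 * 60)	/* polls, mirroring the 5 min default */

static int backoff_cnt;

static void storm_start(void)
{
	backoff_cnt = INITIAL_CHECK_INTERVAL;
}

/* One poll tick during a storm; returns true while the storm is still on. */
static bool storm_poll(bool error_logged)
{
	if (error_logged)
		backoff_cnt = INITIAL_CHECK_INTERVAL;
	else
		backoff_cnt--;

	return backoff_cnt > 0;
}

int main(void)
{
	int quiet_polls = 0;

	storm_start();
	while (storm_poll(false))	/* no further errors: count down to zero */
		quiet_polls++;

	printf("storm declared over after %d quiet polls\n", quiet_polls + 1);
	return 0;
}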