author		Borislav Petkov <bp@suse.de>	2015-01-13 09:08:51 -0500
committer	Borislav Petkov <bp@suse.de>	2015-02-19 07:24:25 -0500
commit		3f2f0680d1161df96a0e8fea16930f1bd487a9cf (patch)
tree		29009c1b6dcc24a7dc93ba485983c6ea5f31e0f0 /arch/x86/kernel/cpu/mcheck/mce.c
parent		0eac092d8307db61d320f77f9fce40e60b4ffa89 (diff)
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with yet another report about a race condition in the CMCI storm adaptive period length logic. Yes, we have to admit, it is fragile and error-prone. So let's simplify it.

The simpler logic is: after we enter storm mode, we go straight to polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm mode as long as we see errors being logged while polling.

Theoretically, if we see an uninterrupted error stream, we will remain in storm mode indefinitely and keep polling the MSRs. However, when the storm is actually a burst of errors, once we have logged them all, we back out of storm mode after ~5 minutes of polling with no more errors logged. If we do encounter an error during those 5 minutes, the 5-minute window starts over.

Making machine_check_poll() return a bool denoting whether it has seen an error or not lets us simplify a bunch of code and move the storm handling private to mce_intel.c.

Some minor cleanups while at it.

Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
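The storm exit described in the message boils down to a per-CPU countdown: while in storm mode the CPU polls once a second, every poll that logs an error tops the countdown back up to roughly five minutes' worth of polls, and only when the countdown reaches zero does the CPU leave storm mode. A minimal, self-contained sketch of that countdown (illustrative only; the names INITIAL_BACKOFF, storm_backoff and storm_poll() are made up here and are not the actual mce_intel.c code):

	#include <stdbool.h>

	#define INITIAL_BACKOFF		(5 * 60)	/* ~5 minutes' worth of one-second polls */

	static int storm_backoff = INITIAL_BACKOFF;	/* per-CPU in the real code */

	/*
	 * Called once per one-second poll while in storm mode; error_logged is
	 * what machine_check_poll() now reports.  Returns true to stay in storm mode.
	 */
	static bool storm_poll(bool error_logged)
	{
		if (error_logged)
			storm_backoff = INITIAL_BACKOFF;	/* error seen: restart the 5 min window */
		else
			storm_backoff--;			/* clean poll: count down */

		return storm_backoff > 0;	/* ~5 min without errors: back out of storm mode */
	}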
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce.c	86
1 file changed, 45 insertions, 41 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..d60cbb8d78f7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -58,7 +58,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-#define SPINUNIT 100	/* 100ns */
+#define SPINUNIT		100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
@@ -87,9 +87,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -623,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -647,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
+
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -680,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -695,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
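Because machine_check_poll() now reports whether it logged anything, a polling-context caller can branch on the result directly. The real consumer is the CMCI poll path in mce_intel.c, which this diffstat does not show; a hypothetical caller would look roughly like:

	/* hypothetical poll-context caller; both identifiers below appear in this file */
	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks))) {
		/* at least one corrected error was logged on this pass */
	} else {
		/* clean pass: the polling interval can safely back off */
	}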
@@ -1311,7 +1313,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
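No hunk in this file adds the INITIAL_CHECK_INTERVAL definition, so it presumably lives in a shared header (perhaps mce-internal.h; it is not part of this diffstat) so that mce_intel.c can reuse the same five-minute value. Something along the lines of:

	#define INITIAL_CHECK_INTERVAL	(5 * 60)	/* 5 minutes, matching the literal it replaces */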
@@ -1321,49 +1323,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
+	unsigned long when = jiffies + interval;
+	unsigned long flags;
 
-	return test_and_clear_bit(0, v);
+	local_irq_save(flags);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
 	unsigned long iv;
-	int notify;
 
-	WARN_ON(smp_processor_id() != data);
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
 	}
 
 	/*
-	 * Alert userspace if needed. If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
 	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
+	if (mce_notify_irq())
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
+	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
+
+done:
 	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, iv);
 }
 
 /*
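When the CMCI poller reports storm activity, the goto done path above skips the notify/backoff logic and lets mce_adjust_timer() pick the interval; on Intel that is cmci_intel_adjust_timer() (see the vendor-init hunk further down), which presumably keeps the timer at the one-second CMCI_STORM_INTERVAL cadence until the storm subsides. A rough, hypothetical shape, where storm_is_active() is a made-up stand-in and not a kernel symbol:

	static unsigned long cmci_intel_adjust_timer(unsigned long interval)
	{
		if (storm_is_active())
			return CMCI_STORM_INTERVAL;	/* keep the one-second storm cadence */

		return interval;			/* storm over: leave the backoff interval alone */
	}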
@@ -1372,16 +1382,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long when = jiffies + interval;
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer_pinned(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, interval);
+
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1682,7 +1686,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);