author		Borislav Petkov <bp@suse.de>	2015-01-13 09:08:51 -0500
committer	Borislav Petkov <bp@suse.de>	2015-02-19 07:24:25 -0500
commit		3f2f0680d1161df96a0e8fea16930f1bd487a9cf
tree		29009c1b6dcc24a7dc93ba485983c6ea5f31e0f0 /arch/x86/kernel/cpu/mcheck/mce.c
parent		0eac092d8307db61d320f77f9fce40e60b4ffa89
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with yet another report about a race
condition in the CMCI storm adaptive period length code. Yes, we have
to admit, it is fragile and error-prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually just a burst of errors, then, once we
have logged them all, we back out of storm mode after ~5 minutes of polling
with no further errors logged.
If we do encounter an error during those 5 minutes, the 5-minute back-out
period starts over.
Making machine_check_poll() return a bool denoting whether it has seen an
error or not lets us simplify a bunch of code and keep the storm handling
private to mce_intel.c.
Some minor cleanups while at it.
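
To make the intended behaviour concrete, here is a minimal, illustrative C
sketch of the storm-mode polling decision described above. It is not code
from this patch; STORM_POLL_SECS, STORM_BACKOFF_SECS and storm_poll_tick()
are made-up names used only for illustration.

#include <stdbool.h>

#define STORM_POLL_SECS		1		/* poll once a second while in storm mode */
#define STORM_BACKOFF_SECS	(5 * 60)	/* leave storm mode after ~5 min of quiet */

/* Return seconds until the next poll, or 0 when it is time to leave storm mode. */
static unsigned int storm_poll_tick(bool error_seen, unsigned int *quiet_secs)
{
	if (error_seen)
		*quiet_secs = 0;			/* any logged error restarts the quiet window */
	else
		*quiet_secs += STORM_POLL_SECS;

	if (*quiet_secs >= STORM_BACKOFF_SECS)
		return 0;				/* quiet long enough: back out of storm mode */

	return STORM_POLL_SECS;				/* keep polling once a second */
}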
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce.c	86
1 file changed, 45 insertions(+), 41 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..d60cbb8d78f7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -58,7 +58,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-#define SPINUNIT 100	/* 100ns */
+#define SPINUNIT		100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
@@ -87,9 +87,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -623,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -647,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
+
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -680,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -695,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -1311,7 +1313,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1321,49 +1323,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
+	unsigned long when = jiffies + interval;
+	unsigned long flags;
 
-	return test_and_clear_bit(0, v);
+	local_irq_save(flags);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
 	unsigned long iv;
-	int notify;
 
-	WARN_ON(smp_processor_id() != data);
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
 	}
 
 	/*
-	 * Alert userspace if needed. If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
 	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
+	if (mce_notify_irq())
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
+	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
+
+done:
 	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, iv);
 }
 
 /*
@@ -1372,16 +1382,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long when = jiffies + interval;
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer_pinned(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, interval);
+
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1682,7 +1686,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
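
The mce_intel.c side of the change is outside this diffstat-limited view. As a
rough, hedged sketch of how the new bool return value of machine_check_poll()
might be consumed by the storm poller there (the identifiers cmci_storm_state,
cmci_backoff_cnt, mce_banks_owned and INITIAL_CHECK_INTERVAL are assumptions
made for illustration, not taken from this page):

/* Sketch only -- assumed per-CPU state, not quoted from this commit. */
bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * An error logged during the poll keeps us in storm mode by resetting
	 * the back-off counter; otherwise count down towards leaving it.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_write(cmci_backoff_cnt, __this_cpu_read(cmci_backoff_cnt) - 1);

	return true;
}

The caller in mce_timer_fn() (see the hunk above) then uses that true/false to
choose between the storm-adjusted interval and the regular double/halve
polling path.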