diff options
author | Chen, Gong <gong.chen@linux.intel.com> | 2014-03-27 21:24:36 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2014-03-28 16:40:16 -0400 |
commit | 27f6c573e0f77f7d1cc907c1494c99a61e48b7d8 (patch) | |
tree | 4af791c41a8dc43ae48d963c50a585c6a1c77041 /arch | |
parent | b098d6726bbfb94c06d6e1097466187afddae61f (diff) |
x86, CMCI: Add proper detection of end of CMCI storms
When a CMCI storm persists for a long time (at least beyond the predefined
threshold, which is 30 seconds for now), we can see that a CMCI storm is
detected again immediately after it is reported to have subsided.
...
Dec 10 22:04:29 kernel: CMCI storm detected: switching to poll mode
Dec 10 22:04:59 kernel: CMCI storm subsided: switching to interrupt mode
Dec 10 22:04:59 kernel: CMCI storm detected: switching to poll mode
Dec 10 22:05:29 kernel: CMCI storm subsided: switching to interrupt mode
...
The problem is that our logic that determines that the storm has
ended is incorrect. We announce the end, re-enable interrupts and
realize that the storm is still going on, so we switch back to
polling mode. Rinse, repeat.
When a storm happens we disable signaling of errors via CMCI and begin
polling machine check banks instead. If we find any logged errors,
then we need to set a per-cpu flag so that our per-cpu tests that
check whether the storm is ongoing will see that errors are still
being logged independently of whether mce_notify_irq() says that the
error has been fully processed.
cmci_clear() is not the right tool to disable a bank. It disables the
interrupt for the bank as desired, but it also clears the bit for
this bank in "mce_banks_owned" so we will skip the bank when polling
(so we fail to see that the storm continues because we stop looking).
New cmci_storm_disable_banks() just disables the interrupt while
allowing polling to continue.
Reported-by: William Dauchy <wdauchy@gmail.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 19 |
2 files changed, 35 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4d5419b249da..78c92125db8a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -89,6 +89,9 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); | |||
89 | static DEFINE_PER_CPU(struct mce, mces_seen); | 89 | static DEFINE_PER_CPU(struct mce, mces_seen); |
90 | static int cpu_missing; | 90 | static int cpu_missing; |
91 | 91 | ||
92 | /* CMCI storm detection filter */ | ||
93 | static DEFINE_PER_CPU(unsigned long, mce_polled_error); | ||
94 | |||
92 | /* | 95 | /* |
93 | * MCA banks polled by the period polling timer for corrected events. | 96 | * MCA banks polled by the period polling timer for corrected events. |
94 | * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). | 97 | * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). |
@@ -595,6 +598,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
595 | { | 598 | { |
596 | struct mce m; | 599 | struct mce m; |
597 | int i; | 600 | int i; |
601 | unsigned long *v; | ||
598 | 602 | ||
599 | this_cpu_inc(mce_poll_count); | 603 | this_cpu_inc(mce_poll_count); |
600 | 604 | ||
@@ -614,6 +618,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
614 | if (!(m.status & MCI_STATUS_VAL)) | 618 | if (!(m.status & MCI_STATUS_VAL)) |
615 | continue; | 619 | continue; |
616 | 620 | ||
621 | v = &get_cpu_var(mce_polled_error); | ||
622 | set_bit(0, v); | ||
617 | /* | 623 | /* |
618 | * Uncorrected or signalled events are handled by the exception | 624 | * Uncorrected or signalled events are handled by the exception |
619 | * handler when it is enabled, so don't process those here. | 625 | * handler when it is enabled, so don't process those here. |
@@ -1278,10 +1284,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) | |||
1278 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = | 1284 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = |
1279 | mce_adjust_timer_default; | 1285 | mce_adjust_timer_default; |
1280 | 1286 | ||
1287 | static int cmc_error_seen(void) | ||
1288 | { | ||
1289 | unsigned long *v = &__get_cpu_var(mce_polled_error); | ||
1290 | |||
1291 | return test_and_clear_bit(0, v); | ||
1292 | } | ||
1293 | |||
1281 | static void mce_timer_fn(unsigned long data) | 1294 | static void mce_timer_fn(unsigned long data) |
1282 | { | 1295 | { |
1283 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1296 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1284 | unsigned long iv; | 1297 | unsigned long iv; |
1298 | int notify; | ||
1285 | 1299 | ||
1286 | WARN_ON(smp_processor_id() != data); | 1300 | WARN_ON(smp_processor_id() != data); |
1287 | 1301 | ||
@@ -1296,7 +1310,9 @@ static void mce_timer_fn(unsigned long data) | |||
1296 | * polling interval, otherwise increase the polling interval. | 1310 | * polling interval, otherwise increase the polling interval. |
1297 | */ | 1311 | */ |
1298 | iv = __this_cpu_read(mce_next_interval); | 1312 | iv = __this_cpu_read(mce_next_interval); |
1299 | if (mce_notify_irq()) { | 1313 | notify = mce_notify_irq(); |
1314 | notify |= cmc_error_seen(); | ||
1315 | if (notify) { | ||
1300 | iv = max(iv / 2, (unsigned long) HZ/100); | 1316 | iv = max(iv / 2, (unsigned long) HZ/100); |
1301 | } else { | 1317 | } else { |
1302 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); | 1318 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index fb6156fee6f7..3bdb95ae8c43 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/cpumask.h> | ||
12 | #include <asm/apic.h> | 13 | #include <asm/apic.h> |
13 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
14 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
@@ -137,6 +138,22 @@ unsigned long mce_intel_adjust_timer(unsigned long interval) | |||
137 | } | 138 | } |
138 | } | 139 | } |
139 | 140 | ||
141 | static void cmci_storm_disable_banks(void) | ||
142 | { | ||
143 | unsigned long flags, *owned; | ||
144 | int bank; | ||
145 | u64 val; | ||
146 | |||
147 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | ||
148 | owned = __get_cpu_var(mce_banks_owned); | ||
149 | for_each_set_bit(bank, owned, MAX_NR_BANKS) { | ||
150 | rdmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
151 | val &= ~MCI_CTL2_CMCI_EN; | ||
152 | wrmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
153 | } | ||
154 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
155 | } | ||
156 | |||
140 | static bool cmci_storm_detect(void) | 157 | static bool cmci_storm_detect(void) |
141 | { | 158 | { |
142 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); | 159 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); |
@@ -158,7 +175,7 @@ static bool cmci_storm_detect(void) | |||
158 | if (cnt <= CMCI_STORM_THRESHOLD) | 175 | if (cnt <= CMCI_STORM_THRESHOLD) |
159 | return false; | 176 | return false; |
160 | 177 | ||
161 | cmci_clear(); | 178 | cmci_storm_disable_banks(); |
162 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); | 179 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); |
163 | r = atomic_add_return(1, &cmci_storm_on_cpus); | 180 | r = atomic_add_return(1, &cmci_storm_on_cpus); |
164 | mce_timer_kick(CMCI_POLL_INTERVAL); | 181 | mce_timer_kick(CMCI_POLL_INTERVAL); |