aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/x86/x86_64/boot-options.txt7
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/mce.h13
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h12
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c168
7 files changed, 244 insertions, 59 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index c54b4f503e2a..de38429beb71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -50,6 +50,13 @@ Machine check
50 monarchtimeout: 50 monarchtimeout:
51 Sets the time in us to wait for other CPUs on machine checks. 0 51 Sets the time in us to wait for other CPUs on machine checks. 0
52 to disable. 52 to disable.
53 mce=bios_cmci_threshold
54 Don't overwrite the bios-set CMCI threshold. This boot option
55 prevents Linux from overwriting the CMCI threshold set by the
56 bios. Without this option, Linux always sets the CMCI
57 threshold to 1. Enabling this may make memory predictive failure
58 analysis less effective if the bios sets thresholds for memory
59 errors since we will not see details for all errors.
53 60
54 nomce (for compatibility with i386): same as mce=off 61 nomce (for compatibility with i386): same as mce=off
55 62
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 57fecc1db94d..6cd6f24e1223 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -874,6 +874,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
874 874
875config X86_MCE 875config X86_MCE
876 bool "Machine Check / overheating reporting" 876 bool "Machine Check / overheating reporting"
877 default y
877 ---help--- 878 ---help---
878 Machine Check support allows the processor to notify the 879 Machine Check support allows the processor to notify the
879 kernel if it detects a problem (e.g. overheating, data corruption). 880 kernel if it detects a problem (e.g. overheating, data corruption).
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a3ac52b29cbf..54d73b1f00a0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,19 +116,9 @@ struct mce_log {
116/* Software defined banks */ 116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128 117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119 119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */
121#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9)
122#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9)
123#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9)
124#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9)
125#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9)
126#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
127#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
128
129 120
130#ifdef __KERNEL__ 121#ifdef __KERNEL__
131
132extern void mce_register_decode_chain(struct notifier_block *nb); 122extern void mce_register_decode_chain(struct notifier_block *nb);
133extern void mce_unregister_decode_chain(struct notifier_block *nb); 123extern void mce_unregister_decode_chain(struct notifier_block *nb);
134 124
@@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
171#ifdef CONFIG_X86_MCE_INTEL 161#ifdef CONFIG_X86_MCE_INTEL
172extern int mce_cmci_disabled; 162extern int mce_cmci_disabled;
173extern int mce_ignore_ce; 163extern int mce_ignore_ce;
164extern int mce_bios_cmci_threshold;
174void mce_intel_feature_init(struct cpuinfo_x86 *c); 165void mce_intel_feature_init(struct cpuinfo_x86 *c);
175void cmci_clear(void); 166void cmci_clear(void);
176void cmci_reenable(void); 167void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
78} 78}
79 79
80static cpumask_var_t mce_inject_cpumask; 80static cpumask_var_t mce_inject_cpumask;
81static DEFINE_MUTEX(mce_inject_mutex);
81 82
82static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) 83static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
83{ 84{
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
194 put_online_cpus(); 195 put_online_cpus();
195 } else 196 } else
196#endif 197#endif
198 {
199 preempt_disable();
197 raise_local(); 200 raise_local();
201 preempt_enable();
202 }
198} 203}
199 204
200/* Error injection interface */ 205/* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
225 * so do it a jiffie or two later everywhere. 230 * so do it a jiffie or two later everywhere.
226 */ 231 */
227 schedule_timeout(2); 232 schedule_timeout(2);
233
234 mutex_lock(&mce_inject_mutex);
228 raise_mce(&m); 235 raise_mce(&m);
236 mutex_unlock(&mce_inject_mutex);
229 return usize; 237 return usize;
230} 238}
231 239
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
28 28
29extern struct mce_bank *mce_banks; 29extern struct mce_bank *mce_banks;
30 30
31#ifdef CONFIG_X86_MCE_INTEL
32unsigned long mce_intel_adjust_timer(unsigned long interval);
33void mce_intel_cmci_poll(void);
34void mce_intel_hcpu_update(unsigned long cpu);
35#else
36# define mce_intel_adjust_timer mce_adjust_timer_default
37static inline void mce_intel_cmci_poll(void) { }
38static inline void mce_intel_hcpu_update(unsigned long cpu) { }
39#endif
40
41void mce_timer_kick(unsigned long interval);
42
31#ifdef CONFIG_ACPI_APEI 43#ifdef CONFIG_ACPI_APEI
32int apei_write_mce(struct mce *m); 44int apei_write_mce(struct mce *m);
33ssize_t apei_read_mce(struct mce *m, u64 *record_id); 45ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
83int mce_cmci_disabled __read_mostly; 83int mce_cmci_disabled __read_mostly;
84int mce_ignore_ce __read_mostly; 84int mce_ignore_ce __read_mostly;
85int mce_ser __read_mostly; 85int mce_ser __read_mostly;
86int mce_bios_cmci_threshold __read_mostly;
86 87
87struct mce_bank *mce_banks __read_mostly; 88struct mce_bank *mce_banks __read_mostly;
88 89
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
1266static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ 1267static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
1267static DEFINE_PER_CPU(struct timer_list, mce_timer); 1268static DEFINE_PER_CPU(struct timer_list, mce_timer);
1268 1269
1270static unsigned long mce_adjust_timer_default(unsigned long interval)
1271{
1272 return interval;
1273}
1274
1275static unsigned long (*mce_adjust_timer)(unsigned long interval) =
1276 mce_adjust_timer_default;
1277
1269static void mce_timer_fn(unsigned long data) 1278static void mce_timer_fn(unsigned long data)
1270{ 1279{
1271 struct timer_list *t = &__get_cpu_var(mce_timer); 1280 struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
1276 if (mce_available(__this_cpu_ptr(&cpu_info))) { 1285 if (mce_available(__this_cpu_ptr(&cpu_info))) {
1277 machine_check_poll(MCP_TIMESTAMP, 1286 machine_check_poll(MCP_TIMESTAMP,
1278 &__get_cpu_var(mce_poll_banks)); 1287 &__get_cpu_var(mce_poll_banks));
1288 mce_intel_cmci_poll();
1279 } 1289 }
1280 1290
1281 /* 1291 /*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
1283 * polling interval, otherwise increase the polling interval. 1293 * polling interval, otherwise increase the polling interval.
1284 */ 1294 */
1285 iv = __this_cpu_read(mce_next_interval); 1295 iv = __this_cpu_read(mce_next_interval);
1286 if (mce_notify_irq()) 1296 if (mce_notify_irq()) {
1287 iv = max(iv / 2, (unsigned long) HZ/100); 1297 iv = max(iv / 2, (unsigned long) HZ/100);
1288 else 1298 } else {
1289 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); 1299 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
1300 iv = mce_adjust_timer(iv);
1301 }
1290 __this_cpu_write(mce_next_interval, iv); 1302 __this_cpu_write(mce_next_interval, iv);
1303 /* Might have become 0 after CMCI storm subsided */
1304 if (iv) {
1305 t->expires = jiffies + iv;
1306 add_timer_on(t, smp_processor_id());
1307 }
1308}
1291 1309
1292 t->expires = jiffies + iv; 1310/*
1293 add_timer_on(t, smp_processor_id()); 1311 * Ensure that the timer is firing in @interval from now.
1312 */
1313void mce_timer_kick(unsigned long interval)
1314{
1315 struct timer_list *t = &__get_cpu_var(mce_timer);
1316 unsigned long when = jiffies + interval;
1317 unsigned long iv = __this_cpu_read(mce_next_interval);
1318
1319 if (timer_pending(t)) {
1320 if (time_before(when, t->expires))
1321 mod_timer_pinned(t, when);
1322 } else {
1323 t->expires = round_jiffies(when);
1324 add_timer_on(t, smp_processor_id());
1325 }
1326 if (interval < iv)
1327 __this_cpu_write(mce_next_interval, interval);
1294} 1328}
1295 1329
1296/* Must not be called in IRQ context where del_timer_sync() can deadlock */ 1330/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1585 switch (c->x86_vendor) { 1619 switch (c->x86_vendor) {
1586 case X86_VENDOR_INTEL: 1620 case X86_VENDOR_INTEL:
1587 mce_intel_feature_init(c); 1621 mce_intel_feature_init(c);
1622 mce_adjust_timer = mce_intel_adjust_timer;
1588 break; 1623 break;
1589 case X86_VENDOR_AMD: 1624 case X86_VENDOR_AMD:
1590 mce_amd_feature_init(c); 1625 mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1594 } 1629 }
1595} 1630}
1596 1631
1597static void __mcheck_cpu_init_timer(void) 1632static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1598{ 1633{
1599 struct timer_list *t = &__get_cpu_var(mce_timer); 1634 unsigned long iv = mce_adjust_timer(check_interval * HZ);
1600 unsigned long iv = check_interval * HZ;
1601 1635
1602 setup_timer(t, mce_timer_fn, smp_processor_id()); 1636 __this_cpu_write(mce_next_interval, iv);
1603 1637
1604 if (mce_ignore_ce) 1638 if (mce_ignore_ce || !iv)
1605 return; 1639 return;
1606 1640
1607 __this_cpu_write(mce_next_interval, iv);
1608 if (!iv)
1609 return;
1610 t->expires = round_jiffies(jiffies + iv); 1641 t->expires = round_jiffies(jiffies + iv);
1611 add_timer_on(t, smp_processor_id()); 1642 add_timer_on(t, smp_processor_id());
1612} 1643}
1613 1644
1645static void __mcheck_cpu_init_timer(void)
1646{
1647 struct timer_list *t = &__get_cpu_var(mce_timer);
1648 unsigned int cpu = smp_processor_id();
1649
1650 setup_timer(t, mce_timer_fn, cpu);
1651 mce_start_timer(cpu, t);
1652}
1653
1614/* Handle unconfigured int18 (should never happen) */ 1654/* Handle unconfigured int18 (should never happen) */
1615static void unexpected_machine_check(struct pt_regs *regs, long error_code) 1655static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1616{ 1656{
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
1907 * check, or 0 to not wait 1947 * check, or 0 to not wait
1908 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 1948 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
1909 * mce=nobootlog Don't log MCEs from before booting. 1949 * mce=nobootlog Don't log MCEs from before booting.
1950 * mce=bios_cmci_threshold Don't program the CMCI threshold
1910 */ 1951 */
1911static int __init mcheck_enable(char *str) 1952static int __init mcheck_enable(char *str)
1912{ 1953{
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
1926 mce_ignore_ce = 1; 1967 mce_ignore_ce = 1;
1927 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1968 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1928 mce_bootlog = (str[0] == 'b'); 1969 mce_bootlog = (str[0] == 'b');
1970 else if (!strcmp(str, "bios_cmci_threshold"))
1971 mce_bios_cmci_threshold = 1;
1929 else if (isdigit(str[0])) { 1972 else if (isdigit(str[0])) {
1930 get_option(&str, &tolerant); 1973 get_option(&str, &tolerant);
1931 if (*str == ',') { 1974 if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
2166 &mce_cmci_disabled 2209 &mce_cmci_disabled
2167}; 2210};
2168 2211
2212static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
2213 __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
2214 &mce_bios_cmci_threshold
2215};
2216
2169static struct device_attribute *mce_device_attrs[] = { 2217static struct device_attribute *mce_device_attrs[] = {
2170 &dev_attr_tolerant.attr, 2218 &dev_attr_tolerant.attr,
2171 &dev_attr_check_interval.attr, 2219 &dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
2174 &dev_attr_dont_log_ce.attr, 2222 &dev_attr_dont_log_ce.attr,
2175 &dev_attr_ignore_ce.attr, 2223 &dev_attr_ignore_ce.attr,
2176 &dev_attr_cmci_disabled.attr, 2224 &dev_attr_cmci_disabled.attr,
2225 &dev_attr_bios_cmci_threshold.attr,
2177 NULL 2226 NULL
2178}; 2227};
2179 2228
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2294 unsigned int cpu = (unsigned long)hcpu; 2343 unsigned int cpu = (unsigned long)hcpu;
2295 struct timer_list *t = &per_cpu(mce_timer, cpu); 2344 struct timer_list *t = &per_cpu(mce_timer, cpu);
2296 2345
2297 switch (action) { 2346 switch (action & ~CPU_TASKS_FROZEN) {
2298 case CPU_ONLINE: 2347 case CPU_ONLINE:
2299 case CPU_ONLINE_FROZEN:
2300 mce_device_create(cpu); 2348 mce_device_create(cpu);
2301 if (threshold_cpu_callback) 2349 if (threshold_cpu_callback)
2302 threshold_cpu_callback(action, cpu); 2350 threshold_cpu_callback(action, cpu);
2303 break; 2351 break;
2304 case CPU_DEAD: 2352 case CPU_DEAD:
2305 case CPU_DEAD_FROZEN:
2306 if (threshold_cpu_callback) 2353 if (threshold_cpu_callback)
2307 threshold_cpu_callback(action, cpu); 2354 threshold_cpu_callback(action, cpu);
2308 mce_device_remove(cpu); 2355 mce_device_remove(cpu);
2356 mce_intel_hcpu_update(cpu);
2309 break; 2357 break;
2310 case CPU_DOWN_PREPARE: 2358 case CPU_DOWN_PREPARE:
2311 case CPU_DOWN_PREPARE_FROZEN:
2312 del_timer_sync(t);
2313 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 2359 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
2360 del_timer_sync(t);
2314 break; 2361 break;
2315 case CPU_DOWN_FAILED: 2362 case CPU_DOWN_FAILED:
2316 case CPU_DOWN_FAILED_FROZEN:
2317 if (!mce_ignore_ce && check_interval) {
2318 t->expires = round_jiffies(jiffies +
2319 per_cpu(mce_next_interval, cpu));
2320 add_timer_on(t, cpu);
2321 }
2322 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2363 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2364 mce_start_timer(cpu, t);
2323 break; 2365 break;
2324 case CPU_POST_DEAD: 2366 }
2367
2368 if (action == CPU_POST_DEAD) {
2325 /* intentionally ignoring frozen here */ 2369 /* intentionally ignoring frozen here */
2326 cmci_rediscover(cpu); 2370 cmci_rediscover(cpu);
2327 break;
2328 } 2371 }
2372
2329 return NOTIFY_OK; 2373 return NOTIFY_OK;
2330} 2374}
2331 2375
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffc..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/mce.h> 16#include <asm/mce.h>
17 17
18#include "mce-internal.h"
19
18/* 20/*
19 * Support for Intel Correct Machine Check Interrupts. This allows 21 * Support for Intel Correct Machine Check Interrupts. This allows
20 * the CPU to raise an interrupt when a corrected machine check happened. 22 * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
30 */ 32 */
31static DEFINE_RAW_SPINLOCK(cmci_discover_lock); 33static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
32 34
33#define CMCI_THRESHOLD 1 35#define CMCI_THRESHOLD 1
36#define CMCI_POLL_INTERVAL (30 * HZ)
37#define CMCI_STORM_INTERVAL (1 * HZ)
38#define CMCI_STORM_THRESHOLD 15
39
40static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
41static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
42static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
43
44enum {
45 CMCI_STORM_NONE,
46 CMCI_STORM_ACTIVE,
47 CMCI_STORM_SUBSIDED,
48};
49
50static atomic_t cmci_storm_on_cpus;
34 51
35static int cmci_supported(int *banks) 52static int cmci_supported(int *banks)
36{ 53{
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
53 return !!(cap & MCG_CMCI_P); 70 return !!(cap & MCG_CMCI_P);
54} 71}
55 72
73void mce_intel_cmci_poll(void)
74{
75 if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
76 return;
77 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
78}
79
80void mce_intel_hcpu_update(unsigned long cpu)
81{
82 if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
83 atomic_dec(&cmci_storm_on_cpus);
84
85 per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
86}
87
88unsigned long mce_intel_adjust_timer(unsigned long interval)
89{
90 int r;
91
92 if (interval < CMCI_POLL_INTERVAL)
93 return interval;
94
95 switch (__this_cpu_read(cmci_storm_state)) {
96 case CMCI_STORM_ACTIVE:
97 /*
98 * We switch back to interrupt mode once the poll timer has
99 * silenced itself. That means no events recorded and the
100 * timer interval is back to our poll interval.
101 */
102 __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
103 r = atomic_sub_return(1, &cmci_storm_on_cpus);
104 if (r == 0)
105 pr_notice("CMCI storm subsided: switching to interrupt mode\n");
106 /* FALLTHROUGH */
107
108 case CMCI_STORM_SUBSIDED:
109 /*
110 * We wait for all cpus to go back to SUBSIDED
111 * state. When that happens we switch back to
112 * interrupt mode.
113 */
114 if (!atomic_read(&cmci_storm_on_cpus)) {
115 __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
116 cmci_reenable();
117 cmci_recheck();
118 }
119 return CMCI_POLL_INTERVAL;
120 default:
121 /*
122 * We have shiny weather. Let the poll do whatever it
123 * thinks.
124 */
125 return interval;
126 }
127}
128
129static bool cmci_storm_detect(void)
130{
131 unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
132 unsigned long ts = __this_cpu_read(cmci_time_stamp);
133 unsigned long now = jiffies;
134 int r;
135
136 if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
137 return true;
138
139 if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
140 cnt++;
141 } else {
142 cnt = 1;
143 __this_cpu_write(cmci_time_stamp, now);
144 }
145 __this_cpu_write(cmci_storm_cnt, cnt);
146
147 if (cnt <= CMCI_STORM_THRESHOLD)
148 return false;
149
150 cmci_clear();
151 __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
152 r = atomic_add_return(1, &cmci_storm_on_cpus);
153 mce_timer_kick(CMCI_POLL_INTERVAL);
154
155 if (r == 1)
156 pr_notice("CMCI storm detected: switching to poll mode\n");
157 return true;
158}
159
56/* 160/*
57 * The interrupt handler. This is called on every event. 161 * The interrupt handler. This is called on every event.
58 * Just call the poller directly to log any events. 162 * Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
61 */ 165 */
62static void intel_threshold_interrupt(void) 166static void intel_threshold_interrupt(void)
63{ 167{
168 if (cmci_storm_detect())
169 return;
64 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); 170 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
65 mce_notify_irq(); 171 mce_notify_irq();
66} 172}
67 173
68static void print_update(char *type, int *hdr, int num)
69{
70 if (*hdr == 0)
71 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
72 *hdr = 1;
73 printk(KERN_CONT " %s:%d", type, num);
74}
75
76/* 174/*
77 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks 175 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
78 * on this CPU. Use the algorithm recommended in the SDM to discover shared 176 * on this CPU. Use the algorithm recommended in the SDM to discover shared
79 * banks. 177 * banks.
80 */ 178 */
81static void cmci_discover(int banks, int boot) 179static void cmci_discover(int banks)
82{ 180{
83 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); 181 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
84 unsigned long flags; 182 unsigned long flags;
85 int hdr = 0;
86 int i; 183 int i;
184 int bios_wrong_thresh = 0;
87 185
88 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 186 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
89 for (i = 0; i < banks; i++) { 187 for (i = 0; i < banks; i++) {
90 u64 val; 188 u64 val;
189 int bios_zero_thresh = 0;
91 190
92 if (test_bit(i, owned)) 191 if (test_bit(i, owned))
93 continue; 192 continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
96 195
97 /* Already owned by someone else? */ 196 /* Already owned by someone else? */
98 if (val & MCI_CTL2_CMCI_EN) { 197 if (val & MCI_CTL2_CMCI_EN) {
99 if (test_and_clear_bit(i, owned) && !boot) 198 clear_bit(i, owned);
100 print_update("SHD", &hdr, i);
101 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 199 __clear_bit(i, __get_cpu_var(mce_poll_banks));
102 continue; 200 continue;
103 } 201 }
104 202
105 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 203 if (!mce_bios_cmci_threshold) {
106 val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; 204 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
205 val |= CMCI_THRESHOLD;
206 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
207 /*
208 * If bios_cmci_threshold boot option was specified
209 * but the threshold is zero, we'll try to initialize
210 * it to 1.
211 */
212 bios_zero_thresh = 1;
213 val |= CMCI_THRESHOLD;
214 }
215
216 val |= MCI_CTL2_CMCI_EN;
107 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 217 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
108 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 218 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
109 219
110 /* Did the enable bit stick? -- the bank supports CMCI */ 220 /* Did the enable bit stick? -- the bank supports CMCI */
111 if (val & MCI_CTL2_CMCI_EN) { 221 if (val & MCI_CTL2_CMCI_EN) {
112 if (!test_and_set_bit(i, owned) && !boot) 222 set_bit(i, owned);
113 print_update("CMCI", &hdr, i);
114 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 223 __clear_bit(i, __get_cpu_var(mce_poll_banks));
224 /*
225 * We are able to set thresholds for some banks that
226 * had a threshold of 0. This means the BIOS has not
227 * set the thresholds properly or does not work with
228 * this boot option. Note down now and report later.
229 */
230 if (mce_bios_cmci_threshold && bios_zero_thresh &&
231 (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
232 bios_wrong_thresh = 1;
115 } else { 233 } else {
116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 234 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
117 } 235 }
118 } 236 }
119 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 237 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
120 if (hdr) 238 if (mce_bios_cmci_threshold && bios_wrong_thresh) {
121 printk(KERN_CONT "\n"); 239 pr_info_once(
240 "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
241 pr_info_once(
242 "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
243 }
122} 244}
123 245
124/* 246/*
@@ -156,7 +278,7 @@ void cmci_clear(void)
156 continue; 278 continue;
157 /* Disable CMCI */ 279 /* Disable CMCI */
158 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 280 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
159 val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); 281 val &= ~MCI_CTL2_CMCI_EN;
160 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 282 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
161 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 283 __clear_bit(i, __get_cpu_var(mce_banks_owned));
162 } 284 }
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
186 continue; 308 continue;
187 /* Recheck banks in case CPUs don't all have the same */ 309 /* Recheck banks in case CPUs don't all have the same */
188 if (cmci_supported(&banks)) 310 if (cmci_supported(&banks))
189 cmci_discover(banks, 0); 311 cmci_discover(banks);
190 } 312 }
191 313
192 set_cpus_allowed_ptr(current, old); 314 set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
200{ 322{
201 int banks; 323 int banks;
202 if (cmci_supported(&banks)) 324 if (cmci_supported(&banks))
203 cmci_discover(banks, 0); 325 cmci_discover(banks);
204} 326}
205 327
206static void intel_init_cmci(void) 328static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
211 return; 333 return;
212 334
213 mce_threshold_vector = intel_threshold_interrupt; 335 mce_threshold_vector = intel_threshold_interrupt;
214 cmci_discover(banks, 1); 336 cmci_discover(banks);
215 /* 337 /*
216 * For CPU #0 this runs with still disabled APIC, but that's 338 * For CPU #0 this runs with still disabled APIC, but that's
217 * ok because only the vector is set up. We still do another 339 * ok because only the vector is set up. We still do another