 Documentation/x86/x86_64/boot-options.txt |   7
 arch/x86/Kconfig                          |   1
 arch/x86/include/asm/mce.h                |  13
 arch/x86/kernel/cpu/mcheck/mce-inject.c   |   8
 arch/x86/kernel/cpu/mcheck/mce-internal.h |  12
 arch/x86/kernel/cpu/mcheck/mce.c          |  94
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 168
 7 files changed, 244 insertions(+), 59 deletions(-)
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index c54b4f503e2a..de38429beb71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -50,6 +50,13 @@ Machine check
    monarchtimeout:
	Sets the time in us to wait for other CPUs on machine checks. 0
	to disable.
+   mce=bios_cmci_threshold
+	Don't overwrite the bios-set CMCI threshold. This boot option
+	prevents Linux from overwriting the CMCI threshold set by the
+	bios. Without this option, Linux always sets the CMCI
+	threshold to 1. Enabling this may make memory predictive failure
+	analysis less effective if the bios sets thresholds for memory
+	errors since we will not see details for all errors.
 
    nomce (for compatibility with i386): same as mce=off
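
Usage note: the option is a plain kernel command-line token, and this patch also adds a read-only sysfs attribute (further down, in mce.c) to confirm it took effect. The machinecheck device path shown here is the standard location and is stated as an assumption:

	mce=bios_cmci_threshold                                               (kernel command line)
	/sys/devices/system/machinecheck/machinecheck0/bios_cmci_threshold   (reads 1 when active)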
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 57fecc1db94d..6cd6f24e1223 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -874,6 +874,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
	bool "Machine Check / overheating reporting"
+	default y
	---help---
	  Machine Check support allows the processor to notify the
	  kernel if it detects a problem (e.g. overheating, data corruption).
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a3ac52b29cbf..54d73b1f00a0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,19 +116,9 @@ struct mce_log {
 /* Software defined banks */
 #define MCE_EXTENDED_BANK	128
 #define MCE_THERMAL_BANK	MCE_EXTENDED_BANK + 0
-
-#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)      /* MCE_AMD */
-#define K8_MCE_THRESHOLD_BANK_0    (MCE_THRESHOLD_BASE + 0 * 9)
-#define K8_MCE_THRESHOLD_BANK_1    (MCE_THRESHOLD_BASE + 1 * 9)
-#define K8_MCE_THRESHOLD_BANK_2    (MCE_THRESHOLD_BASE + 2 * 9)
-#define K8_MCE_THRESHOLD_BANK_3    (MCE_THRESHOLD_BASE + 3 * 9)
-#define K8_MCE_THRESHOLD_BANK_4    (MCE_THRESHOLD_BASE + 4 * 9)
-#define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
-#define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
-
+#define K8_MCE_THRESHOLD_BASE	(MCE_EXTENDED_BANK + 1)
 
 #ifdef __KERNEL__
-
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
@@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
 #ifdef CONFIG_X86_MCE_INTEL
 extern int mce_cmci_disabled;
 extern int mce_ignore_ce;
+extern int mce_bios_cmci_threshold;
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
		put_online_cpus();
	} else
 #endif
+	{
+		preempt_disable();
		raise_local();
+		preempt_enable();
+	}
 }
 
 /* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);
+
+	mutex_lock(&mce_inject_mutex);
	raise_mce(&m);
+	mutex_unlock(&mce_inject_mutex);
	return usize;
 }
 
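
The three hunks above close a race in the injector: concurrent writers could interleave injections, and raise_local() could be migrated to another CPU mid-injection. A condensed sketch of the resulting call pattern (inject_serialized() is a hypothetical wrapper for illustration; the real raise_mce() also handles the NMI-broadcast case shown in the #ifdef branch):

	static DEFINE_MUTEX(mce_inject_mutex);

	static void inject_serialized(struct mce *m)
	{
		mutex_lock(&mce_inject_mutex);	/* one injection at a time */
		preempt_disable();		/* pin to this CPU: raise_local()
						 * uses smp_processor_id() */
		raise_local();
		preempt_enable();
		mutex_unlock(&mce_inject_mutex);
	}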
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311c..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
 int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;
 
 struct mce_bank *mce_banks __read_mostly;
 
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+	return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+	mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
	struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
	if (mce_available(__this_cpu_ptr(&cpu_info))) {
		machine_check_poll(MCP_TIMESTAMP,
				&__get_cpu_var(mce_poll_banks));
+		mce_intel_cmci_poll();
	}
 
	/*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
	 * polling interval, otherwise increase the polling interval.
	 */
	iv = __this_cpu_read(mce_next_interval);
-	if (mce_notify_irq())
+	if (mce_notify_irq()) {
		iv = max(iv / 2, (unsigned long) HZ/100);
-	else
+	} else {
		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+		iv = mce_adjust_timer(iv);
+	}
	__this_cpu_write(mce_next_interval, iv);
+	/* Might have become 0 after CMCI storm subsided */
+	if (iv) {
+		t->expires = jiffies + iv;
+		add_timer_on(t, smp_processor_id());
+	}
+}
 
-	t->expires = jiffies + iv;
-	add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long when = jiffies + interval;
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+	if (interval < iv)
+		__this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
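
The hunks above make the poll cadence adaptive: each firing halves the interval if events were seen and doubles it otherwise. A user-space sketch of that rule (assuming HZ=1000 and the 5-minute default check_interval; next_interval() is a hypothetical stand-in for the arithmetic inside mce_timer_fn()):

	#include <stdio.h>

	#define HZ		1000UL
	#define CHECK_INTERVAL	(5 * 60)	/* seconds, kernel default */

	static unsigned long next_interval(unsigned long iv, int saw_events)
	{
		if (saw_events)	/* noisy: poll twice as often, floor HZ/100 */
			return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;
		/* quiet: back off, capped at the slow baseline */
		return iv * 2 < CHECK_INTERVAL * HZ ? iv * 2 : CHECK_INTERVAL * HZ;
	}

	int main(void)
	{
		unsigned long iv = CHECK_INTERVAL * HZ;
		int i;

		for (i = 0; i < 5; i++)		/* a burst of noisy polls */
			printf("busy:  iv = %lu jiffies\n", iv = next_interval(iv, 1));
		for (i = 0; i < 5; i++)		/* then silence */
			printf("quiet: iv = %lu jiffies\n", iv = next_interval(iv, 0));
		return 0;
	}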
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
+		mce_adjust_timer = mce_intel_adjust_timer;
		break;
	case X86_VENDOR_AMD:
		mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
	}
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = check_interval * HZ;
+	unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
-	setup_timer(t, mce_timer_fn, smp_processor_id());
+	__this_cpu_write(mce_next_interval, iv);
 
-	if (mce_ignore_ce)
+	if (mce_ignore_ce || !iv)
		return;
 
-	__this_cpu_write(mce_next_interval, iv);
-	if (!iv)
-		return;
	t->expires = round_jiffies(jiffies + iv);
	add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned int cpu = smp_processor_id();
+
+	setup_timer(t, mce_timer_fn, cpu);
+	mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
		mce_ignore_ce = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		mce_bootlog = (str[0] == 'b');
+	else if (!strcmp(str, "bios_cmci_threshold"))
+		mce_bios_cmci_threshold = 1;
	else if (isdigit(str[0])) {
		get_option(&str, &tolerant);
		if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
	&mce_cmci_disabled
 };
 
+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+	&mce_bios_cmci_threshold
+};
+
 static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_tolerant.attr,
	&dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_dont_log_ce.attr,
	&dev_attr_ignore_ce.attr,
	&dev_attr_cmci_disabled.attr,
+	&dev_attr_bios_cmci_threshold.attr,
	NULL
 };
 
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
	unsigned int cpu = (unsigned long)hcpu;
	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
		mce_device_create(cpu);
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		break;
	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		mce_device_remove(cpu);
+		mce_intel_hcpu_update(cpu);
		break;
	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		del_timer_sync(t);
		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		del_timer_sync(t);
		break;
	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					per_cpu(mce_next_interval, cpu));
-			add_timer_on(t, cpu);
-		}
		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mce_start_timer(cpu, t);
		break;
-	case CPU_POST_DEAD:
+	}
+
+	if (action == CPU_POST_DEAD) {
		/* intentionally ignoring frozen here */
		cmci_rediscover(cpu);
-		break;
	}
+
	return NOTIFY_OK;
 }
 
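
The notifier rewrite above relies on the layout of the hotplug action codes: every *_FROZEN action is its base action with the CPU_TASKS_FROZEN bit or'ed in, so masking that bit off collapses the duplicated case labels. A minimal sketch (constants mirror include/linux/cpu.h of this era):

	#define CPU_ONLINE		0x0002	/* CPU is up */
	#define CPU_TASKS_FROZEN	0x0010	/* set during suspend/resume */
	#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)

	/* so: (CPU_ONLINE_FROZEN & ~CPU_TASKS_FROZEN) == CPU_ONLINE,
	 * and one "case CPU_ONLINE:" handles both variants. */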
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffc..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD		1
+#define CMCI_POLL_INTERVAL	(30 * HZ)
+#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_THRESHOLD	15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+	CMCI_STORM_NONE,
+	CMCI_STORM_ACTIVE,
+	CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
 
 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
	return !!(cap & MCG_CMCI_P);
 }
 
+void mce_intel_cmci_poll(void)
+{
+	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+		return;
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+		atomic_dec(&cmci_storm_on_cpus);
+
+	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+	int r;
+
+	if (interval < CMCI_POLL_INTERVAL)
+		return interval;
+
+	switch (__this_cpu_read(cmci_storm_state)) {
+	case CMCI_STORM_ACTIVE:
+		/*
+		 * We switch back to interrupt mode once the poll timer has
+		 * silenced itself. That means no events recorded and the
+		 * timer interval is back to our poll interval.
+		 */
+		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+		r = atomic_sub_return(1, &cmci_storm_on_cpus);
+		if (r == 0)
+			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+		/* FALLTHROUGH */
+
+	case CMCI_STORM_SUBSIDED:
+		/*
+		 * We wait for all cpus to go back to SUBSIDED
+		 * state. When that happens we switch back to
+		 * interrupt mode.
+		 */
+		if (!atomic_read(&cmci_storm_on_cpus)) {
+			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+			cmci_reenable();
+			cmci_recheck();
+		}
+		return CMCI_POLL_INTERVAL;
+	default:
+		/*
+		 * We have shiny weather. Let the poll do whatever it
+		 * thinks.
+		 */
+		return interval;
+	}
+}
+
+static bool cmci_storm_detect(void)
+{
+	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+	unsigned long ts = __this_cpu_read(cmci_time_stamp);
+	unsigned long now = jiffies;
+	int r;
+
+	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+		return true;
+
+	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+		cnt++;
+	} else {
+		cnt = 1;
+		__this_cpu_write(cmci_time_stamp, now);
+	}
+	__this_cpu_write(cmci_storm_cnt, cnt);
+
+	if (cnt <= CMCI_STORM_THRESHOLD)
+		return false;
+
+	cmci_clear();
+	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+	r = atomic_add_return(1, &cmci_storm_on_cpus);
+	mce_timer_kick(CMCI_POLL_INTERVAL);
+
+	if (r == 1)
+		pr_notice("CMCI storm detected: switching to poll mode\n");
+	return true;
+}
+
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
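
cmci_storm_detect() above counts interrupts inside a one-second window: more than CMCI_STORM_THRESHOLD (15) of them trips storm mode, which disables CMCI and falls back to 30-second polling. A user-space sketch of just that window logic (a plain tick counter stands in for jiffies; storm_detect() is a hypothetical simplification):

	#include <stdbool.h>
	#include <stdio.h>

	#define STORM_INTERVAL	100	/* "one second" in fake ticks */
	#define STORM_THRESHOLD	15

	static unsigned long timestamp;
	static unsigned int count;

	static bool storm_detect(unsigned long now)
	{
		if (now <= timestamp + STORM_INTERVAL) {
			count++;		/* still inside the window */
		} else {
			count = 1;		/* window expired: restart */
			timestamp = now;
		}
		return count > STORM_THRESHOLD;
	}

	int main(void)
	{
		unsigned long now;

		/* 20 events in quick succession: the 16th trips the detector */
		for (now = 0; now < 20; now++)
			if (storm_detect(now))
				printf("storm detected at event %lu\n", now + 1);
		return 0;
	}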
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
  */
 static void intel_threshold_interrupt(void)
 {
+	if (cmci_storm_detect())
+		return;
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	mce_notify_irq();
 }
 
-static void print_update(char *type, int *hdr, int num)
-{
-	if (*hdr == 0)
-		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
-	*hdr = 1;
-	printk(KERN_CONT " %s:%d", type, num);
-}
-
 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
	unsigned long flags;
-	int hdr = 0;
	int i;
+	int bios_wrong_thresh = 0;
 
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
+		int bios_zero_thresh = 0;
 
		if (test_bit(i, owned))
			continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks)
 
		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
-			if (test_and_clear_bit(i, owned) && !boot)
-				print_update("SHD", &hdr, i);
+			clear_bit(i, owned);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}
 
-		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
-		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+		if (!mce_bios_cmci_threshold) {
+			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+			val |= CMCI_THRESHOLD;
+		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+			/*
+			 * If bios_cmci_threshold boot option was specified
+			 * but the threshold is zero, we'll try to initialize
+			 * it to 1.
+			 */
+			bios_zero_thresh = 1;
+			val |= CMCI_THRESHOLD;
+		}
+
+		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
-			if (!test_and_set_bit(i, owned) && !boot)
-				print_update("CMCI", &hdr, i);
+			set_bit(i, owned);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			/*
+			 * We are able to set thresholds for some banks that
+			 * had a threshold of 0. This means the BIOS has not
+			 * set the thresholds properly or does not work with
+			 * this boot option. Note down now and report later.
+			 */
+			if (mce_bios_cmci_threshold && bios_zero_thresh &&
+					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-	if (hdr)
-		printk(KERN_CONT "\n");
+	if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+		pr_info_once(
+			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+		pr_info_once(
+			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+	}
 }
 
 /*
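
The discovery loop above programs IA32_MCi_CTL2 differently depending on the boot option. A minimal sketch of just that bit manipulation (bit positions as defined in asm/msr-index.h, bit 30 enables CMCI and bits 14:0 hold the corrected-error threshold; program_ctl2() is a hypothetical helper, the real code reads and writes the MSR in place):

	#define MCI_CTL2_CMCI_EN		(1ULL << 30)
	#define MCI_CTL2_CMCI_THRESHOLD_MASK	0x7fffULL
	#define CMCI_THRESHOLD			1

	static unsigned long long program_ctl2(unsigned long long val, int bios_mode)
	{
		if (!bios_mode) {
			/* default: force the threshold to 1 */
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/* bios_cmci_threshold: keep a nonzero BIOS value,
			 * but repair a zero one */
			val |= CMCI_THRESHOLD;
		}
		return val | MCI_CTL2_CMCI_EN;
	}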
@@ -156,7 +278,7 @@ void cmci_clear(void)
			continue;
		/* Disable CMCI */
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+		val &= ~MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		__clear_bit(i, __get_cpu_var(mce_banks_owned));
	}
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
			continue;
		/* Recheck banks in case CPUs don't all have the same */
		if (cmci_supported(&banks))
-			cmci_discover(banks, 0);
+			cmci_discover(banks);
	}
 
	set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
 {
	int banks;
	if (cmci_supported(&banks))
-		cmci_discover(banks, 0);
+		cmci_discover(banks);
 }
 
 static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
		return;
 
	mce_threshold_vector = intel_threshold_interrupt;
-	cmci_discover(banks, 1);
+	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another