author     H. Peter Anvin <hpa@linux.intel.com>    2012-10-19 10:54:24 -0400
committer  H. Peter Anvin <hpa@linux.intel.com>    2012-10-19 10:55:09 -0400
commit     4533d86270d7986e00594495dde9a109d6be27ae (patch)
tree       c2473cac653f7b98e5bd5e6475e63734be4b7644 /arch/x86/kernel/cpu/mcheck
parent     21c5e50e15b1abd797e62f18fd7f90b9cc004cbd (diff)
parent     5bc66170dc486556a1e36fd384463536573f4b82 (diff)
Merge commit '5bc66170dc486556a1e36fd384463536573f4b82' into x86/urgent
From Borislav Petkov <bp@amd64.org>:
Below is a RAS fix which reverts the addition of a sysfs attribute
that we agreed, post factum, is not needed. It should go in now,
because otherwise that sysfs attribute ends up in 3.7 and is exposed
to userspace; removing it later would be a lot harder.
This is done as a merge rather than a simple patch/cherry-pick since
the baseline for this patch was not in the previous x86/urgent.
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
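
The substance of the merged range for this directory is the CMCI storm handling
added to mce_intel.c further down: corrected-error interrupts are counted per
CPU inside a one-second window, and once more than 15 arrive the CPU tears down
CMCI and falls back to a 30-second poll timer until the storm subsides. As
orientation before the hunks, here is a minimal userspace sketch of that
rate-limiting idea; the single-CPU state, names and time source are
simplifications for illustration, not the kernel's code:

/*
 * Illustrative model of the CMCI storm detector added below (see the
 * mce_intel.c hunk): count events inside a one-second window; past 15
 * events the handler would disable CMCI and fall back to a 30-second
 * poll timer, re-arming interrupts once the storm subsides.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define STORM_INTERVAL_SEC  1   /* counting window, mirrors CMCI_STORM_INTERVAL   */
#define STORM_THRESHOLD     15  /* events per window, mirrors CMCI_STORM_THRESHOLD */

enum storm_state { STORM_NONE, STORM_ACTIVE };

static enum storm_state state = STORM_NONE;
static unsigned int storm_cnt;
static time_t window_start;

/* Called for every simulated corrected-error interrupt. */
static bool storm_detect(time_t now)
{
        if (state != STORM_NONE)
                return true;            /* already in poll mode */

        if (now - window_start <= STORM_INTERVAL_SEC) {
                storm_cnt++;
        } else {
                storm_cnt = 1;          /* new window */
                window_start = now;
        }

        if (storm_cnt <= STORM_THRESHOLD)
                return false;           /* stay in interrupt mode */

        state = STORM_ACTIVE;           /* would disable CMCI, kick the poll timer */
        printf("storm detected: switching to poll mode\n");
        return true;
}

int main(void)
{
        time_t now = time(NULL);

        /* 20 interrupts inside one window trip the detector. */
        for (int i = 0; i < 20; i++)
                storm_detect(now);
        return 0;
}

The real detector (cmci_storm_detect() and mce_intel_adjust_timer() in the
mce_intel.c hunk below) keeps this state per CPU, works in jiffies, kicks the
MCE poll timer via mce_timer_kick(), and only re-enables CMCI once every CPU's
storm has subsided.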
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c   |   8
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h |  12
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c          |  88
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c    | 168
 4 files changed, 228 insertions(+), 48 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb39357..ddc72f83933 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
                 put_online_cpus();
         } else
 #endif
+        {
+                preempt_disable();
                 raise_local();
+                preempt_enable();
+        }
 }
 
 /* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
          * so do it a jiffie or two later everywhere.
          */
         schedule_timeout(2);
+
+        mutex_lock(&mce_inject_mutex);
         raise_mce(&m);
+        mutex_unlock(&mce_inject_mutex);
         return usize;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a6585..6a05c1d327a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 292d0258311..46cbf868969 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
 int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;
 
 struct mce_bank *mce_banks __read_mostly;
 
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+        return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+        mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
         struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
         if (mce_available(__this_cpu_ptr(&cpu_info))) {
                 machine_check_poll(MCP_TIMESTAMP,
                                 &__get_cpu_var(mce_poll_banks));
+                mce_intel_cmci_poll();
         }
 
         /*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
          * polling interval, otherwise increase the polling interval.
          */
         iv = __this_cpu_read(mce_next_interval);
-        if (mce_notify_irq())
+        if (mce_notify_irq()) {
                 iv = max(iv / 2, (unsigned long) HZ/100);
-        else
+        } else {
                 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+                iv = mce_adjust_timer(iv);
+        }
         __this_cpu_write(mce_next_interval, iv);
+        /* Might have become 0 after CMCI storm subsided */
+        if (iv) {
+                t->expires = jiffies + iv;
+                add_timer_on(t, smp_processor_id());
+        }
+}
 
-        t->expires = jiffies + iv;
-        add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+        struct timer_list *t = &__get_cpu_var(mce_timer);
+        unsigned long when = jiffies + interval;
+        unsigned long iv = __this_cpu_read(mce_next_interval);
+
+        if (timer_pending(t)) {
+                if (time_before(when, t->expires))
+                        mod_timer_pinned(t, when);
+        } else {
+                t->expires = round_jiffies(when);
+                add_timer_on(t, smp_processor_id());
+        }
+        if (interval < iv)
+                __this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
         switch (c->x86_vendor) {
         case X86_VENDOR_INTEL:
                 mce_intel_feature_init(c);
+                mce_adjust_timer = mce_intel_adjust_timer;
                 break;
         case X86_VENDOR_AMD:
                 mce_amd_feature_init(c);
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
         }
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-        struct timer_list *t = &__get_cpu_var(mce_timer);
-        unsigned long iv = check_interval * HZ;
+        unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
-        setup_timer(t, mce_timer_fn, smp_processor_id());
+        __this_cpu_write(mce_next_interval, iv);
 
-        if (mce_ignore_ce)
+        if (mce_ignore_ce || !iv)
                 return;
 
-        __this_cpu_write(mce_next_interval, iv);
-        if (!iv)
-                return;
         t->expires = round_jiffies(jiffies + iv);
         add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+        struct timer_list *t = &__get_cpu_var(mce_timer);
+        unsigned int cpu = smp_processor_id();
+
+        setup_timer(t, mce_timer_fn, cpu);
+        mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *      check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
                 mce_ignore_ce = 1;
         else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
                 mce_bootlog = (str[0] == 'b');
+        else if (!strcmp(str, "bios_cmci_threshold"))
+                mce_bios_cmci_threshold = 1;
         else if (isdigit(str[0])) {
                 get_option(&str, &tolerant);
                 if (*str == ',') {
@@ -2294,38 +2337,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
         unsigned int cpu = (unsigned long)hcpu;
         struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-        switch (action) {
+        switch (action & ~CPU_TASKS_FROZEN) {
         case CPU_ONLINE:
-        case CPU_ONLINE_FROZEN:
                 mce_device_create(cpu);
                 if (threshold_cpu_callback)
                         threshold_cpu_callback(action, cpu);
                 break;
         case CPU_DEAD:
-        case CPU_DEAD_FROZEN:
                 if (threshold_cpu_callback)
                         threshold_cpu_callback(action, cpu);
                 mce_device_remove(cpu);
+                mce_intel_hcpu_update(cpu);
                 break;
         case CPU_DOWN_PREPARE:
-        case CPU_DOWN_PREPARE_FROZEN:
-                del_timer_sync(t);
                 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+                del_timer_sync(t);
                 break;
         case CPU_DOWN_FAILED:
-        case CPU_DOWN_FAILED_FROZEN:
-                if (!mce_ignore_ce && check_interval) {
-                        t->expires = round_jiffies(jiffies +
-                                        per_cpu(mce_next_interval, cpu));
-                        add_timer_on(t, cpu);
-                }
                 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+                mce_start_timer(cpu, t);
                 break;
-        case CPU_POST_DEAD:
+        }
+
+        if (action == CPU_POST_DEAD) {
                 /* intentionally ignoring frozen here */
                 cmci_rediscover(cpu);
-                break;
         }
+
         return NOTIFY_OK;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ff..5f88abf07e9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD          1
+#define CMCI_POLL_INTERVAL      (30 * HZ)
+#define CMCI_STORM_INTERVAL     (1 * HZ)
+#define CMCI_STORM_THRESHOLD    15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+        CMCI_STORM_NONE,
+        CMCI_STORM_ACTIVE,
+        CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
 
 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
         return !!(cap & MCG_CMCI_P);
 }
 
+void mce_intel_cmci_poll(void)
+{
+        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+                return;
+        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+        if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+                atomic_dec(&cmci_storm_on_cpus);
+
+        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+        int r;
+
+        if (interval < CMCI_POLL_INTERVAL)
+                return interval;
+
+        switch (__this_cpu_read(cmci_storm_state)) {
+        case CMCI_STORM_ACTIVE:
+                /*
+                 * We switch back to interrupt mode once the poll timer has
+                 * silenced itself. That means no events recorded and the
+                 * timer interval is back to our poll interval.
+                 */
+                __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+                r = atomic_sub_return(1, &cmci_storm_on_cpus);
+                if (r == 0)
+                        pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+                /* FALLTHROUGH */
+
+        case CMCI_STORM_SUBSIDED:
+                /*
+                 * We wait for all cpus to go back to SUBSIDED
+                 * state. When that happens we switch back to
+                 * interrupt mode.
+                 */
+                if (!atomic_read(&cmci_storm_on_cpus)) {
+                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+                        cmci_reenable();
+                        cmci_recheck();
+                }
+                return CMCI_POLL_INTERVAL;
+        default:
+                /*
+                 * We have shiny weather. Let the poll do whatever it
+                 * thinks.
+                 */
+                return interval;
+        }
+}
+
+static bool cmci_storm_detect(void)
+{
+        unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+        unsigned long ts = __this_cpu_read(cmci_time_stamp);
+        unsigned long now = jiffies;
+        int r;
+
+        if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+                return true;
+
+        if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+                cnt++;
+        } else {
+                cnt = 1;
+                __this_cpu_write(cmci_time_stamp, now);
+        }
+        __this_cpu_write(cmci_storm_cnt, cnt);
+
+        if (cnt <= CMCI_STORM_THRESHOLD)
+                return false;
+
+        cmci_clear();
+        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+        r = atomic_add_return(1, &cmci_storm_on_cpus);
+        mce_timer_kick(CMCI_POLL_INTERVAL);
+
+        if (r == 1)
+                pr_notice("CMCI storm detected: switching to poll mode\n");
+        return true;
+}
+
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
  */
 static void intel_threshold_interrupt(void)
 {
+        if (cmci_storm_detect())
+                return;
         machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
         mce_notify_irq();
 }
 
-static void print_update(char *type, int *hdr, int num)
-{
-        if (*hdr == 0)
-                printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
-        *hdr = 1;
-        printk(KERN_CONT " %s:%d", type, num);
-}
-
 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
         unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
         unsigned long flags;
-        int hdr = 0;
         int i;
+        int bios_wrong_thresh = 0;
 
         raw_spin_lock_irqsave(&cmci_discover_lock, flags);
         for (i = 0; i < banks; i++) {
                 u64 val;
+                int bios_zero_thresh = 0;
 
                 if (test_bit(i, owned))
                         continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
 
                 /* Already owned by someone else? */
                 if (val & MCI_CTL2_CMCI_EN) {
-                        if (test_and_clear_bit(i, owned) && !boot)
-                                print_update("SHD", &hdr, i);
+                        clear_bit(i, owned);
                         __clear_bit(i, __get_cpu_var(mce_poll_banks));
                         continue;
                 }
 
-                val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
-                val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+                if (!mce_bios_cmci_threshold) {
+                        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+                        val |= CMCI_THRESHOLD;
+                } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+                        /*
+                         * If bios_cmci_threshold boot option was specified
+                         * but the threshold is zero, we'll try to initialize
+                         * it to 1.
+                         */
+                        bios_zero_thresh = 1;
+                        val |= CMCI_THRESHOLD;
+                }
+
+                val |= MCI_CTL2_CMCI_EN;
                 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
                 /* Did the enable bit stick? -- the bank supports CMCI */
                 if (val & MCI_CTL2_CMCI_EN) {
-                        if (!test_and_set_bit(i, owned) && !boot)
-                                print_update("CMCI", &hdr, i);
+                        set_bit(i, owned);
                         __clear_bit(i, __get_cpu_var(mce_poll_banks));
+                        /*
+                         * We are able to set thresholds for some banks that
+                         * had a threshold of 0. This means the BIOS has not
+                         * set the thresholds properly or does not work with
+                         * this boot option. Note down now and report later.
+                         */
+                        if (mce_bios_cmci_threshold && bios_zero_thresh &&
+                                        (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+                                bios_wrong_thresh = 1;
                 } else {
                         WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
                 }
         }
         raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-        if (hdr)
-                printk(KERN_CONT "\n");
+        if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+                pr_info_once(
+                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+                pr_info_once(
+                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+        }
 }
 
 /*
@@ -156,7 +278,7 @@ void cmci_clear(void)
                         continue;
                 /* Disable CMCI */
                 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-                val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+                val &= ~MCI_CTL2_CMCI_EN;
                 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                 __clear_bit(i, __get_cpu_var(mce_banks_owned));
         }
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
                         continue;
                 /* Recheck banks in case CPUs don't all have the same */
                 if (cmci_supported(&banks))
-                        cmci_discover(banks, 0);
+                        cmci_discover(banks);
         }
 
         set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
 {
         int banks;
         if (cmci_supported(&banks))
-                cmci_discover(banks, 0);
+                cmci_discover(banks);
 }
 
 static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
                 return;
 
         mce_threshold_vector = intel_threshold_interrupt;
-        cmci_discover(banks, 1);
+        cmci_discover(banks);
         /*
          * For CPU #0 this runs with still disabled APIC, but that's
          * ok because only the vector is set up. We still do another
