| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 18:02:37 -0500 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 18:02:37 -0500 |
| commit | edf7c8148ec40c0fd27c0ef3f688defcc65e3913 (patch) | |
| tree | dde7448208538c616ad3ba25b41a816fccc015af | |
| parent | 82406da4a6998a0c98db0c5afb1695f97889bf79 (diff) | |
| parent | a228b5892b0527b8574c06edc72cacaf8c25418d (diff) | |
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86: add IRQ context simulation in module mce-inject
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
x86, MCE: Drain mcelog buffer
x86, mce: Add wrappers for registering on the decode chain
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | arch/x86/include/asm/mce.h | 12 |
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 34 |
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 64 |
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 |
| -rw-r--r-- | drivers/edac/i7core_edac.c | 4 |
| -rw-r--r-- | drivers/edac/mce_amd.c | 4 |
| -rw-r--r-- | drivers/edac/sb_edac.c | 6 |

7 files changed, 107 insertions(+), 46 deletions(-)
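As a quick illustration of the interface added by "x86, mce: Add wrappers for registering on the decode chain", the sketch below shows how a decoder module might hook into the chain after this merge. It is a minimal sketch, not code from the patch: the module name, notifier callback body, and printed fields are illustrative assumptions; only mce_register_decode_chain()/mce_unregister_decode_chain() and the notifier calling convention (val = 0, data = struct mce *) come from the diff that follows.

```c
/*
 * Hypothetical MCE decoder module using the wrappers introduced in this
 * merge. Only the register/unregister calls are from the patch; the rest
 * is illustrative.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/mce.h>

static int demo_decode_mce(struct notifier_block *nb, unsigned long val,
			   void *data)
{
	struct mce *m = data;

	/* Print a human-readable summary of the logged machine check. */
	pr_info("demo-mce-dec: bank %d status 0x%llx addr 0x%llx\n",
		m->bank, (unsigned long long)m->status,
		(unsigned long long)m->addr);

	return NOTIFY_STOP;
}

static struct notifier_block demo_mce_dec_nb = {
	.notifier_call = demo_decode_mce,
};

static int __init demo_mce_dec_init(void)
{
	/* Registration also replays MCEs already queued in the mcelog
	 * buffer, so errors logged before this module loaded still reach
	 * the callback above. */
	mce_register_decode_chain(&demo_mce_dec_nb);
	return 0;
}

static void __exit demo_mce_dec_exit(void)
{
	mce_unregister_decode_chain(&demo_mce_dec_nb);
}

module_init(demo_mce_dec_init);
module_exit(demo_mce_dec_exit);
MODULE_LICENSE("GPL");
```

Registering through the wrapper instead of touching x86_mce_decoder_chain directly is what allows drain_mcelog_buffer() (see the mce.c hunk below) to replay boot-time events to late-loading decoders.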
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0e8ae57d3656..6add827381c9 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -50,10 +50,11 @@
 #define MCJ_CTX_MASK 3
 #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
 #define MCJ_CTX_RANDOM 0 /* inject context: random */
-#define MCJ_CTX_PROCESS 1 /* inject context: process */
-#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
-#define MCJ_EXCEPTION 8 /* raise as exception */
+#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
+#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
+#define MCJ_EXCEPTION 0x8 /* raise as exception */
+#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
 
 /* Fields are zero when not available */
 struct mce {
@@ -120,7 +121,8 @@ struct mce_log {
 
 #ifdef __KERNEL__
 
-extern struct atomic_notifier_head x86_mce_decoder_chain;
+extern void mce_register_decode_chain(struct notifier_block *nb);
+extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 #include <linux/percpu.h>
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 319882ef848d..fc4beb393577 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/fs.h>
+#include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/notifier.h>
 #include <linux/kdebug.h>
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
         return NMI_HANDLED;
 }
 
+static void mce_irq_ipi(void *info)
+{
+        int cpu = smp_processor_id();
+        struct mce *m = &__get_cpu_var(injectm);
+
+        if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
+                        m->inject_flags & MCJ_EXCEPTION) {
+                cpumask_clear_cpu(cpu, mce_inject_cpumask);
+                raise_exception(m, NULL);
+        }
+}
+
 /* Inject mce on current CPU */
 static int raise_local(void)
 {
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m)
                 return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-        if (m->inject_flags & MCJ_NMI_BROADCAST) {
+        if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
                 unsigned long start;
                 int cpu;
+
                 get_online_cpus();
                 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
                 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m)
                             MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
                                 cpumask_clear_cpu(cpu, mce_inject_cpumask);
                 }
-                if (!cpumask_empty(mce_inject_cpumask))
-                        apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
+                if (!cpumask_empty(mce_inject_cpumask)) {
+                        if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+                                /*
+                                 * don't wait because mce_irq_ipi is necessary
+                                 * to be sync with following raise_local
+                                 */
+                                preempt_disable();
+                                smp_call_function_many(mce_inject_cpumask,
+                                        mce_irq_ipi, NULL, 0);
+                                preempt_enable();
+                        } else if (m->inject_flags & MCJ_NMI_BROADCAST)
+                                apic->send_IPI_mask(mce_inject_cpumask,
+                                                NMI_VECTOR);
+                }
                 start = jiffies;
                 while (!cpumask_empty(mce_inject_cpumask)) {
                         if (!time_before(jiffies, start + 2*HZ)) {
                                 printk(KERN_ERR
-                                "Timeout waiting for mce inject NMI %lx\n",
+                                "Timeout waiting for mce inject %lx\n",
                                 *cpumask_bits(mce_inject_cpumask));
                                 break;
                         }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e9c9d0aab36a..cbe82b5918ce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/*
- * CPU/chipset specific EDAC code can register a notifier call here to print
- * MCE errors in a human-readable form.
- */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
-EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
         [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -188,6 +187,57 @@ void mce_log(struct mce *mce)
         set_bit(0, &mce_need_notify);
 }
 
+static void drain_mcelog_buffer(void)
+{
+        unsigned int next, i, prev = 0;
+
+        next = rcu_dereference_check_mce(mcelog.next);
+
+        do {
+                struct mce *m;
+
+                /* drain what was logged during boot */
+                for (i = prev; i < next; i++) {
+                        unsigned long start = jiffies;
+                        unsigned retries = 1;
+
+                        m = &mcelog.entry[i];
+
+                        while (!m->finished) {
+                                if (time_after_eq(jiffies, start + 2*retries))
+                                        retries++;
+
+                                cpu_relax();
+
+                                if (!m->finished && retries >= 4) {
+                                        pr_err("MCE: skipping error being logged currently!\n");
+                                        break;
+                                }
+                        }
+                        smp_rmb();
+                        atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+                }
+
+                memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
+                prev = next;
+                next = cmpxchg(&mcelog.next, prev, 0);
+        } while (next != prev);
+}
+
+
+void mce_register_decode_chain(struct notifier_block *nb)
+{
+        atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
+        drain_mcelog_buffer();
+}
+EXPORT_SYMBOL_GPL(mce_register_decode_chain);
+
+void mce_unregister_decode_chain(struct notifier_block *nb)
+{
+        atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+
 static void print_mce(struct mce *m)
 {
         int ret = 0;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ce215616d5b9..39c6089891e4 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -323,17 +323,6 @@ device_initcall(thermal_throttle_init_device);
 
 #endif /* CONFIG_SYSFS */
 
-/*
- * Set up the most two significant bit to notify mce log that this thermal
- * event type.
- * This is a temp solution. May be changed in the future with mce log
- * infrasture.
- */
-#define CORE_THROTTLED (0)
-#define CORE_POWER_LIMIT ((__u64)1 << 62)
-#define PACKAGE_THROTTLED ((__u64)2 << 62)
-#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
-
 static void notify_thresholds(__u64 msr_val)
 {
         /* check whether the interrupt handler is defined;
@@ -363,27 +352,23 @@ static void intel_thermal_interrupt(void)
         if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                 THERMAL_THROTTLING_EVENT,
                                 CORE_LEVEL) != 0)
-                mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
+                mce_log_therm_throt_event(msr_val);
 
         if (this_cpu_has(X86_FEATURE_PLN))
-                if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+                therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
                                         POWER_LIMIT_EVENT,
-                                        CORE_LEVEL) != 0)
-                        mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
+                                        CORE_LEVEL);
 
         if (this_cpu_has(X86_FEATURE_PTS)) {
                 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
-                if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+                therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
                                         THERMAL_THROTTLING_EVENT,
-                                        PACKAGE_LEVEL) != 0)
-                        mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
+                                        PACKAGE_LEVEL);
                 if (this_cpu_has(X86_FEATURE_PLN))
-                        if (therm_throt_process(msr_val &
+                        therm_throt_process(msr_val &
                                         PACKAGE_THERM_STATUS_POWER_LIMIT,
                                         POWER_LIMIT_EVENT,
-                                        PACKAGE_LEVEL) != 0)
-                                mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
-                                        | msr_val);
+                                        PACKAGE_LEVEL);
         }
 }
 
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 70ad8923f1d7..8568d9b61875 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -2234,7 +2234,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
         if (pvt->enable_scrub)
                 disable_sdram_scrub_setting(mci);
 
-        atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
+        mce_unregister_decode_chain(&i7_mce_dec);
 
         /* Disable EDAC polling */
         i7core_pci_ctl_release(pvt);
@@ -2336,7 +2336,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
         /* DCLK for scrub rate setting */
         pvt->dclk_freq = get_dclk_freq();
 
-        atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
+        mce_register_decode_chain(&i7_mce_dec);
 
         return 0;
 
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index d0864d9c38ad..bd926ea2e00c 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -884,7 +884,7 @@ static int __init mce_amd_init(void)
 
         pr_info("MCE: In-kernel MCE decoding enabled.\n");
 
-        atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+        mce_register_decode_chain(&amd_mce_dec_nb);
 
         return 0;
 }
@@ -893,7 +893,7 @@ early_initcall(mce_amd_init);
 #ifdef MODULE
 static void __exit mce_amd_exit(void)
 {
-        atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+        mce_unregister_decode_chain(&amd_mce_dec_nb);
         kfree(fam_ops);
 }
 
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 88df48956c1b..1dc118d83cc6 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1659,8 +1659,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
                 __func__, mci, &sbridge_dev->pdev[0]->dev);
 
-        atomic_notifier_chain_unregister(&x86_mce_decoder_chain,
-                                         &sbridge_mce_dec);
+        mce_unregister_decode_chain(&sbridge_mce_dec);
 
         /* Remove MC sysfs nodes */
         edac_mc_del_mc(mci->dev);
@@ -1729,8 +1728,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
                 goto fail0;
         }
 
-        atomic_notifier_chain_register(&x86_mce_decoder_chain,
-                                       &sbridge_mce_dec);
+        mce_register_decode_chain(&sbridge_mce_dec);
         return 0;
 
 fail0:
