diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 18:02:37 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 18:02:37 -0500 |
commit | edf7c8148ec40c0fd27c0ef3f688defcc65e3913 (patch) | |
tree | dde7448208538c616ad3ba25b41a816fccc015af | |
parent | 82406da4a6998a0c98db0c5afb1695f97889bf79 (diff) | |
parent | a228b5892b0527b8574c06edc72cacaf8c25418d (diff) |
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: add IRQ context simulation in module mce-inject
  x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
  x86, MCE: Drain mcelog buffer
  x86, mce: Add wrappers for registering on the decode chain
-rw-r--r-- | arch/x86/include/asm/mce.h | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 34 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 64 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 6 |
7 files changed, 107 insertions, 46 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 0e8ae57d3656..6add827381c9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -50,10 +50,11 @@ | |||
50 | #define MCJ_CTX_MASK 3 | 50 | #define MCJ_CTX_MASK 3 |
51 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) | 51 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) |
52 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ | 52 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ |
53 | #define MCJ_CTX_PROCESS 1 /* inject context: process */ | 53 | #define MCJ_CTX_PROCESS 0x1 /* inject context: process */ |
54 | #define MCJ_CTX_IRQ 2 /* inject context: IRQ */ | 54 | #define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ |
55 | #define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ | 55 | #define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ |
56 | #define MCJ_EXCEPTION 8 /* raise as exception */ | 56 | #define MCJ_EXCEPTION 0x8 /* raise as exception */ |
57 | #define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ | ||
57 | 58 | ||
58 | /* Fields are zero when not available */ | 59 | /* Fields are zero when not available */ |
59 | struct mce { | 60 | struct mce { |
@@ -120,7 +121,8 @@ struct mce_log { | |||
120 | 121 | ||
121 | #ifdef __KERNEL__ | 122 | #ifdef __KERNEL__ |
122 | 123 | ||
123 | extern struct atomic_notifier_head x86_mce_decoder_chain; | 124 | extern void mce_register_decode_chain(struct notifier_block *nb); |
125 | extern void mce_unregister_decode_chain(struct notifier_block *nb); | ||
124 | 126 | ||
125 | #include <linux/percpu.h> | 127 | #include <linux/percpu.h> |
126 | #include <linux/init.h> | 128 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 319882ef848d..fc4beb393577 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/preempt.h> | ||
20 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
21 | #include <linux/notifier.h> | 22 | #include <linux/notifier.h> |
22 | #include <linux/kdebug.h> | 23 | #include <linux/kdebug.h> |
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) | |||
92 | return NMI_HANDLED; | 93 | return NMI_HANDLED; |
93 | } | 94 | } |
94 | 95 | ||
96 | static void mce_irq_ipi(void *info) | ||
97 | { | ||
98 | int cpu = smp_processor_id(); | ||
99 | struct mce *m = &__get_cpu_var(injectm); | ||
100 | |||
101 | if (cpumask_test_cpu(cpu, mce_inject_cpumask) && | ||
102 | m->inject_flags & MCJ_EXCEPTION) { | ||
103 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | ||
104 | raise_exception(m, NULL); | ||
105 | } | ||
106 | } | ||
107 | |||
95 | /* Inject mce on current CPU */ | 108 | /* Inject mce on current CPU */ |
96 | static int raise_local(void) | 109 | static int raise_local(void) |
97 | { | 110 | { |
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m) | |||
139 | return; | 152 | return; |
140 | 153 | ||
141 | #ifdef CONFIG_X86_LOCAL_APIC | 154 | #ifdef CONFIG_X86_LOCAL_APIC |
142 | if (m->inject_flags & MCJ_NMI_BROADCAST) { | 155 | if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { |
143 | unsigned long start; | 156 | unsigned long start; |
144 | int cpu; | 157 | int cpu; |
158 | |||
145 | get_online_cpus(); | 159 | get_online_cpus(); |
146 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); | 160 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); |
147 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); | 161 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); |
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m) | |||
151 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | 165 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) |
152 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | 166 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
153 | } | 167 | } |
154 | if (!cpumask_empty(mce_inject_cpumask)) | 168 | if (!cpumask_empty(mce_inject_cpumask)) { |
155 | apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); | 169 | if (m->inject_flags & MCJ_IRQ_BRAODCAST) { |
170 | /* | ||
171 | * don't wait because mce_irq_ipi is necessary | ||
172 | * to be sync with following raise_local | ||
173 | */ | ||
174 | preempt_disable(); | ||
175 | smp_call_function_many(mce_inject_cpumask, | ||
176 | mce_irq_ipi, NULL, 0); | ||
177 | preempt_enable(); | ||
178 | } else if (m->inject_flags & MCJ_NMI_BROADCAST) | ||
179 | apic->send_IPI_mask(mce_inject_cpumask, | ||
180 | NMI_VECTOR); | ||
181 | } | ||
156 | start = jiffies; | 182 | start = jiffies; |
157 | while (!cpumask_empty(mce_inject_cpumask)) { | 183 | while (!cpumask_empty(mce_inject_cpumask)) { |
158 | if (!time_before(jiffies, start + 2*HZ)) { | 184 | if (!time_before(jiffies, start + 2*HZ)) { |
159 | printk(KERN_ERR | 185 | printk(KERN_ERR |
160 | "Timeout waiting for mce inject NMI %lx\n", | 186 | "Timeout waiting for mce inject %lx\n", |
161 | *cpumask_bits(mce_inject_cpumask)); | 187 | *cpumask_bits(mce_inject_cpumask)); |
162 | break; | 188 | break; |
163 | } | 189 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e9c9d0aab36a..cbe82b5918ce 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); | |||
95 | static DEFINE_PER_CPU(struct mce, mces_seen); | 95 | static DEFINE_PER_CPU(struct mce, mces_seen); |
96 | static int cpu_missing; | 96 | static int cpu_missing; |
97 | 97 | ||
98 | /* | ||
99 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
100 | * MCE errors in a human-readable form. | ||
101 | */ | ||
102 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
103 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
104 | |||
105 | /* MCA banks polled by the period polling timer for corrected events */ | 98 | /* MCA banks polled by the period polling timer for corrected events */ |
106 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 99 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
107 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 100 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
109 | 102 | ||
110 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 103 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
111 | 104 | ||
105 | /* | ||
106 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
107 | * MCE errors in a human-readable form. | ||
108 | */ | ||
109 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
110 | |||
112 | /* Do initial initialization of a struct mce */ | 111 | /* Do initial initialization of a struct mce */ |
113 | void mce_setup(struct mce *m) | 112 | void mce_setup(struct mce *m) |
114 | { | 113 | { |
@@ -188,6 +187,57 @@ void mce_log(struct mce *mce) | |||
188 | set_bit(0, &mce_need_notify); | 187 | set_bit(0, &mce_need_notify); |
189 | } | 188 | } |
190 | 189 | ||
190 | static void drain_mcelog_buffer(void) | ||
191 | { | ||
192 | unsigned int next, i, prev = 0; | ||
193 | |||
194 | next = rcu_dereference_check_mce(mcelog.next); | ||
195 | |||
196 | do { | ||
197 | struct mce *m; | ||
198 | |||
199 | /* drain what was logged during boot */ | ||
200 | for (i = prev; i < next; i++) { | ||
201 | unsigned long start = jiffies; | ||
202 | unsigned retries = 1; | ||
203 | |||
204 | m = &mcelog.entry[i]; | ||
205 | |||
206 | while (!m->finished) { | ||
207 | if (time_after_eq(jiffies, start + 2*retries)) | ||
208 | retries++; | ||
209 | |||
210 | cpu_relax(); | ||
211 | |||
212 | if (!m->finished && retries >= 4) { | ||
213 | pr_err("MCE: skipping error being logged currently!\n"); | ||
214 | break; | ||
215 | } | ||
216 | } | ||
217 | smp_rmb(); | ||
218 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | ||
219 | } | ||
220 | |||
221 | memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); | ||
222 | prev = next; | ||
223 | next = cmpxchg(&mcelog.next, prev, 0); | ||
224 | } while (next != prev); | ||
225 | } | ||
226 | |||
227 | |||
228 | void mce_register_decode_chain(struct notifier_block *nb) | ||
229 | { | ||
230 | atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); | ||
231 | drain_mcelog_buffer(); | ||
232 | } | ||
233 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); | ||
234 | |||
235 | void mce_unregister_decode_chain(struct notifier_block *nb) | ||
236 | { | ||
237 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); | ||
238 | } | ||
239 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); | ||
240 | |||
191 | static void print_mce(struct mce *m) | 241 | static void print_mce(struct mce *m) |
192 | { | 242 | { |
193 | int ret = 0; | 243 | int ret = 0; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index ce215616d5b9..39c6089891e4 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -323,17 +323,6 @@ device_initcall(thermal_throttle_init_device); | |||
323 | 323 | ||
324 | #endif /* CONFIG_SYSFS */ | 324 | #endif /* CONFIG_SYSFS */ |
325 | 325 | ||
326 | /* | ||
327 | * Set up the most two significant bit to notify mce log that this thermal | ||
328 | * event type. | ||
329 | * This is a temp solution. May be changed in the future with mce log | ||
330 | * infrasture. | ||
331 | */ | ||
332 | #define CORE_THROTTLED (0) | ||
333 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
334 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
335 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
336 | |||
337 | static void notify_thresholds(__u64 msr_val) | 326 | static void notify_thresholds(__u64 msr_val) |
338 | { | 327 | { |
339 | /* check whether the interrupt handler is defined; | 328 | /* check whether the interrupt handler is defined; |
@@ -363,27 +352,23 @@ static void intel_thermal_interrupt(void) | |||
363 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, | 352 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
364 | THERMAL_THROTTLING_EVENT, | 353 | THERMAL_THROTTLING_EVENT, |
365 | CORE_LEVEL) != 0) | 354 | CORE_LEVEL) != 0) |
366 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | 355 | mce_log_therm_throt_event(msr_val); |
367 | 356 | ||
368 | if (this_cpu_has(X86_FEATURE_PLN)) | 357 | if (this_cpu_has(X86_FEATURE_PLN)) |
369 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | 358 | therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, |
370 | POWER_LIMIT_EVENT, | 359 | POWER_LIMIT_EVENT, |
371 | CORE_LEVEL) != 0) | 360 | CORE_LEVEL); |
372 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
373 | 361 | ||
374 | if (this_cpu_has(X86_FEATURE_PTS)) { | 362 | if (this_cpu_has(X86_FEATURE_PTS)) { |
375 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | 363 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); |
376 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | 364 | therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, |
377 | THERMAL_THROTTLING_EVENT, | 365 | THERMAL_THROTTLING_EVENT, |
378 | PACKAGE_LEVEL) != 0) | 366 | PACKAGE_LEVEL); |
379 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
380 | if (this_cpu_has(X86_FEATURE_PLN)) | 367 | if (this_cpu_has(X86_FEATURE_PLN)) |
381 | if (therm_throt_process(msr_val & | 368 | therm_throt_process(msr_val & |
382 | PACKAGE_THERM_STATUS_POWER_LIMIT, | 369 | PACKAGE_THERM_STATUS_POWER_LIMIT, |
383 | POWER_LIMIT_EVENT, | 370 | POWER_LIMIT_EVENT, |
384 | PACKAGE_LEVEL) != 0) | 371 | PACKAGE_LEVEL); |
385 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
386 | | msr_val); | ||
387 | } | 372 | } |
388 | } | 373 | } |
389 | 374 | ||
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 70ad8923f1d7..8568d9b61875 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -2234,7 +2234,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) | |||
2234 | if (pvt->enable_scrub) | 2234 | if (pvt->enable_scrub) |
2235 | disable_sdram_scrub_setting(mci); | 2235 | disable_sdram_scrub_setting(mci); |
2236 | 2236 | ||
2237 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); | 2237 | mce_unregister_decode_chain(&i7_mce_dec); |
2238 | 2238 | ||
2239 | /* Disable EDAC polling */ | 2239 | /* Disable EDAC polling */ |
2240 | i7core_pci_ctl_release(pvt); | 2240 | i7core_pci_ctl_release(pvt); |
@@ -2336,7 +2336,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) | |||
2336 | /* DCLK for scrub rate setting */ | 2336 | /* DCLK for scrub rate setting */ |
2337 | pvt->dclk_freq = get_dclk_freq(); | 2337 | pvt->dclk_freq = get_dclk_freq(); |
2338 | 2338 | ||
2339 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); | 2339 | mce_register_decode_chain(&i7_mce_dec); |
2340 | 2340 | ||
2341 | return 0; | 2341 | return 0; |
2342 | 2342 | ||
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index d0864d9c38ad..bd926ea2e00c 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c | |||
@@ -884,7 +884,7 @@ static int __init mce_amd_init(void) | |||
884 | 884 | ||
885 | pr_info("MCE: In-kernel MCE decoding enabled.\n"); | 885 | pr_info("MCE: In-kernel MCE decoding enabled.\n"); |
886 | 886 | ||
887 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); | 887 | mce_register_decode_chain(&amd_mce_dec_nb); |
888 | 888 | ||
889 | return 0; | 889 | return 0; |
890 | } | 890 | } |
@@ -893,7 +893,7 @@ early_initcall(mce_amd_init); | |||
893 | #ifdef MODULE | 893 | #ifdef MODULE |
894 | static void __exit mce_amd_exit(void) | 894 | static void __exit mce_amd_exit(void) |
895 | { | 895 | { |
896 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); | 896 | mce_unregister_decode_chain(&amd_mce_dec_nb); |
897 | kfree(fam_ops); | 897 | kfree(fam_ops); |
898 | } | 898 | } |
899 | 899 | ||
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 88df48956c1b..1dc118d83cc6 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c | |||
@@ -1659,8 +1659,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) | |||
1659 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", | 1659 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", |
1660 | __func__, mci, &sbridge_dev->pdev[0]->dev); | 1660 | __func__, mci, &sbridge_dev->pdev[0]->dev); |
1661 | 1661 | ||
1662 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, | 1662 | mce_unregister_decode_chain(&sbridge_mce_dec); |
1663 | &sbridge_mce_dec); | ||
1664 | 1663 | ||
1665 | /* Remove MC sysfs nodes */ | 1664 | /* Remove MC sysfs nodes */ |
1666 | edac_mc_del_mc(mci->dev); | 1665 | edac_mc_del_mc(mci->dev); |
@@ -1729,8 +1728,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) | |||
1729 | goto fail0; | 1728 | goto fail0; |
1730 | } | 1729 | } |
1731 | 1730 | ||
1732 | atomic_notifier_chain_register(&x86_mce_decoder_chain, | 1731 | mce_register_decode_chain(&sbridge_mce_dec); |
1733 | &sbridge_mce_dec); | ||
1734 | return 0; | 1732 | return 0; |
1735 | 1733 | ||
1736 | fail0: | 1734 | fail0: |