aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-01-06 18:02:37 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-01-06 18:02:37 -0500
commit edf7c8148ec40c0fd27c0ef3f688defcc65e3913 (patch)
tree dde7448208538c616ad3ba25b41a816fccc015af
parent 82406da4a6998a0c98db0c5afb1695f97889bf79 (diff)
parent a228b5892b0527b8574c06edc72cacaf8c25418d (diff)
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: add IRQ context simulation in module mce-inject
  x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
  x86, MCE: Drain mcelog buffer
  x86, mce: Add wrappers for registering on the decode chain
-rw-r--r-- arch/x86/include/asm/mce.h 12
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-inject.c 34
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 64
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c 29
-rw-r--r-- drivers/edac/i7core_edac.c 4
-rw-r--r-- drivers/edac/mce_amd.c 4
-rw-r--r-- drivers/edac/sb_edac.c 6
7 files changed, 107 insertions, 46 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0e8ae57d3656..6add827381c9 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -50,10 +50,11 @@
50#define MCJ_CTX_MASK 3 50#define MCJ_CTX_MASK 3
51#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) 51#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
52#define MCJ_CTX_RANDOM 0 /* inject context: random */ 52#define MCJ_CTX_RANDOM 0 /* inject context: random */
53#define MCJ_CTX_PROCESS 1 /* inject context: process */ 53#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
54#define MCJ_CTX_IRQ 2 /* inject context: IRQ */ 54#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
55#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ 55#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
56#define MCJ_EXCEPTION 8 /* raise as exception */ 56#define MCJ_EXCEPTION 0x8 /* raise as exception */
57#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
57 58
58/* Fields are zero when not available */ 59/* Fields are zero when not available */
59struct mce { 60struct mce {
@@ -120,7 +121,8 @@ struct mce_log {
120 121
121#ifdef __KERNEL__ 122#ifdef __KERNEL__
122 123
123extern struct atomic_notifier_head x86_mce_decoder_chain; 124extern void mce_register_decode_chain(struct notifier_block *nb);
125extern void mce_unregister_decode_chain(struct notifier_block *nb);
124 126
125#include <linux/percpu.h> 127#include <linux/percpu.h>
126#include <linux/init.h> 128#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 319882ef848d..fc4beb393577 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/preempt.h>
20#include <linux/smp.h> 21#include <linux/smp.h>
21#include <linux/notifier.h> 22#include <linux/notifier.h>
22#include <linux/kdebug.h> 23#include <linux/kdebug.h>
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
92 return NMI_HANDLED; 93 return NMI_HANDLED;
93} 94}
94 95
96static void mce_irq_ipi(void *info)
97{
98 int cpu = smp_processor_id();
99 struct mce *m = &__get_cpu_var(injectm);
100
101 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
102 m->inject_flags & MCJ_EXCEPTION) {
103 cpumask_clear_cpu(cpu, mce_inject_cpumask);
104 raise_exception(m, NULL);
105 }
106}
107
95/* Inject mce on current CPU */ 108/* Inject mce on current CPU */
96static int raise_local(void) 109static int raise_local(void)
97{ 110{
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m)
139 return; 152 return;
140 153
141#ifdef CONFIG_X86_LOCAL_APIC 154#ifdef CONFIG_X86_LOCAL_APIC
142 if (m->inject_flags & MCJ_NMI_BROADCAST) { 155 if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
143 unsigned long start; 156 unsigned long start;
144 int cpu; 157 int cpu;
158
145 get_online_cpus(); 159 get_online_cpus();
146 cpumask_copy(mce_inject_cpumask, cpu_online_mask); 160 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
147 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); 161 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m)
151 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) 165 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
152 cpumask_clear_cpu(cpu, mce_inject_cpumask); 166 cpumask_clear_cpu(cpu, mce_inject_cpumask);
153 } 167 }
154 if (!cpumask_empty(mce_inject_cpumask)) 168 if (!cpumask_empty(mce_inject_cpumask)) {
155 apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); 169 if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
170 /*
171 * don't wait because mce_irq_ipi is necessary
172 * to be sync with following raise_local
173 */
174 preempt_disable();
175 smp_call_function_many(mce_inject_cpumask,
176 mce_irq_ipi, NULL, 0);
177 preempt_enable();
178 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
179 apic->send_IPI_mask(mce_inject_cpumask,
180 NMI_VECTOR);
181 }
156 start = jiffies; 182 start = jiffies;
157 while (!cpumask_empty(mce_inject_cpumask)) { 183 while (!cpumask_empty(mce_inject_cpumask)) {
158 if (!time_before(jiffies, start + 2*HZ)) { 184 if (!time_before(jiffies, start + 2*HZ)) {
159 printk(KERN_ERR 185 printk(KERN_ERR
160 "Timeout waiting for mce inject NMI %lx\n", 186 "Timeout waiting for mce inject %lx\n",
161 *cpumask_bits(mce_inject_cpumask)); 187 *cpumask_bits(mce_inject_cpumask));
162 break; 188 break;
163 } 189 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e9c9d0aab36a..cbe82b5918ce 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
95static DEFINE_PER_CPU(struct mce, mces_seen); 95static DEFINE_PER_CPU(struct mce, mces_seen);
96static int cpu_missing; 96static int cpu_missing;
97 97
98/*
99 * CPU/chipset specific EDAC code can register a notifier call here to print
100 * MCE errors in a human-readable form.
101 */
102ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
103EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
104
105/* MCA banks polled by the period polling timer for corrected events */ 98/* MCA banks polled by the period polling timer for corrected events */
106DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 99DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
107 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 100 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
109 102
110static DEFINE_PER_CPU(struct work_struct, mce_work); 103static DEFINE_PER_CPU(struct work_struct, mce_work);
111 104
105/*
106 * CPU/chipset specific EDAC code can register a notifier call here to print
107 * MCE errors in a human-readable form.
108 */
109ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
110
112/* Do initial initialization of a struct mce */ 111/* Do initial initialization of a struct mce */
113void mce_setup(struct mce *m) 112void mce_setup(struct mce *m)
114{ 113{
@@ -188,6 +187,57 @@ void mce_log(struct mce *mce)
188 set_bit(0, &mce_need_notify); 187 set_bit(0, &mce_need_notify);
189} 188}
190 189
190static void drain_mcelog_buffer(void)
191{
192 unsigned int next, i, prev = 0;
193
194 next = rcu_dereference_check_mce(mcelog.next);
195
196 do {
197 struct mce *m;
198
199 /* drain what was logged during boot */
200 for (i = prev; i < next; i++) {
201 unsigned long start = jiffies;
202 unsigned retries = 1;
203
204 m = &mcelog.entry[i];
205
206 while (!m->finished) {
207 if (time_after_eq(jiffies, start + 2*retries))
208 retries++;
209
210 cpu_relax();
211
212 if (!m->finished && retries >= 4) {
213 pr_err("MCE: skipping error being logged currently!\n");
214 break;
215 }
216 }
217 smp_rmb();
218 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
219 }
220
221 memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
222 prev = next;
223 next = cmpxchg(&mcelog.next, prev, 0);
224 } while (next != prev);
225}
226
227
228void mce_register_decode_chain(struct notifier_block *nb)
229{
230 atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
231 drain_mcelog_buffer();
232}
233EXPORT_SYMBOL_GPL(mce_register_decode_chain);
234
235void mce_unregister_decode_chain(struct notifier_block *nb)
236{
237 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
238}
239EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
240
191static void print_mce(struct mce *m) 241static void print_mce(struct mce *m)
192{ 242{
193 int ret = 0; 243 int ret = 0;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ce215616d5b9..39c6089891e4 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -323,17 +323,6 @@ device_initcall(thermal_throttle_init_device);
323 323
324#endif /* CONFIG_SYSFS */ 324#endif /* CONFIG_SYSFS */
325 325
326/*
327 * Set up the most two significant bit to notify mce log that this thermal
328 * event type.
329 * This is a temp solution. May be changed in the future with mce log
330 * infrasture.
331 */
332#define CORE_THROTTLED (0)
333#define CORE_POWER_LIMIT ((__u64)1 << 62)
334#define PACKAGE_THROTTLED ((__u64)2 << 62)
335#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
336
337static void notify_thresholds(__u64 msr_val) 326static void notify_thresholds(__u64 msr_val)
338{ 327{
339 /* check whether the interrupt handler is defined; 328 /* check whether the interrupt handler is defined;
@@ -363,27 +352,23 @@ static void intel_thermal_interrupt(void)
363 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, 352 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
364 THERMAL_THROTTLING_EVENT, 353 THERMAL_THROTTLING_EVENT,
365 CORE_LEVEL) != 0) 354 CORE_LEVEL) != 0)
366 mce_log_therm_throt_event(CORE_THROTTLED | msr_val); 355 mce_log_therm_throt_event(msr_val);
367 356
368 if (this_cpu_has(X86_FEATURE_PLN)) 357 if (this_cpu_has(X86_FEATURE_PLN))
369 if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, 358 therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
370 POWER_LIMIT_EVENT, 359 POWER_LIMIT_EVENT,
371 CORE_LEVEL) != 0) 360 CORE_LEVEL);
372 mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
373 361
374 if (this_cpu_has(X86_FEATURE_PTS)) { 362 if (this_cpu_has(X86_FEATURE_PTS)) {
375 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 363 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
376 if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, 364 therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
377 THERMAL_THROTTLING_EVENT, 365 THERMAL_THROTTLING_EVENT,
378 PACKAGE_LEVEL) != 0) 366 PACKAGE_LEVEL);
379 mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
380 if (this_cpu_has(X86_FEATURE_PLN)) 367 if (this_cpu_has(X86_FEATURE_PLN))
381 if (therm_throt_process(msr_val & 368 therm_throt_process(msr_val &
382 PACKAGE_THERM_STATUS_POWER_LIMIT, 369 PACKAGE_THERM_STATUS_POWER_LIMIT,
383 POWER_LIMIT_EVENT, 370 POWER_LIMIT_EVENT,
384 PACKAGE_LEVEL) != 0) 371 PACKAGE_LEVEL);
385 mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
386 | msr_val);
387 } 372 }
388} 373}
389 374
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 70ad8923f1d7..8568d9b61875 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -2234,7 +2234,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2234 if (pvt->enable_scrub) 2234 if (pvt->enable_scrub)
2235 disable_sdram_scrub_setting(mci); 2235 disable_sdram_scrub_setting(mci);
2236 2236
2237 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); 2237 mce_unregister_decode_chain(&i7_mce_dec);
2238 2238
2239 /* Disable EDAC polling */ 2239 /* Disable EDAC polling */
2240 i7core_pci_ctl_release(pvt); 2240 i7core_pci_ctl_release(pvt);
@@ -2336,7 +2336,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
2336 /* DCLK for scrub rate setting */ 2336 /* DCLK for scrub rate setting */
2337 pvt->dclk_freq = get_dclk_freq(); 2337 pvt->dclk_freq = get_dclk_freq();
2338 2338
2339 atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); 2339 mce_register_decode_chain(&i7_mce_dec);
2340 2340
2341 return 0; 2341 return 0;
2342 2342
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index d0864d9c38ad..bd926ea2e00c 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -884,7 +884,7 @@ static int __init mce_amd_init(void)
884 884
885 pr_info("MCE: In-kernel MCE decoding enabled.\n"); 885 pr_info("MCE: In-kernel MCE decoding enabled.\n");
886 886
887 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); 887 mce_register_decode_chain(&amd_mce_dec_nb);
888 888
889 return 0; 889 return 0;
890} 890}
@@ -893,7 +893,7 @@ early_initcall(mce_amd_init);
893#ifdef MODULE 893#ifdef MODULE
894static void __exit mce_amd_exit(void) 894static void __exit mce_amd_exit(void)
895{ 895{
896 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); 896 mce_unregister_decode_chain(&amd_mce_dec_nb);
897 kfree(fam_ops); 897 kfree(fam_ops);
898} 898}
899 899
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 88df48956c1b..1dc118d83cc6 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1659,8 +1659,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1659 debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", 1659 debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1660 __func__, mci, &sbridge_dev->pdev[0]->dev); 1660 __func__, mci, &sbridge_dev->pdev[0]->dev);
1661 1661
1662 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, 1662 mce_unregister_decode_chain(&sbridge_mce_dec);
1663 &sbridge_mce_dec);
1664 1663
1665 /* Remove MC sysfs nodes */ 1664 /* Remove MC sysfs nodes */
1666 edac_mc_del_mc(mci->dev); 1665 edac_mc_del_mc(mci->dev);
@@ -1729,8 +1728,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1729 goto fail0; 1728 goto fail0;
1730 } 1729 }
1731 1730
1732 atomic_notifier_chain_register(&x86_mce_decoder_chain, 1731 mce_register_decode_chain(&sbridge_mce_dec);
1733 &sbridge_mce_dec);
1734 return 0; 1732 return 0;
1735 1733
1736fail0: 1734fail0: