Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 396
 1 file changed, 222 insertions(+), 174 deletions(-)
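Note: the central change in this diff is replacing the bare u64 *bank array with an array of struct mce_bank (mce_banks). The structure itself is defined in mce-internal.h, which is outside this diff; the sketch below is reconstructed from the fields the patch uses (ctl, init, attr, attrname) and is an approximation only. In particular the field order and the ATTR_LEN value are assumptions, not taken from the tree.

/* Reconstructed sketch; not copied from mce-internal.h. */
#define ATTR_LEN	16

struct mce_bank {
	u64			ctl;			/* subevents to enable; written to MSR_IA32_MCx_CTL(i) */
	unsigned char		init;			/* initialise this bank? Replaces the old dont_init_banks bitmap */
	struct sysdev_attribute	attr;			/* per-bank sysfs attribute, shown as "bankN" */
	char			attrname[ATTR_LEN];	/* backing storage for attr.attr.name */
};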
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9bfe9d2ea615..b1598a9436d0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -34,6 +34,7 @@
 #include <linux/smp.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/debugfs.h>
 
 #include <asm/processor.h>
 #include <asm/hw_irq.h>
@@ -45,21 +46,8 @@
 
 #include "mce-internal.h"
 
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
-	       smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
-						unexpected_machine_check;
-
 int mce_disabled __read_mostly;
 
-#ifdef CONFIG_X86_NEW_MCE
-
 #define MISC_MCELOG_MINOR	227
 
 #define SPINUNIT 100	/* 100ns */
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
  */
 static int tolerant __read_mostly = 1;
 static int banks __read_mostly;
-static u64 *bank __read_mostly;
 static int rip_msr __read_mostly;
 static int mce_bootlog __read_mostly = -1;
 static int monarch_timeout __read_mostly = -1;
@@ -87,28 +74,35 @@ int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
 
+struct mce_bank *mce_banks __read_mostly;
+
 /* User mode helper program triggered by machine check event */
 static unsigned long mce_need_notify;
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-static unsigned long dont_init_banks;
-
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
+static void default_decode_mce(struct mce *m)
+{
+	pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
 
-static inline int skip_bank_init(int i)
-{
-	return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
-}
-
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
 /* Do initial initialization of a struct mce */
@@ -183,59 +177,60 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-void __weak decode_mce(struct mce *m)
-{
-	return;
-}
-
 static void print_mce(struct mce *m)
 {
-	printk(KERN_EMERG
-	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+	pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
+
 	if (m->ip) {
-		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
+		pr_emerg("RIP%s %02x:<%016Lx> ",
 		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
 		       m->cs, m->ip);
+
 		if (m->cs == __KERNEL_CS)
 			print_symbol("{%s}", m->ip);
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 	}
-	printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+	pr_emerg("TSC %llx ", m->tsc);
 	if (m->addr)
-		printk(KERN_CONT "ADDR %llx ", m->addr);
+		pr_cont("ADDR %llx ", m->addr);
 	if (m->misc)
-		printk(KERN_CONT "MISC %llx ", m->misc);
-	printk(KERN_CONT "\n");
-	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
-	       m->cpuvendor, m->cpuid, m->time, m->socketid,
-	       m->apicid);
+		pr_cont("MISC %llx ", m->misc);
 
-	decode_mce(m);
+	pr_cont("\n");
+	pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+
+	/*
+	 * Print out human-readable details about the MCE error,
+	 * (if the CPU has an implementation for that):
+	 */
+	x86_mce_decode_callback(m);
 }
 
 static void print_mce_head(void)
 {
-	printk(KERN_EMERG "\nHARDWARE ERROR\n");
+	pr_emerg("\nHARDWARE ERROR\n");
 }
 
 static void print_mce_tail(void)
 {
-	printk(KERN_EMERG "This is not a software problem!\n"
-#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
-	       "Run through mcelog --ascii to decode and contact your hardware vendor\n"
-#endif
-	       );
+	pr_emerg("This is not a software problem!\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
 
 static atomic_t mce_paniced;
 
+static int fake_panic;
+static atomic_t mce_fake_paniced;
+
 /* Panic in progress. Enable interrupts and wait for final IPI */
 static void wait_for_panic(void)
 {
 	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
 	preempt_disable();
 	local_irq_enable();
 	while (timeout-- > 0)
@@ -249,15 +244,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 {
 	int i;
 
-	/*
-	 * Make sure only one CPU runs in machine check panic
-	 */
-	if (atomic_add_return(1, &mce_paniced) > 1)
-		wait_for_panic();
-	barrier();
+	if (!fake_panic) {
+		/*
+		 * Make sure only one CPU runs in machine check panic
+		 */
+		if (atomic_inc_return(&mce_paniced) > 1)
+			wait_for_panic();
+		barrier();
 
-	bust_spinlocks(1);
-	console_verbose();
+		bust_spinlocks(1);
+		console_verbose();
+	} else {
+		/* Don't log too much for fake panic */
+		if (atomic_inc_return(&mce_fake_paniced) > 1)
+			return;
+	}
 	print_mce_head();
 	/* First print corrected ones that are still unlogged */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -284,9 +285,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 	print_mce_tail();
 	if (exp)
 		printk(KERN_EMERG "Machine check: %s\n", exp);
-	if (panic_timeout == 0)
-		panic_timeout = mce_panic_timeout;
-	panic(msg);
+	if (!fake_panic) {
+		if (panic_timeout == 0)
+			panic_timeout = mce_panic_timeout;
+		panic(msg);
+	} else
+		printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
 }
 
 /* Support code for software error injection */
@@ -294,13 +298,14 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 static int msr_to_offset(u32 msr)
 {
 	unsigned bank = __get_cpu_var(injectm.bank);
+
 	if (msr == rip_msr)
 		return offsetof(struct mce, ip);
-	if (msr == MSR_IA32_MC0_STATUS + bank*4)
+	if (msr == MSR_IA32_MCx_STATUS(bank))
 		return offsetof(struct mce, status);
-	if (msr == MSR_IA32_MC0_ADDR + bank*4)
+	if (msr == MSR_IA32_MCx_ADDR(bank))
 		return offsetof(struct mce, addr);
-	if (msr == MSR_IA32_MC0_MISC + bank*4)
+	if (msr == MSR_IA32_MCx_MISC(bank))
 		return offsetof(struct mce, misc);
 	if (msr == MSR_IA32_MCG_STATUS)
 		return offsetof(struct mce, mcgstatus);
@@ -311,13 +316,25 @@ static int msr_to_offset(u32 msr)
 static u64 mce_rdmsrl(u32 msr)
 {
 	u64 v;
+
 	if (__get_cpu_var(injectm).finished) {
 		int offset = msr_to_offset(msr);
+
 		if (offset < 0)
 			return 0;
 		return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
 	}
-	rdmsrl(msr, v);
+
+	if (rdmsrl_safe(msr, &v)) {
+		WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+		/*
+		 * Return zero in case the access faulted. This should
+		 * not happen normally but can happen if the CPU does
+		 * something weird, or if the code is buggy.
+		 */
+		v = 0;
+	}
+
 	return v;
 }
 
@@ -325,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
 {
 	if (__get_cpu_var(injectm).finished) {
 		int offset = msr_to_offset(msr);
+
 		if (offset >= 0)
 			*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
 		return;
@@ -421,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 		m->ip = mce_rdmsrl(rip_msr);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_LOCAL_APIC
 /*
  * Called after interrupts have been reenabled again
  * when a MCE happened during an interrupts off region
@@ -505,7 +523,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 	for (i = 0; i < banks; i++) {
-		if (!bank[i] || !test_bit(i, *b))
+		if (!mce_banks[i].ctl || !test_bit(i, *b))
 			continue;
 
 		m.misc = 0;
@@ -514,7 +532,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		m.tsc = 0;
 
 		barrier();
-		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
@@ -529,9 +547,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			continue;
 
 		if (m.status & MCI_STATUS_MISCV)
-			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
 		if (m.status & MCI_STATUS_ADDRV)
-			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
 
 		if (!(flags & MCP_TIMESTAMP))
 			m.tsc = 0;
@@ -547,7 +565,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		/*
 		 * Clear state for this bank.
 		 */
-		mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+		mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
 
 	/*
@@ -568,7 +586,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
 	int i;
 
 	for (i = 0; i < banks; i++) {
-		m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
 			return 1;
 	}
@@ -628,7 +646,7 @@ out:
  * This way we prevent any potential data corruption in a unrecoverable case
  * and also makes sure always all CPU's errors are examined.
  *
- * Also this detects the case of an machine check event coming from outer
+ * Also this detects the case of a machine check event coming from outer
  * space (not detected by any CPUs) In this case some external agent wants
  * us to shut down, so panic too.
  *
@@ -681,7 +699,7 @@ static void mce_reign(void)
 	 * No machine check event found. Must be some external
 	 * source or one CPU is hung. Panic.
 	 */
-	if (!m && tolerant < 3)
+	if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
 		mce_panic("Machine check from unknown source", NULL, NULL);
 
 	/*
@@ -715,7 +733,7 @@ static int mce_start(int *no_way_out)
 	 * global_nwo should be updated before mce_callin
 	 */
 	smp_wmb();
-	order = atomic_add_return(1, &mce_callin);
+	order = atomic_inc_return(&mce_callin);
 
 	/*
 	 * Wait for everyone.
@@ -852,7 +870,7 @@ static void mce_clear_state(unsigned long *toclear)
 
 	for (i = 0; i < banks; i++) {
 		if (test_bit(i, toclear))
-			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+			mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
 }
 
@@ -905,11 +923,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	mce_setup(&m);
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
-	no_way_out = mce_no_way_out(&m, &msg);
-
 	final = &__get_cpu_var(mces_seen);
 	*final = m;
 
+	no_way_out = mce_no_way_out(&m, &msg);
+
 	barrier();
 
 	/*
@@ -926,14 +944,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	order = mce_start(&no_way_out);
 	for (i = 0; i < banks; i++) {
 		__clear_bit(i, toclear);
-		if (!bank[i])
+		if (!mce_banks[i].ctl)
 			continue;
 
 		m.misc = 0;
 		m.addr = 0;
 		m.bank = i;
 
-		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if ((m.status & MCI_STATUS_VAL) == 0)
 			continue;
 
@@ -974,9 +992,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			kill_it = 1;
 
 		if (m.status & MCI_STATUS_MISCV)
-			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
 		if (m.status & MCI_STATUS_ADDRV)
-			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
 
 		/*
 		 * Action optional error. Queue address for later processing.
@@ -1101,7 +1119,7 @@ void mce_log_therm_throt_event(__u64 status)
  */
 static int check_interval = 5 * 60; /* 5 minutes */
 
-static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
+static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
 static void mcheck_timer(unsigned long data)
@@ -1120,7 +1138,7 @@ static void mcheck_timer(unsigned long data)
 	 * Alert userspace if needed. If we logged an MCE, reduce the
 	 * polling interval, otherwise increase the polling interval.
 	 */
-	n = &__get_cpu_var(next_interval);
+	n = &__get_cpu_var(mce_next_interval);
 	if (mce_notify_irq())
 		*n = max(*n/2, HZ/100);
 	else
@@ -1169,10 +1187,26 @@ int mce_notify_irq(void)
 }
 EXPORT_SYMBOL_GPL(mce_notify_irq);
 
+static int mce_banks_init(void)
+{
+	int i;
+
+	mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
+	if (!mce_banks)
+		return -ENOMEM;
+	for (i = 0; i < banks; i++) {
+		struct mce_bank *b = &mce_banks[i];
+
+		b->ctl = -1ULL;
+		b->init = 1;
+	}
+	return 0;
+}
+
 /*
  * Initialize Machine Checks for a CPU.
  */
-static int mce_cap_init(void)
+static int __cpuinit mce_cap_init(void)
 {
 	unsigned b;
 	u64 cap;
@@ -1192,11 +1226,11 @@ static int mce_cap_init(void)
 	/* Don't support asymmetric configurations today */
 	WARN_ON(banks != 0 && b != banks);
 	banks = b;
-	if (!bank) {
-		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
-		if (!bank)
-			return -ENOMEM;
-		memset(bank, 0xff, banks * sizeof(u64));
+	if (!mce_banks) {
+		int err = mce_banks_init();
+
+		if (err)
+			return err;
 	}
 
 	/* Use accurate RIP reporting if available. */
@@ -1228,15 +1262,17 @@ static void mce_init(void)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
 	for (i = 0; i < banks; i++) {
-		if (skip_bank_init(i))
+		struct mce_bank *b = &mce_banks[i];
+
+		if (!b->init)
 			continue;
-		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
-		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+		wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+		wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
 }
 
 /* Add per CPU specific workarounds here */
-static int mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
 		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1251,7 +1287,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
 		 * trips off incorrectly with the IOMMU & 3ware
 		 * & Cerberus:
 		 */
-		clear_bit(10, (unsigned long *)&bank[4]);
+		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
 	}
 	if (c->x86 <= 17 && mce_bootlog < 0) {
 		/*
@@ -1265,7 +1301,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
 		 * by default.
 		 */
 		if (c->x86 == 6 && banks > 0)
-			bank[0] = 0;
+			mce_banks[0].ctl = 0;
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1278,8 +1314,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
 		 * valid event later, merely don't write CTL0.
 		 */
 
-		if (c->x86 == 6 && c->x86_model < 0x1A)
-			__set_bit(0, &dont_init_banks);
+		if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
+			mce_banks[0].init = 0;
 
 		/*
 		 * All newer Intel systems support MCE broadcasting. Enable
@@ -1335,7 +1371,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
 static void mce_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(next_interval);
+	int *n = &__get_cpu_var(mce_next_interval);
 
 	if (mce_ignore_ce)
 		return;
@@ -1348,6 +1384,17 @@ static void mce_init_timer(void)
 	add_timer_on(t, smp_processor_id());
 }
 
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	       smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+						unexpected_machine_check;
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off:
@@ -1561,8 +1608,10 @@ static struct miscdevice mce_log_device = {
  */
 static int __init mcheck_enable(char *str)
 {
-	if (*str == 0)
+	if (*str == 0) {
 		enable_p5_mce();
+		return 1;
+	}
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
@@ -1603,8 +1652,10 @@ static int mce_disable(void)
 	int i;
 
 	for (i = 0; i < banks; i++) {
-		if (!skip_bank_init(i))
-			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+		struct mce_bank *b = &mce_banks[i];
+
+		if (b->init)
+			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
 	return 0;
 }
@@ -1679,14 +1730,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
 __cpuinitdata
 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
-static struct sysdev_attribute *bank_attrs;
+static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
+{
+	return container_of(attr, struct mce_bank, attr);
+}
 
 static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
 			 char *buf)
 {
-	u64 b = bank[attr - bank_attrs];
-
-	return sprintf(buf, "%llx\n", b);
+	return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
 }
 
 static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1697,7 +1749,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 	if (strict_strtoull(buf, 0, &new) < 0)
 		return -EINVAL;
 
-	bank[attr - bank_attrs] = new;
+	attr_to_bank(attr)->ctl = new;
 	mce_restart();
 
 	return size;
@@ -1839,7 +1891,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	}
 	for (j = 0; j < banks; j++) {
 		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
-					&bank_attrs[j]);
+					&mce_banks[j].attr);
 		if (err)
 			goto error2;
 	}
@@ -1848,10 +1900,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	return 0;
 error2:
 	while (--j >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
 error:
 	while (--i >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
 
 	sysdev_unregister(&per_cpu(mce_dev, cpu));
 
@@ -1869,7 +1921,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
 
 	for (i = 0; i < banks; i++)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
 
 	sysdev_unregister(&per_cpu(mce_dev, cpu));
 	cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1886,8 +1938,10 @@ static void mce_disable_cpu(void *h)
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
 	for (i = 0; i < banks; i++) {
-		if (!skip_bank_init(i))
-			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+		struct mce_bank *b = &mce_banks[i];
+
+		if (b->init)
+			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
 }
 
@@ -1902,8 +1956,10 @@ static void mce_reenable_cpu(void *h)
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_reenable();
 	for (i = 0; i < banks; i++) {
-		if (!skip_bank_init(i))
-			wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+		struct mce_bank *b = &mce_banks[i];
+
+		if (b->init)
+			wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
 	}
 }
 
@@ -1935,7 +1991,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 		t->expires = round_jiffies(jiffies +
-					__get_cpu_var(next_interval));
+					__get_cpu_var(mce_next_interval));
 		add_timer_on(t, cpu);
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		break;
@@ -1951,35 +2007,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
 	.notifier_call = mce_cpu_callback,
 };
 
-static __init int mce_init_banks(void)
+static __init void mce_init_banks(void)
 {
 	int i;
 
-	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
-				GFP_KERNEL);
-	if (!bank_attrs)
-		return -ENOMEM;
-
 	for (i = 0; i < banks; i++) {
-		struct sysdev_attribute *a = &bank_attrs[i];
+		struct mce_bank *b = &mce_banks[i];
+		struct sysdev_attribute *a = &b->attr;
 
-		a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
-		if (!a->attr.name)
-			goto nomem;
+		a->attr.name = b->attrname;
+		snprintf(b->attrname, ATTR_LEN, "bank%d", i);
 
 		a->attr.mode = 0644;
 		a->show = show_bank;
 		a->store = set_bank;
 	}
-	return 0;
-
-nomem:
-	while (--i >= 0)
-		kfree(bank_attrs[i].attr.name);
-	kfree(bank_attrs);
-	bank_attrs = NULL;
-
-	return -ENOMEM;
 }
 
 static __init int mce_init_device(void)
@@ -1992,9 +2034,7 @@ static __init int mce_init_device(void)
 
 	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
 
-	err = mce_init_banks();
-	if (err)
-		return err;
+	mce_init_banks();
 
 	err = sysdev_class_register(&mce_sysclass);
 	if (err)
@@ -2014,57 +2054,65 @@ static __init int mce_init_device(void)
 
 device_initcall(mce_init_device);
 
-#else /* CONFIG_X86_OLD_MCE: */
-
-int nr_mce_banks;
-EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
-	if (mce_disabled)
-		return;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_INTEL:
-		if (c->x86 == 5)
-			intel_p5_mcheck_init(c);
-		if (c->x86 == 6)
-			intel_p6_mcheck_init(c);
-		if (c->x86 == 15)
-			intel_p4_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_CENTAUR:
-		if (c->x86 == 5)
-			winchip_mcheck_init(c);
-		break;
-
-	default:
-		break;
-	}
-	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
-}
-
-static int __init mcheck_enable(char *str)
-{
-	mce_p5_enabled = 1;
-	return 1;
-}
-__setup("mce", mcheck_enable);
-
-#endif /* CONFIG_X86_OLD_MCE */
-
-/*
- * Old style boot options parsing. Only for compatibility.
- */
-static int __init mcheck_disable(char *str)
-{
-	mce_disabled = 1;
-	return 1;
-}
-__setup("nomce", mcheck_disable);
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 1;
+}
+__setup("nomce", mcheck_disable);
+
+#ifdef CONFIG_DEBUG_FS
+struct dentry *mce_get_debugfs_dir(void)
+{
+	static struct dentry *dmce;
+
+	if (!dmce)
+		dmce = debugfs_create_dir("mce", NULL);
+
+	return dmce;
+}
+
+static void mce_reset(void)
+{
+	cpu_missing = 0;
+	atomic_set(&mce_fake_paniced, 0);
+	atomic_set(&mce_executing, 0);
+	atomic_set(&mce_callin, 0);
+	atomic_set(&global_nwo, 0);
+}
+
+static int fake_panic_get(void *data, u64 *val)
+{
+	*val = fake_panic;
+	return 0;
+}
+
+static int fake_panic_set(void *data, u64 val)
+{
+	mce_reset();
+	fake_panic = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
+			fake_panic_set, "%llu\n");
+
+static int __init mce_debugfs_init(void)
+{
+	struct dentry *dmce, *ffake_panic;
+
+	dmce = mce_get_debugfs_dir();
+	if (!dmce)
+		return -ENOMEM;
+	ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
+					  &fake_panic_fops);
+	if (!ffake_panic)
+		return -ENOMEM;
+
+	return 0;
+}
+late_initcall(mce_debugfs_init);
+#endif
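
For context, here is a minimal sketch of how a CPU/chipset EDAC driver might hook the x86_mce_decode_callback pointer that this series exports. The hook and the struct mce fields match the patch above; the module name and the decoding logic are hypothetical, and the extern declaration is assumed to live in asm/mce.h.

/* Hypothetical example module, not part of this diff. */
#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/mce.h>

static void (*orig_decode)(struct mce *m);

/* Toy "decoder": merely reprints the raw fields that print_mce() logs. */
static void example_decode_mce(struct mce *m)
{
	pr_emerg("example-edac: bank %d status %016llx addr %016llx misc %016llx\n",
		 m->bank, m->status, m->addr, m->misc);
}

static int __init example_edac_init(void)
{
	/* Save the default decoder and install ours. */
	orig_decode = x86_mce_decode_callback;
	x86_mce_decode_callback = example_decode_mce;
	return 0;
}

static void __exit example_edac_exit(void)
{
	/* Restore the decoder saved at load time. */
	x86_mce_decode_callback = orig_decode;
}

module_init(example_edac_init);
module_exit(example_edac_exit);
MODULE_LICENSE("GPL");

The other user-visible addition is the fake_panic debugfs control: with CONFIG_DEBUG_FS enabled, setting a non-zero value via <debugfs>/mce/fake_panic makes mce_panic() print the queued MCE records and return instead of panicking, so injection tests can exercise the panic path without taking the machine down.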