aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-18 00:07:08 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-18 00:07:08 -0400
commitdf58bee21ed218cb7dfb561a590b1bd2a99531cf (patch)
tree885acc2720996707dacb1da5700051a8ed99c655
parentdcbf77b9e86e1726f5fbd01bb98820dac06d456e (diff)
parente34e77ce348feac3c8c607774efb1f8a9262127d (diff)
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (21 commits) x86, mce: Fix compilation with !CONFIG_DEBUG_FS in mce-severity.c x86, mce: CE in last bank prevents panic by unknown MCE x86, mce: Fake panic support for MCE testing x86, mce: Move debugfs mce dir creating to mce.c x86, mce: Support specifying raise mode for software MCE injection x86, mce: Support specifying context for software mce injection x86, mce: fix reporting of Thermal Monitoring mechanism enabled x86, mce: remove never executed code x86, mce: add missing __cpuinit tags x86, mce: fix "mce" boot option handling for CONFIG_X86_NEW_MCE x86, mce: don't log boot MCEs on Pentium M (model == 13) CPUs x86: mce: Lower maximum number of banks to architecture limit x86: mce: macros to compute banks MSRs x86: mce: Move per bank data in a single datastructure x86: mce: Move code in mce.c x86: mce: Rename CONFIG_X86_NEW_MCE to CONFIG_X86_MCE x86: mce: Remove old i386 machine check code x86: mce: Update X86_MCE description in x86/Kconfig x86: mce: Make CONFIG_X86_ANCIENT_MCE dependent on CONFIG_X86_MCE x86, mce: use atomic_inc_return() instead of add by 1 ... Manually fixed up trivial conflicts: Documentation/feature-removal-schedule.txt arch/x86/kernel/cpu/mcheck/mce.c
-rw-r--r--Documentation/feature-removal-schedule.txt10
-rw-r--r--arch/x86/Kconfig62
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/mce.h32
-rw-r--r--arch/x86/include/asm/msr-index.h11
-rw-r--r--arch/x86/kernel/apic/nmi.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile5
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c116
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c158
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c304
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c163
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c127
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c13
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/signal.c2
20 files changed, 363 insertions, 777 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 503d21216d58..fa75220f8d34 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -428,16 +428,6 @@ Who: Johannes Berg <johannes@sipsolutions.net>
428 428
429---------------------------- 429----------------------------
430 430
431What: CONFIG_X86_OLD_MCE
432When: 2.6.32
433Why: Remove the old legacy 32bit machine check code. This has been
434 superseded by the newer machine check code from the 64bit port,
435 but the old version has been kept around for easier testing. Note this
436 doesn't impact the old P5 and WinChip machine check handlers.
437Who: Andi Kleen <andi@firstfloor.org>
438
439----------------------------
440
441What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be 431What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be
442 exported interface anymore. 432 exported interface anymore.
443When: 2.6.33 433When: 2.6.33
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e98e81a04971..e5deee2dfcfe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -783,41 +783,17 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
783 increased on these systems. 783 increased on these systems.
784 784
785config X86_MCE 785config X86_MCE
786 bool "Machine Check Exception" 786 bool "Machine Check / overheating reporting"
787 ---help--- 787 ---help---
788 Machine Check Exception support allows the processor to notify the 788 Machine Check support allows the processor to notify the
789 kernel if it detects a problem (e.g. overheating, component failure). 789 kernel if it detects a problem (e.g. overheating, data corruption).
790 The action the kernel takes depends on the severity of the problem, 790 The action the kernel takes depends on the severity of the problem,
791 ranging from a warning message on the console, to halting the machine. 791 ranging from warning messages to halting the machine.
792 Your processor must be a Pentium or newer to support this - check the
793 flags in /proc/cpuinfo for mce. Note that some older Pentium systems
794 have a design flaw which leads to false MCE events - hence MCE is
795 disabled on all P5 processors, unless explicitly enabled with "mce"
796 as a boot argument. Similarly, if MCE is built in and creates a
797 problem on some new non-standard machine, you can boot with "nomce"
798 to disable it. MCE support simply ignores non-MCE processors like
799 the 386 and 486, so nearly everyone can say Y here.
800
801config X86_OLD_MCE
802 depends on X86_32 && X86_MCE
803 bool "Use legacy machine check code (will go away)"
804 default n
805 select X86_ANCIENT_MCE
806 ---help---
807 Use the old i386 machine check code. This is merely intended for
808 testing in a transition period. Try this if you run into any machine
809 check related software problems, but report the problem to
810 linux-kernel. When in doubt say no.
811
812config X86_NEW_MCE
813 depends on X86_MCE
814 bool
815 default y if (!X86_OLD_MCE && X86_32) || X86_64
816 792
817config X86_MCE_INTEL 793config X86_MCE_INTEL
818 def_bool y 794 def_bool y
819 prompt "Intel MCE features" 795 prompt "Intel MCE features"
820 depends on X86_NEW_MCE && X86_LOCAL_APIC 796 depends on X86_MCE && X86_LOCAL_APIC
821 ---help--- 797 ---help---
822 Additional support for intel specific MCE features such as 798 Additional support for intel specific MCE features such as
823 the thermal monitor. 799 the thermal monitor.
@@ -825,14 +801,14 @@ config X86_MCE_INTEL
825config X86_MCE_AMD 801config X86_MCE_AMD
826 def_bool y 802 def_bool y
827 prompt "AMD MCE features" 803 prompt "AMD MCE features"
828 depends on X86_NEW_MCE && X86_LOCAL_APIC 804 depends on X86_MCE && X86_LOCAL_APIC
829 ---help--- 805 ---help---
830 Additional support for AMD specific MCE features such as 806 Additional support for AMD specific MCE features such as
831 the DRAM Error Threshold. 807 the DRAM Error Threshold.
832 808
833config X86_ANCIENT_MCE 809config X86_ANCIENT_MCE
834 def_bool n 810 def_bool n
835 depends on X86_32 811 depends on X86_32 && X86_MCE
836 prompt "Support for old Pentium 5 / WinChip machine checks" 812 prompt "Support for old Pentium 5 / WinChip machine checks"
837 ---help--- 813 ---help---
838 Include support for machine check handling on old Pentium 5 or WinChip 814 Include support for machine check handling on old Pentium 5 or WinChip
@@ -845,36 +821,16 @@ config X86_MCE_THRESHOLD
845 default y 821 default y
846 822
847config X86_MCE_INJECT 823config X86_MCE_INJECT
848 depends on X86_NEW_MCE 824 depends on X86_MCE
849 tristate "Machine check injector support" 825 tristate "Machine check injector support"
850 ---help--- 826 ---help---
851 Provide support for injecting machine checks for testing purposes. 827 Provide support for injecting machine checks for testing purposes.
852 If you don't know what a machine check is and you don't do kernel 828 If you don't know what a machine check is and you don't do kernel
853 QA it is safe to say n. 829 QA it is safe to say n.
854 830
855config X86_MCE_NONFATAL
856 tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
857 depends on X86_OLD_MCE
858 ---help---
859 Enabling this feature starts a timer that triggers every 5 seconds which
860 will look at the machine check registers to see if anything happened.
861 Non-fatal problems automatically get corrected (but still logged).
862 Disable this if you don't want to see these messages.
863 Seeing the messages this option prints out may be indicative of dying
864 or out-of-spec (ie, overclocked) hardware.
865 This option only does something on certain CPUs.
866 (AMD Athlon/Duron and Intel Pentium 4)
867
868config X86_MCE_P4THERMAL
869 bool "check for P4 thermal throttling interrupt."
870 depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
871 ---help---
872 Enabling this feature will cause a message to be printed when the P4
873 enters thermal throttling.
874
875config X86_THERMAL_VECTOR 831config X86_THERMAL_VECTOR
876 def_bool y 832 def_bool y
877 depends on X86_MCE_P4THERMAL || X86_MCE_INTEL 833 depends on X86_MCE_INTEL
878 834
879config VM86 835config VM86
880 bool "Enable VM86 support" if EMBEDDED 836 bool "Enable VM86 support" if EMBEDDED
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index ff8cbfa07851..5e3f2044f0d3 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -61,7 +61,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
61BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) 61BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
62#endif 62#endif
63 63
64#ifdef CONFIG_X86_NEW_MCE 64#ifdef CONFIG_X86_MCE
65BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) 65BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
66#endif 66#endif
67 67
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 5cdd8d100ec9..b608a64c5814 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -9,7 +9,7 @@
9 */ 9 */
10 10
11#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ 11#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
12#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ 12#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
13#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ 13#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
14#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ 14#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ 15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
@@ -38,6 +38,14 @@
38#define MCM_ADDR_MEM 3 /* memory address */ 38#define MCM_ADDR_MEM 3 /* memory address */
39#define MCM_ADDR_GENERIC 7 /* generic */ 39#define MCM_ADDR_GENERIC 7 /* generic */
40 40
41#define MCJ_CTX_MASK 3
42#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
43#define MCJ_CTX_RANDOM 0 /* inject context: random */
44#define MCJ_CTX_PROCESS 1 /* inject context: process */
45#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
46#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
47#define MCJ_EXCEPTION 8 /* raise as exception */
48
41/* Fields are zero when not available */ 49/* Fields are zero when not available */
42struct mce { 50struct mce {
43 __u64 status; 51 __u64 status;
@@ -48,8 +56,8 @@ struct mce {
48 __u64 tsc; /* cpu time stamp counter */ 56 __u64 tsc; /* cpu time stamp counter */
49 __u64 time; /* wall time_t when error was detected */ 57 __u64 time; /* wall time_t when error was detected */
50 __u8 cpuvendor; /* cpu vendor as encoded in system.h */ 58 __u8 cpuvendor; /* cpu vendor as encoded in system.h */
51 __u8 pad1; 59 __u8 inject_flags; /* software inject flags */
52 __u16 pad2; 60 __u16 pad;
53 __u32 cpuid; /* CPUID 1 EAX */ 61 __u32 cpuid; /* CPUID 1 EAX */
54 __u8 cs; /* code segment */ 62 __u8 cs; /* code segment */
55 __u8 bank; /* machine check bank */ 63 __u8 bank; /* machine check bank */
@@ -115,13 +123,6 @@ void mcheck_init(struct cpuinfo_x86 *c);
115static inline void mcheck_init(struct cpuinfo_x86 *c) {} 123static inline void mcheck_init(struct cpuinfo_x86 *c) {}
116#endif 124#endif
117 125
118#ifdef CONFIG_X86_OLD_MCE
119extern int nr_mce_banks;
120void amd_mcheck_init(struct cpuinfo_x86 *c);
121void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
122void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
123#endif
124
125#ifdef CONFIG_X86_ANCIENT_MCE 126#ifdef CONFIG_X86_ANCIENT_MCE
126void intel_p5_mcheck_init(struct cpuinfo_x86 *c); 127void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
127void winchip_mcheck_init(struct cpuinfo_x86 *c); 128void winchip_mcheck_init(struct cpuinfo_x86 *c);
@@ -137,10 +138,11 @@ void mce_log(struct mce *m);
137DECLARE_PER_CPU(struct sys_device, mce_dev); 138DECLARE_PER_CPU(struct sys_device, mce_dev);
138 139
139/* 140/*
140 * To support more than 128 would need to escape the predefined 141 * Maximum banks number.
141 * Linux defined extended banks first. 142 * This is the limit of the current register layout on
143 * Intel CPUs.
142 */ 144 */
143#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) 145#define MAX_NR_BANKS 32
144 146
145#ifdef CONFIG_X86_MCE_INTEL 147#ifdef CONFIG_X86_MCE_INTEL
146extern int mce_cmci_disabled; 148extern int mce_cmci_disabled;
@@ -208,11 +210,7 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
208 210
209void intel_init_thermal(struct cpuinfo_x86 *c); 211void intel_init_thermal(struct cpuinfo_x86 *c);
210 212
211#ifdef CONFIG_X86_NEW_MCE
212void mce_log_therm_throt_event(__u64 status); 213void mce_log_therm_throt_event(__u64 status);
213#else
214static inline void mce_log_therm_throt_event(__u64 status) {}
215#endif
216 214
217#endif /* __KERNEL__ */ 215#endif /* __KERNEL__ */
218#endif /* _ASM_X86_MCE_H */ 216#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index bd5549034a95..4ffe09b2ad75 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -81,8 +81,15 @@
81#define MSR_IA32_MC0_ADDR 0x00000402 81#define MSR_IA32_MC0_ADDR 0x00000402
82#define MSR_IA32_MC0_MISC 0x00000403 82#define MSR_IA32_MC0_MISC 0x00000403
83 83
84#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
85#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
86#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
87#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
88
84/* These are consecutive and not in the normal 4er MCE bank block */ 89/* These are consecutive and not in the normal 4er MCE bank block */
85#define MSR_IA32_MC0_CTL2 0x00000280 90#define MSR_IA32_MC0_CTL2 0x00000280
91#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
92
86#define CMCI_EN (1ULL << 30) 93#define CMCI_EN (1ULL << 30)
87#define CMCI_THRESHOLD_MASK 0xffffULL 94#define CMCI_THRESHOLD_MASK 0xffffULL
88 95
@@ -215,6 +222,10 @@
215 222
216#define THERM_STATUS_PROCHOT (1 << 0) 223#define THERM_STATUS_PROCHOT (1 << 0)
217 224
225#define MSR_THERM2_CTL 0x0000019d
226
227#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
228
218#define MSR_IA32_MISC_ENABLE 0x000001a0 229#define MSR_IA32_MISC_ENABLE 0x000001a0
219 230
220/* MISC_ENABLE bits: architectural */ 231/* MISC_ENABLE bits: architectural */
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index db7220220d09..cb66a22d98ad 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
66 66
67static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
68{ 68{
69#if defined(CONFIG_X86_NEW_MCE) 69#if defined(CONFIG_X86_MCE)
70 return atomic_read(&mce_entry) > 0; 70 return atomic_read(&mce_entry) > 0;
71#endif 71#endif
72 return 0; 72 return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 188a1ca5ad2b..4ac6d48fe11b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,8 @@
1obj-y = mce.o 1obj-y = mce.o mce-severity.o
2 2
3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o 3obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
6obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 4obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
7obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 5obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
8obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
9obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o 6obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
10obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o 7obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
11 8
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
deleted file mode 100644
index b945d5dbc609..000000000000
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Athlon specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@redhat.com>
4 */
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h>
14#include <asm/msr.h>
15
16/* Machine Check Handler For AMD Athlon/Duron: */
17static void k7_machine_check(struct pt_regs *regs, long error_code)
18{
19 u32 alow, ahigh, high, low;
20 u32 mcgstl, mcgsth;
21 int recover = 1;
22 int i;
23
24 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
25 if (mcgstl & (1<<0)) /* Recoverable ? */
26 recover = 0;
27
28 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
29 smp_processor_id(), mcgsth, mcgstl);
30
31 for (i = 1; i < nr_mce_banks; i++) {
32 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
33 if (high & (1<<31)) {
34 char misc[20];
35 char addr[24];
36
37 misc[0] = '\0';
38 addr[0] = '\0';
39
40 if (high & (1<<29))
41 recover |= 1;
42 if (high & (1<<25))
43 recover |= 2;
44 high &= ~(1<<31);
45
46 if (high & (1<<27)) {
47 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
48 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
49 }
50 if (high & (1<<26)) {
51 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
52 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
53 }
54
55 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
56 smp_processor_id(), i, high, low, misc, addr);
57
58 /* Clear it: */
59 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
60 /* Serialize: */
61 wmb();
62 add_taint(TAINT_MACHINE_CHECK);
63 }
64 }
65
66 if (recover & 2)
67 panic("CPU context corrupt");
68 if (recover & 1)
69 panic("Unable to continue");
70
71 printk(KERN_EMERG "Attempting to continue.\n");
72
73 mcgstl &= ~(1<<2);
74 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
75}
76
77
78/* AMD K7 machine check is Intel like: */
79void amd_mcheck_init(struct cpuinfo_x86 *c)
80{
81 u32 l, h;
82 int i;
83
84 if (!cpu_has(c, X86_FEATURE_MCE))
85 return;
86
87 machine_check_vector = k7_machine_check;
88 /* Make sure the vector pointer is visible before we enable MCEs: */
89 wmb();
90
91 printk(KERN_INFO "Intel machine check architecture supported.\n");
92
93 rdmsr(MSR_IA32_MCG_CAP, l, h);
94 if (l & (1<<8)) /* Control register present ? */
95 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
96 nr_mce_banks = l & 0xff;
97
98 /*
99 * Clear status for MC index 0 separately, we don't touch CTL,
100 * as some K7 Athlons cause spurious MCEs when its enabled:
101 */
102 if (boot_cpu_data.x86 == 6) {
103 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
104 i = 1;
105 } else
106 i = 0;
107
108 for (; i < nr_mce_banks; i++) {
109 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
110 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
111 }
112
113 set_in_cr4(X86_CR4_MCE);
114 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
115 smp_processor_id());
116}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index a3a235a53f09..7029f0e2acad 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -18,7 +18,12 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
21#include <linux/notifier.h>
22#include <linux/kdebug.h>
23#include <linux/cpu.h>
24#include <linux/sched.h>
21#include <asm/mce.h> 25#include <asm/mce.h>
26#include <asm/apic.h>
22 27
23/* Update fake mce registers on current CPU. */ 28/* Update fake mce registers on current CPU. */
24static void inject_mce(struct mce *m) 29static void inject_mce(struct mce *m)
@@ -39,44 +44,141 @@ static void inject_mce(struct mce *m)
39 i->finished = 1; 44 i->finished = 1;
40} 45}
41 46
42struct delayed_mce { 47static void raise_poll(struct mce *m)
43 struct timer_list timer; 48{
44 struct mce m; 49 unsigned long flags;
45}; 50 mce_banks_t b;
46 51
47/* Inject mce on current CPU */ 52 memset(&b, 0xff, sizeof(mce_banks_t));
48static void raise_mce(unsigned long data) 53 local_irq_save(flags);
54 machine_check_poll(0, &b);
55 local_irq_restore(flags);
56 m->finished = 0;
57}
58
59static void raise_exception(struct mce *m, struct pt_regs *pregs)
49{ 60{
50 struct delayed_mce *dm = (struct delayed_mce *)data; 61 struct pt_regs regs;
51 struct mce *m = &dm->m; 62 unsigned long flags;
52 int cpu = m->extcpu;
53 63
54 inject_mce(m); 64 if (!pregs) {
55 if (m->status & MCI_STATUS_UC) {
56 struct pt_regs regs;
57 memset(&regs, 0, sizeof(struct pt_regs)); 65 memset(&regs, 0, sizeof(struct pt_regs));
58 regs.ip = m->ip; 66 regs.ip = m->ip;
59 regs.cs = m->cs; 67 regs.cs = m->cs;
68 pregs = &regs;
69 }
70 /* in mcheck exeception handler, irq will be disabled */
71 local_irq_save(flags);
72 do_machine_check(pregs, 0);
73 local_irq_restore(flags);
74 m->finished = 0;
75}
76
77static cpumask_t mce_inject_cpumask;
78
79static int mce_raise_notify(struct notifier_block *self,
80 unsigned long val, void *data)
81{
82 struct die_args *args = (struct die_args *)data;
83 int cpu = smp_processor_id();
84 struct mce *m = &__get_cpu_var(injectm);
85 if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
86 return NOTIFY_DONE;
87 cpu_clear(cpu, mce_inject_cpumask);
88 if (m->inject_flags & MCJ_EXCEPTION)
89 raise_exception(m, args->regs);
90 else if (m->status)
91 raise_poll(m);
92 return NOTIFY_STOP;
93}
94
95static struct notifier_block mce_raise_nb = {
96 .notifier_call = mce_raise_notify,
97 .priority = 1000,
98};
99
100/* Inject mce on current CPU */
101static int raise_local(struct mce *m)
102{
103 int context = MCJ_CTX(m->inject_flags);
104 int ret = 0;
105 int cpu = m->extcpu;
106
107 if (m->inject_flags & MCJ_EXCEPTION) {
60 printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); 108 printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
61 do_machine_check(&regs, 0); 109 switch (context) {
110 case MCJ_CTX_IRQ:
111 /*
112 * Could do more to fake interrupts like
113 * calling irq_enter, but the necessary
114 * machinery isn't exported currently.
115 */
116 /*FALL THROUGH*/
117 case MCJ_CTX_PROCESS:
118 raise_exception(m, NULL);
119 break;
120 default:
121 printk(KERN_INFO "Invalid MCE context\n");
122 ret = -EINVAL;
123 }
62 printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); 124 printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
63 } else { 125 } else if (m->status) {
64 mce_banks_t b;
65 memset(&b, 0xff, sizeof(mce_banks_t));
66 printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); 126 printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
67 machine_check_poll(0, &b); 127 raise_poll(m);
68 mce_notify_irq(); 128 mce_notify_irq();
69 printk(KERN_INFO "Finished machine check poll on CPU %d\n", 129 printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
70 cpu); 130 } else
71 } 131 m->finished = 0;
72 kfree(dm); 132
133 return ret;
134}
135
136static void raise_mce(struct mce *m)
137{
138 int context = MCJ_CTX(m->inject_flags);
139
140 inject_mce(m);
141
142 if (context == MCJ_CTX_RANDOM)
143 return;
144
145#ifdef CONFIG_X86_LOCAL_APIC
146 if (m->inject_flags & MCJ_NMI_BROADCAST) {
147 unsigned long start;
148 int cpu;
149 get_online_cpus();
150 mce_inject_cpumask = cpu_online_map;
151 cpu_clear(get_cpu(), mce_inject_cpumask);
152 for_each_online_cpu(cpu) {
153 struct mce *mcpu = &per_cpu(injectm, cpu);
154 if (!mcpu->finished ||
155 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
156 cpu_clear(cpu, mce_inject_cpumask);
157 }
158 if (!cpus_empty(mce_inject_cpumask))
159 apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
160 start = jiffies;
161 while (!cpus_empty(mce_inject_cpumask)) {
162 if (!time_before(jiffies, start + 2*HZ)) {
163 printk(KERN_ERR
164 "Timeout waiting for mce inject NMI %lx\n",
165 *cpus_addr(mce_inject_cpumask));
166 break;
167 }
168 cpu_relax();
169 }
170 raise_local(m);
171 put_cpu();
172 put_online_cpus();
173 } else
174#endif
175 raise_local(m);
73} 176}
74 177
75/* Error injection interface */ 178/* Error injection interface */
76static ssize_t mce_write(struct file *filp, const char __user *ubuf, 179static ssize_t mce_write(struct file *filp, const char __user *ubuf,
77 size_t usize, loff_t *off) 180 size_t usize, loff_t *off)
78{ 181{
79 struct delayed_mce *dm;
80 struct mce m; 182 struct mce m;
81 183
82 if (!capable(CAP_SYS_ADMIN)) 184 if (!capable(CAP_SYS_ADMIN))
@@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
96 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) 198 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
97 return -EINVAL; 199 return -EINVAL;
98 200
99 dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
100 if (!dm)
101 return -ENOMEM;
102
103 /* 201 /*
104 * Need to give user space some time to set everything up, 202 * Need to give user space some time to set everything up,
105 * so do it a jiffie or two later everywhere. 203 * so do it a jiffie or two later everywhere.
106 * Should we use a hrtimer here for better synchronization?
107 */ 204 */
108 memcpy(&dm->m, &m, sizeof(struct mce)); 205 schedule_timeout(2);
109 setup_timer(&dm->timer, raise_mce, (unsigned long)dm); 206 raise_mce(&m);
110 dm->timer.expires = jiffies + 2;
111 add_timer_on(&dm->timer, m.extcpu);
112 return usize; 207 return usize;
113} 208}
114 209
@@ -116,6 +211,7 @@ static int inject_init(void)
116{ 211{
117 printk(KERN_INFO "Machine check injector initialized\n"); 212 printk(KERN_INFO "Machine check injector initialized\n");
118 mce_chrdev_ops.write = mce_write; 213 mce_chrdev_ops.write = mce_write;
214 register_die_notifier(&mce_raise_nb);
119 return 0; 215 return 0;
120} 216}
121 217
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 54dcb8ff12e5..32996f9fab67 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,4 @@
1#include <linux/sysdev.h>
1#include <asm/mce.h> 2#include <asm/mce.h>
2 3
3enum severity_level { 4enum severity_level {
@@ -10,6 +11,20 @@ enum severity_level {
10 MCE_PANIC_SEVERITY, 11 MCE_PANIC_SEVERITY,
11}; 12};
12 13
14#define ATTR_LEN 16
15
16/* One object for each MCE bank, shared by all CPUs */
17struct mce_bank {
18 u64 ctl; /* subevents to enable */
19 unsigned char init; /* initialise bank? */
20 struct sysdev_attribute attr; /* sysdev attribute */
21 char attrname[ATTR_LEN]; /* attribute name */
22};
23
13int mce_severity(struct mce *a, int tolerant, char **msg); 24int mce_severity(struct mce *a, int tolerant, char **msg);
25struct dentry *mce_get_debugfs_dir(void);
14 26
15extern int mce_ser; 27extern int mce_ser;
28
29extern struct mce_bank *mce_banks;
30
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index ff0807f97056..8a85dd1b1aa1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
139 } 139 }
140} 140}
141 141
142#ifdef CONFIG_DEBUG_FS
142static void *s_start(struct seq_file *f, loff_t *pos) 143static void *s_start(struct seq_file *f, loff_t *pos)
143{ 144{
144 if (*pos >= ARRAY_SIZE(severities)) 145 if (*pos >= ARRAY_SIZE(severities))
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void)
197{ 198{
198 struct dentry *dmce = NULL, *fseverities_coverage = NULL; 199 struct dentry *dmce = NULL, *fseverities_coverage = NULL;
199 200
200 dmce = debugfs_create_dir("mce", NULL); 201 dmce = mce_get_debugfs_dir();
201 if (dmce == NULL) 202 if (dmce == NULL)
202 goto err_out; 203 goto err_out;
203 fseverities_coverage = debugfs_create_file("severities-coverage", 204 fseverities_coverage = debugfs_create_file("severities-coverage",
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void)
209 return 0; 210 return 0;
210 211
211err_out: 212err_out:
212 if (fseverities_coverage)
213 debugfs_remove(fseverities_coverage);
214 if (dmce)
215 debugfs_remove(dmce);
216 return -ENOMEM; 213 return -ENOMEM;
217} 214}
218late_initcall(severities_debugfs_init); 215late_initcall(severities_debugfs_init);
216#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fdd51b554355..2f5aab26320e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -34,6 +34,7 @@
34#include <linux/smp.h> 34#include <linux/smp.h>
35#include <linux/fs.h> 35#include <linux/fs.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/debugfs.h>
37 38
38#include <asm/processor.h> 39#include <asm/processor.h>
39#include <asm/hw_irq.h> 40#include <asm/hw_irq.h>
@@ -45,21 +46,8 @@
45 46
46#include "mce-internal.h" 47#include "mce-internal.h"
47 48
48/* Handle unconfigured int18 (should never happen) */
49static void unexpected_machine_check(struct pt_regs *regs, long error_code)
50{
51 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
52 smp_processor_id());
53}
54
55/* Call the installed machine check handler for this CPU setup. */
56void (*machine_check_vector)(struct pt_regs *, long error_code) =
57 unexpected_machine_check;
58
59int mce_disabled __read_mostly; 49int mce_disabled __read_mostly;
60 50
61#ifdef CONFIG_X86_NEW_MCE
62
63#define MISC_MCELOG_MINOR 227 51#define MISC_MCELOG_MINOR 227
64 52
65#define SPINUNIT 100 /* 100ns */ 53#define SPINUNIT 100 /* 100ns */
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
77 */ 65 */
78static int tolerant __read_mostly = 1; 66static int tolerant __read_mostly = 1;
79static int banks __read_mostly; 67static int banks __read_mostly;
80static u64 *bank __read_mostly;
81static int rip_msr __read_mostly; 68static int rip_msr __read_mostly;
82static int mce_bootlog __read_mostly = -1; 69static int mce_bootlog __read_mostly = -1;
83static int monarch_timeout __read_mostly = -1; 70static int monarch_timeout __read_mostly = -1;
@@ -87,13 +74,13 @@ int mce_cmci_disabled __read_mostly;
87int mce_ignore_ce __read_mostly; 74int mce_ignore_ce __read_mostly;
88int mce_ser __read_mostly; 75int mce_ser __read_mostly;
89 76
77struct mce_bank *mce_banks __read_mostly;
78
90/* User mode helper program triggered by machine check event */ 79/* User mode helper program triggered by machine check event */
91static unsigned long mce_need_notify; 80static unsigned long mce_need_notify;
92static char mce_helper[128]; 81static char mce_helper[128];
93static char *mce_helper_argv[2] = { mce_helper, NULL }; 82static char *mce_helper_argv[2] = { mce_helper, NULL };
94 83
95static unsigned long dont_init_banks;
96
97static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 84static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
98static DEFINE_PER_CPU(struct mce, mces_seen); 85static DEFINE_PER_CPU(struct mce, mces_seen);
99static int cpu_missing; 86static int cpu_missing;
@@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
104 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 91 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
105}; 92};
106 93
107static inline int skip_bank_init(int i)
108{
109 return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
110}
111
112static DEFINE_PER_CPU(struct work_struct, mce_work); 94static DEFINE_PER_CPU(struct work_struct, mce_work);
113 95
114/* Do initial initialization of a struct mce */ 96/* Do initial initialization of a struct mce */
@@ -232,6 +214,9 @@ static void print_mce_tail(void)
232 214
233static atomic_t mce_paniced; 215static atomic_t mce_paniced;
234 216
217static int fake_panic;
218static atomic_t mce_fake_paniced;
219
235/* Panic in progress. Enable interrupts and wait for final IPI */ 220/* Panic in progress. Enable interrupts and wait for final IPI */
236static void wait_for_panic(void) 221static void wait_for_panic(void)
237{ 222{
@@ -249,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
249{ 234{
250 int i; 235 int i;
251 236
252 /* 237 if (!fake_panic) {
253 * Make sure only one CPU runs in machine check panic 238 /*
254 */ 239 * Make sure only one CPU runs in machine check panic
255 if (atomic_add_return(1, &mce_paniced) > 1) 240 */
256 wait_for_panic(); 241 if (atomic_inc_return(&mce_paniced) > 1)
257 barrier(); 242 wait_for_panic();
243 barrier();
258 244
259 bust_spinlocks(1); 245 bust_spinlocks(1);
260 console_verbose(); 246 console_verbose();
247 } else {
248 /* Don't log too much for fake panic */
249 if (atomic_inc_return(&mce_fake_paniced) > 1)
250 return;
251 }
261 print_mce_head(); 252 print_mce_head();
262 /* First print corrected ones that are still unlogged */ 253 /* First print corrected ones that are still unlogged */
263 for (i = 0; i < MCE_LOG_LEN; i++) { 254 for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -284,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
284 print_mce_tail(); 275 print_mce_tail();
285 if (exp) 276 if (exp)
286 printk(KERN_EMERG "Machine check: %s\n", exp); 277 printk(KERN_EMERG "Machine check: %s\n", exp);
287 if (panic_timeout == 0) 278 if (!fake_panic) {
288 panic_timeout = mce_panic_timeout; 279 if (panic_timeout == 0)
289 panic(msg); 280 panic_timeout = mce_panic_timeout;
281 panic(msg);
282 } else
283 printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
290} 284}
291 285
292/* Support code for software error injection */ 286/* Support code for software error injection */
@@ -296,11 +290,11 @@ static int msr_to_offset(u32 msr)
296 unsigned bank = __get_cpu_var(injectm.bank); 290 unsigned bank = __get_cpu_var(injectm.bank);
297 if (msr == rip_msr) 291 if (msr == rip_msr)
298 return offsetof(struct mce, ip); 292 return offsetof(struct mce, ip);
299 if (msr == MSR_IA32_MC0_STATUS + bank*4) 293 if (msr == MSR_IA32_MCx_STATUS(bank))
300 return offsetof(struct mce, status); 294 return offsetof(struct mce, status);
301 if (msr == MSR_IA32_MC0_ADDR + bank*4) 295 if (msr == MSR_IA32_MCx_ADDR(bank))
302 return offsetof(struct mce, addr); 296 return offsetof(struct mce, addr);
303 if (msr == MSR_IA32_MC0_MISC + bank*4) 297 if (msr == MSR_IA32_MCx_MISC(bank))
304 return offsetof(struct mce, misc); 298 return offsetof(struct mce, misc);
305 if (msr == MSR_IA32_MCG_STATUS) 299 if (msr == MSR_IA32_MCG_STATUS)
306 return offsetof(struct mce, mcgstatus); 300 return offsetof(struct mce, mcgstatus);
@@ -505,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
505 499
506 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 500 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
507 for (i = 0; i < banks; i++) { 501 for (i = 0; i < banks; i++) {
508 if (!bank[i] || !test_bit(i, *b)) 502 if (!mce_banks[i].ctl || !test_bit(i, *b))
509 continue; 503 continue;
510 504
511 m.misc = 0; 505 m.misc = 0;
@@ -514,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
514 m.tsc = 0; 508 m.tsc = 0;
515 509
516 barrier(); 510 barrier();
517 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 511 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
518 if (!(m.status & MCI_STATUS_VAL)) 512 if (!(m.status & MCI_STATUS_VAL))
519 continue; 513 continue;
520 514
@@ -529,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
529 continue; 523 continue;
530 524
531 if (m.status & MCI_STATUS_MISCV) 525 if (m.status & MCI_STATUS_MISCV)
532 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 526 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
533 if (m.status & MCI_STATUS_ADDRV) 527 if (m.status & MCI_STATUS_ADDRV)
534 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); 528 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
535 529
536 if (!(flags & MCP_TIMESTAMP)) 530 if (!(flags & MCP_TIMESTAMP))
537 m.tsc = 0; 531 m.tsc = 0;
@@ -547,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
547 /* 541 /*
548 * Clear state for this bank. 542 * Clear state for this bank.
549 */ 543 */
550 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 544 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
551 } 545 }
552 546
553 /* 547 /*
@@ -568,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
568 int i; 562 int i;
569 563
570 for (i = 0; i < banks; i++) { 564 for (i = 0; i < banks; i++) {
571 m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 565 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
572 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 566 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
573 return 1; 567 return 1;
574 } 568 }
@@ -628,7 +622,7 @@ out:
628 * This way we prevent any potential data corruption in a unrecoverable case 622 * This way we prevent any potential data corruption in a unrecoverable case
629 * and also makes sure always all CPU's errors are examined. 623 * and also makes sure always all CPU's errors are examined.
630 * 624 *
631 * Also this detects the case of an machine check event coming from outer 625 * Also this detects the case of a machine check event coming from outer
632 * space (not detected by any CPUs) In this case some external agent wants 626 * space (not detected by any CPUs) In this case some external agent wants
633 * us to shut down, so panic too. 627 * us to shut down, so panic too.
634 * 628 *
@@ -681,7 +675,7 @@ static void mce_reign(void)
681 * No machine check event found. Must be some external 675 * No machine check event found. Must be some external
682 * source or one CPU is hung. Panic. 676 * source or one CPU is hung. Panic.
683 */ 677 */
684 if (!m && tolerant < 3) 678 if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
685 mce_panic("Machine check from unknown source", NULL, NULL); 679 mce_panic("Machine check from unknown source", NULL, NULL);
686 680
687 /* 681 /*
@@ -715,7 +709,7 @@ static int mce_start(int *no_way_out)
715 * global_nwo should be updated before mce_callin 709 * global_nwo should be updated before mce_callin
716 */ 710 */
717 smp_wmb(); 711 smp_wmb();
718 order = atomic_add_return(1, &mce_callin); 712 order = atomic_inc_return(&mce_callin);
719 713
720 /* 714 /*
721 * Wait for everyone. 715 * Wait for everyone.
@@ -852,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear)
852 846
853 for (i = 0; i < banks; i++) { 847 for (i = 0; i < banks; i++) {
854 if (test_bit(i, toclear)) 848 if (test_bit(i, toclear))
855 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 849 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
856 } 850 }
857} 851}
858 852
@@ -905,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
905 mce_setup(&m); 899 mce_setup(&m);
906 900
907 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 901 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
908 no_way_out = mce_no_way_out(&m, &msg);
909
910 final = &__get_cpu_var(mces_seen); 902 final = &__get_cpu_var(mces_seen);
911 *final = m; 903 *final = m;
912 904
905 no_way_out = mce_no_way_out(&m, &msg);
906
913 barrier(); 907 barrier();
914 908
915 /* 909 /*
@@ -926,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
926 order = mce_start(&no_way_out); 920 order = mce_start(&no_way_out);
927 for (i = 0; i < banks; i++) { 921 for (i = 0; i < banks; i++) {
928 __clear_bit(i, toclear); 922 __clear_bit(i, toclear);
929 if (!bank[i]) 923 if (!mce_banks[i].ctl)
930 continue; 924 continue;
931 925
932 m.misc = 0; 926 m.misc = 0;
933 m.addr = 0; 927 m.addr = 0;
934 m.bank = i; 928 m.bank = i;
935 929
936 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 930 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
937 if ((m.status & MCI_STATUS_VAL) == 0) 931 if ((m.status & MCI_STATUS_VAL) == 0)
938 continue; 932 continue;
939 933
@@ -974,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
974 kill_it = 1; 968 kill_it = 1;
975 969
976 if (m.status & MCI_STATUS_MISCV) 970 if (m.status & MCI_STATUS_MISCV)
977 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 971 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
978 if (m.status & MCI_STATUS_ADDRV) 972 if (m.status & MCI_STATUS_ADDRV)
979 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); 973 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
980 974
981 /* 975 /*
982 * Action optional error. Queue address for later processing. 976 * Action optional error. Queue address for later processing.
@@ -1169,10 +1163,25 @@ int mce_notify_irq(void)
1169} 1163}
1170EXPORT_SYMBOL_GPL(mce_notify_irq); 1164EXPORT_SYMBOL_GPL(mce_notify_irq);
1171 1165
1166static int mce_banks_init(void)
1167{
1168 int i;
1169
1170 mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
1171 if (!mce_banks)
1172 return -ENOMEM;
1173 for (i = 0; i < banks; i++) {
1174 struct mce_bank *b = &mce_banks[i];
1175 b->ctl = -1ULL;
1176 b->init = 1;
1177 }
1178 return 0;
1179}
1180
1172/* 1181/*
1173 * Initialize Machine Checks for a CPU. 1182 * Initialize Machine Checks for a CPU.
1174 */ 1183 */
1175static int mce_cap_init(void) 1184static int __cpuinit mce_cap_init(void)
1176{ 1185{
1177 unsigned b; 1186 unsigned b;
1178 u64 cap; 1187 u64 cap;
@@ -1192,11 +1201,10 @@ static int mce_cap_init(void)
1192 /* Don't support asymmetric configurations today */ 1201 /* Don't support asymmetric configurations today */
1193 WARN_ON(banks != 0 && b != banks); 1202 WARN_ON(banks != 0 && b != banks);
1194 banks = b; 1203 banks = b;
1195 if (!bank) { 1204 if (!mce_banks) {
1196 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); 1205 int err = mce_banks_init();
1197 if (!bank) 1206 if (err)
1198 return -ENOMEM; 1207 return err;
1199 memset(bank, 0xff, banks * sizeof(u64));
1200 } 1208 }
1201 1209
1202 /* Use accurate RIP reporting if available. */ 1210 /* Use accurate RIP reporting if available. */
@@ -1228,15 +1236,16 @@ static void mce_init(void)
1228 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1236 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
1229 1237
1230 for (i = 0; i < banks; i++) { 1238 for (i = 0; i < banks; i++) {
1231 if (skip_bank_init(i)) 1239 struct mce_bank *b = &mce_banks[i];
1240 if (!b->init)
1232 continue; 1241 continue;
1233 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 1242 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1234 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 1243 wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
1235 } 1244 }
1236} 1245}
1237 1246
1238/* Add per CPU specific workarounds here */ 1247/* Add per CPU specific workarounds here */
1239static int mce_cpu_quirks(struct cpuinfo_x86 *c) 1248static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1240{ 1249{
1241 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1250 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1242 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1251 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1251,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1251 * trips off incorrectly with the IOMMU & 3ware 1260 * trips off incorrectly with the IOMMU & 3ware
1252 * & Cerberus: 1261 * & Cerberus:
1253 */ 1262 */
1254 clear_bit(10, (unsigned long *)&bank[4]); 1263 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1255 } 1264 }
1256 if (c->x86 <= 17 && mce_bootlog < 0) { 1265 if (c->x86 <= 17 && mce_bootlog < 0) {
1257 /* 1266 /*
@@ -1265,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1265 * by default. 1274 * by default.
1266 */ 1275 */
1267 if (c->x86 == 6 && banks > 0) 1276 if (c->x86 == 6 && banks > 0)
1268 bank[0] = 0; 1277 mce_banks[0].ctl = 0;
1269 } 1278 }
1270 1279
1271 if (c->x86_vendor == X86_VENDOR_INTEL) { 1280 if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1278,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1278 * valid event later, merely don't write CTL0. 1287 * valid event later, merely don't write CTL0.
1279 */ 1288 */
1280 1289
1281 if (c->x86 == 6 && c->x86_model < 0x1A) 1290 if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
1282 __set_bit(0, &dont_init_banks); 1291 mce_banks[0].init = 0;
1283 1292
1284 /* 1293 /*
1285 * All newer Intel systems support MCE broadcasting. Enable 1294 * All newer Intel systems support MCE broadcasting. Enable
@@ -1348,6 +1357,17 @@ static void mce_init_timer(void)
1348 add_timer_on(t, smp_processor_id()); 1357 add_timer_on(t, smp_processor_id());
1349} 1358}
1350 1359
1360/* Handle unconfigured int18 (should never happen) */
1361static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1362{
1363 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
1364 smp_processor_id());
1365}
1366
1367/* Call the installed machine check handler for this CPU setup. */
1368void (*machine_check_vector)(struct pt_regs *, long error_code) =
1369 unexpected_machine_check;
1370
1351/* 1371/*
1352 * Called for each booted CPU to set up machine checks. 1372 * Called for each booted CPU to set up machine checks.
1353 * Must be called with preempt off: 1373 * Must be called with preempt off:
@@ -1561,8 +1581,10 @@ static struct miscdevice mce_log_device = {
1561 */ 1581 */
1562static int __init mcheck_enable(char *str) 1582static int __init mcheck_enable(char *str)
1563{ 1583{
1564 if (*str == 0) 1584 if (*str == 0) {
1565 enable_p5_mce(); 1585 enable_p5_mce();
1586 return 1;
1587 }
1566 if (*str == '=') 1588 if (*str == '=')
1567 str++; 1589 str++;
1568 if (!strcmp(str, "off")) 1590 if (!strcmp(str, "off"))
@@ -1603,8 +1625,9 @@ static int mce_disable(void)
1603 int i; 1625 int i;
1604 1626
1605 for (i = 0; i < banks; i++) { 1627 for (i = 0; i < banks; i++) {
1606 if (!skip_bank_init(i)) 1628 struct mce_bank *b = &mce_banks[i];
1607 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1629 if (b->init)
1630 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1608 } 1631 }
1609 return 0; 1632 return 0;
1610} 1633}
@@ -1679,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
1679__cpuinitdata 1702__cpuinitdata
1680void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1703void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
1681 1704
1682static struct sysdev_attribute *bank_attrs; 1705static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
1706{
1707 return container_of(attr, struct mce_bank, attr);
1708}
1683 1709
1684static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 1710static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
1685 char *buf) 1711 char *buf)
1686{ 1712{
1687 u64 b = bank[attr - bank_attrs]; 1713 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
1688
1689 return sprintf(buf, "%llx\n", b);
1690} 1714}
1691 1715
1692static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1716static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1697,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1697 if (strict_strtoull(buf, 0, &new) < 0) 1721 if (strict_strtoull(buf, 0, &new) < 0)
1698 return -EINVAL; 1722 return -EINVAL;
1699 1723
1700 bank[attr - bank_attrs] = new; 1724 attr_to_bank(attr)->ctl = new;
1701 mce_restart(); 1725 mce_restart();
1702 1726
1703 return size; 1727 return size;
@@ -1839,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1839 } 1863 }
1840 for (j = 0; j < banks; j++) { 1864 for (j = 0; j < banks; j++) {
1841 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1865 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1842 &bank_attrs[j]); 1866 &mce_banks[j].attr);
1843 if (err) 1867 if (err)
1844 goto error2; 1868 goto error2;
1845 } 1869 }
@@ -1848,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1848 return 0; 1872 return 0;
1849error2: 1873error2:
1850 while (--j >= 0) 1874 while (--j >= 0)
1851 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); 1875 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
1852error: 1876error:
1853 while (--i >= 0) 1877 while (--i >= 0)
1854 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1878 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
1855 1879
1856 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1880 sysdev_unregister(&per_cpu(mce_dev, cpu));
1857 1881
@@ -1869,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1869 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1893 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1870 1894
1871 for (i = 0; i < banks; i++) 1895 for (i = 0; i < banks; i++)
1872 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); 1896 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
1873 1897
1874 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1898 sysdev_unregister(&per_cpu(mce_dev, cpu));
1875 cpumask_clear_cpu(cpu, mce_dev_initialized); 1899 cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1886,8 +1910,9 @@ static void mce_disable_cpu(void *h)
1886 if (!(action & CPU_TASKS_FROZEN)) 1910 if (!(action & CPU_TASKS_FROZEN))
1887 cmci_clear(); 1911 cmci_clear();
1888 for (i = 0; i < banks; i++) { 1912 for (i = 0; i < banks; i++) {
1889 if (!skip_bank_init(i)) 1913 struct mce_bank *b = &mce_banks[i];
1890 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1914 if (b->init)
1915 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1891 } 1916 }
1892} 1917}
1893 1918
@@ -1902,8 +1927,9 @@ static void mce_reenable_cpu(void *h)
1902 if (!(action & CPU_TASKS_FROZEN)) 1927 if (!(action & CPU_TASKS_FROZEN))
1903 cmci_reenable(); 1928 cmci_reenable();
1904 for (i = 0; i < banks; i++) { 1929 for (i = 0; i < banks; i++) {
1905 if (!skip_bank_init(i)) 1930 struct mce_bank *b = &mce_banks[i];
1906 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); 1931 if (b->init)
1932 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1907 } 1933 }
1908} 1934}
1909 1935
@@ -1951,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
1951 .notifier_call = mce_cpu_callback, 1977 .notifier_call = mce_cpu_callback,
1952}; 1978};
1953 1979
1954static __init int mce_init_banks(void) 1980static __init void mce_init_banks(void)
1955{ 1981{
1956 int i; 1982 int i;
1957 1983
1958 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1959 GFP_KERNEL);
1960 if (!bank_attrs)
1961 return -ENOMEM;
1962
1963 for (i = 0; i < banks; i++) { 1984 for (i = 0; i < banks; i++) {
1964 struct sysdev_attribute *a = &bank_attrs[i]; 1985 struct mce_bank *b = &mce_banks[i];
1986 struct sysdev_attribute *a = &b->attr;
1965 1987
1966 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); 1988 a->attr.name = b->attrname;
1967 if (!a->attr.name) 1989 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
1968 goto nomem;
1969 1990
1970 a->attr.mode = 0644; 1991 a->attr.mode = 0644;
1971 a->show = show_bank; 1992 a->show = show_bank;
1972 a->store = set_bank; 1993 a->store = set_bank;
1973 } 1994 }
1974 return 0;
1975
1976nomem:
1977 while (--i >= 0)
1978 kfree(bank_attrs[i].attr.name);
1979 kfree(bank_attrs);
1980 bank_attrs = NULL;
1981
1982 return -ENOMEM;
1983} 1995}
1984 1996
1985static __init int mce_init_device(void) 1997static __init int mce_init_device(void)
@@ -1992,9 +2004,7 @@ static __init int mce_init_device(void)
1992 2004
1993 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 2005 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1994 2006
1995 err = mce_init_banks(); 2007 mce_init_banks();
1996 if (err)
1997 return err;
1998 2008
1999 err = sysdev_class_register(&mce_sysclass); 2009 err = sysdev_class_register(&mce_sysclass);
2000 if (err) 2010 if (err)
@@ -2014,57 +2024,65 @@ static __init int mce_init_device(void)
2014 2024
2015device_initcall(mce_init_device); 2025device_initcall(mce_init_device);
2016 2026
2017#else /* CONFIG_X86_OLD_MCE: */ 2027/*
2018 2028 * Old style boot options parsing. Only for compatibility.
2019int nr_mce_banks; 2029 */
2020EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ 2030static int __init mcheck_disable(char *str)
2031{
2032 mce_disabled = 1;
2033 return 1;
2034}
2035__setup("nomce", mcheck_disable);
2021 2036
2022/* This has to be run for each processor */ 2037#ifdef CONFIG_DEBUG_FS
2023void mcheck_init(struct cpuinfo_x86 *c) 2038struct dentry *mce_get_debugfs_dir(void)
2024{ 2039{
2025 if (mce_disabled) 2040 static struct dentry *dmce;
2026 return;
2027 2041
2028 switch (c->x86_vendor) { 2042 if (!dmce)
2029 case X86_VENDOR_AMD: 2043 dmce = debugfs_create_dir("mce", NULL);
2030 amd_mcheck_init(c);
2031 break;
2032 2044
2033 case X86_VENDOR_INTEL: 2045 return dmce;
2034 if (c->x86 == 5) 2046}
2035 intel_p5_mcheck_init(c);
2036 if (c->x86 == 6)
2037 intel_p6_mcheck_init(c);
2038 if (c->x86 == 15)
2039 intel_p4_mcheck_init(c);
2040 break;
2041 2047
2042 case X86_VENDOR_CENTAUR: 2048static void mce_reset(void)
2043 if (c->x86 == 5) 2049{
2044 winchip_mcheck_init(c); 2050 cpu_missing = 0;
2045 break; 2051 atomic_set(&mce_fake_paniced, 0);
2052 atomic_set(&mce_executing, 0);
2053 atomic_set(&mce_callin, 0);
2054 atomic_set(&global_nwo, 0);
2055}
2046 2056
2047 default: 2057static int fake_panic_get(void *data, u64 *val)
2048 break; 2058{
2049 } 2059 *val = fake_panic;
2050 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); 2060 return 0;
2051} 2061}
2052 2062
2053static int __init mcheck_enable(char *str) 2063static int fake_panic_set(void *data, u64 val)
2054{ 2064{
2055 mce_p5_enabled = 1; 2065 mce_reset();
2056 return 1; 2066 fake_panic = val;
2067 return 0;
2057} 2068}
2058__setup("mce", mcheck_enable);
2059 2069
2060#endif /* CONFIG_X86_OLD_MCE */ 2070DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
2071 fake_panic_set, "%llu\n");
2061 2072
2062/* 2073static int __init mce_debugfs_init(void)
2063 * Old style boot options parsing. Only for compatibility.
2064 */
2065static int __init mcheck_disable(char *str)
2066{ 2074{
2067 mce_disabled = 1; 2075 struct dentry *dmce, *ffake_panic;
2068 return 1; 2076
2077 dmce = mce_get_debugfs_dir();
2078 if (!dmce)
2079 return -ENOMEM;
2080 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
2081 &fake_panic_fops);
2082 if (!ffake_panic)
2083 return -ENOMEM;
2084
2085 return 0;
2069} 2086}
2070__setup("nomce", mcheck_disable); 2087late_initcall(mce_debugfs_init);
2088#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e1acec0f7a32..889f665fe93d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot)
90 if (test_bit(i, owned)) 90 if (test_bit(i, owned))
91 continue; 91 continue;
92 92
93 rdmsrl(MSR_IA32_MC0_CTL2 + i, val); 93 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
94 94
95 /* Already owned by someone else? */ 95 /* Already owned by someone else? */
96 if (val & CMCI_EN) { 96 if (val & CMCI_EN) {
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot)
101 } 101 }
102 102
103 val |= CMCI_EN | CMCI_THRESHOLD; 103 val |= CMCI_EN | CMCI_THRESHOLD;
104 wrmsrl(MSR_IA32_MC0_CTL2 + i, val); 104 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
105 rdmsrl(MSR_IA32_MC0_CTL2 + i, val); 105 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
106 106
107 /* Did the enable bit stick? -- the bank supports CMCI */ 107 /* Did the enable bit stick? -- the bank supports CMCI */
108 if (val & CMCI_EN) { 108 if (val & CMCI_EN) {
@@ -152,9 +152,9 @@ void cmci_clear(void)
152 if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 152 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
153 continue; 153 continue;
154 /* Disable CMCI */ 154 /* Disable CMCI */
155 rdmsrl(MSR_IA32_MC0_CTL2 + i, val); 155 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); 156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
157 wrmsrl(MSR_IA32_MC0_CTL2 + i, val); 157 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
158 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 158 __clear_bit(i, __get_cpu_var(mce_banks_owned));
159 } 159 }
160 spin_unlock_irqrestore(&cmci_discover_lock, flags); 160 spin_unlock_irqrestore(&cmci_discover_lock, flags);
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
deleted file mode 100644
index f5f2d6f71fb6..000000000000
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ /dev/null
@@ -1,94 +0,0 @@
1/*
2 * Non Fatal Machine Check Exception Reporting
3 *
4 * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
5 *
6 * This file contains routines to check for non-fatal MCEs every 15s
7 *
8 */
9#include <linux/interrupt.h>
10#include <linux/workqueue.h>
11#include <linux/jiffies.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/init.h>
16#include <linux/smp.h>
17
18#include <asm/processor.h>
19#include <asm/system.h>
20#include <asm/mce.h>
21#include <asm/msr.h>
22
23static int firstbank;
24
25#define MCE_RATE (15*HZ) /* timer rate is 15s */
26
27static void mce_checkregs(void *info)
28{
29 u32 low, high;
30 int i;
31
32 for (i = firstbank; i < nr_mce_banks; i++) {
33 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
34
35 if (!(high & (1<<31)))
36 continue;
37
38 printk(KERN_INFO "MCE: The hardware reports a non fatal, "
39 "correctable incident occurred on CPU %d.\n",
40 smp_processor_id());
41
42 printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
43
44 /*
45 * Scrub the error so we don't pick it up in MCE_RATE
46 * seconds time:
47 */
48 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
49
50 /* Serialize: */
51 wmb();
52 add_taint(TAINT_MACHINE_CHECK);
53 }
54}
55
56static void mce_work_fn(struct work_struct *work);
57static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
58
59static void mce_work_fn(struct work_struct *work)
60{
61 on_each_cpu(mce_checkregs, NULL, 1);
62 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
63}
64
65static int __init init_nonfatal_mce_checker(void)
66{
67 struct cpuinfo_x86 *c = &boot_cpu_data;
68
69 /* Check for MCE support */
70 if (!cpu_has(c, X86_FEATURE_MCE))
71 return -ENODEV;
72
73 /* Check for PPro style MCA */
74 if (!cpu_has(c, X86_FEATURE_MCA))
75 return -ENODEV;
76
77 /* Some Athlons misbehave when we frob bank 0 */
78 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
79 boot_cpu_data.x86 == 6)
80 firstbank = 1;
81 else
82 firstbank = 0;
83
84 /*
85 * Check for non-fatal errors every MCE_RATE s
86 */
87 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
88 printk(KERN_INFO "Machine check exception polling timer started.\n");
89
90 return 0;
91}
92module_init(init_nonfatal_mce_checker);
93
94MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
deleted file mode 100644
index 4482aea9aa2e..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ /dev/null
@@ -1,163 +0,0 @@
1/*
2 * P4 specific Machine Check Exception Reporting
3 */
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/init.h>
7#include <linux/smp.h>
8
9#include <asm/processor.h>
10#include <asm/mce.h>
11#include <asm/msr.h>
12
13/* as supported by the P4/Xeon family */
14struct intel_mce_extended_msrs {
15 u32 eax;
16 u32 ebx;
17 u32 ecx;
18 u32 edx;
19 u32 esi;
20 u32 edi;
21 u32 ebp;
22 u32 esp;
23 u32 eflags;
24 u32 eip;
25 /* u32 *reserved[]; */
26};
27
28static int mce_num_extended_msrs;
29
30/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
31static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
32{
33 u32 h;
34
35 rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
36 rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
37 rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
38 rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
39 rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
40 rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
41 rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
42 rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
43 rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
44 rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
45}
46
47static void intel_machine_check(struct pt_regs *regs, long error_code)
48{
49 u32 alow, ahigh, high, low;
50 u32 mcgstl, mcgsth;
51 int recover = 1;
52 int i;
53
54 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
55 if (mcgstl & (1<<0)) /* Recoverable ? */
56 recover = 0;
57
58 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
59 smp_processor_id(), mcgsth, mcgstl);
60
61 if (mce_num_extended_msrs > 0) {
62 struct intel_mce_extended_msrs dbg;
63
64 intel_get_extended_msrs(&dbg);
65
66 printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
67 "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
68 "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
69 smp_processor_id(), dbg.eip, dbg.eflags,
70 dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
71 dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
72 }
73
74 for (i = 0; i < nr_mce_banks; i++) {
75 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
76 if (high & (1<<31)) {
77 char misc[20];
78 char addr[24];
79
80 misc[0] = addr[0] = '\0';
81 if (high & (1<<29))
82 recover |= 1;
83 if (high & (1<<25))
84 recover |= 2;
85 high &= ~(1<<31);
86 if (high & (1<<27)) {
87 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
88 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
89 }
90 if (high & (1<<26)) {
91 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
92 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
93 }
94 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
95 smp_processor_id(), i, high, low, misc, addr);
96 }
97 }
98
99 if (recover & 2)
100 panic("CPU context corrupt");
101 if (recover & 1)
102 panic("Unable to continue");
103
104 printk(KERN_EMERG "Attempting to continue.\n");
105
106 /*
107 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
108 * recoverable/continuable.This will allow BIOS to look at the MSRs
109 * for errors if the OS could not log the error.
110 */
111 for (i = 0; i < nr_mce_banks; i++) {
112 u32 msr;
113 msr = MSR_IA32_MC0_STATUS+i*4;
114 rdmsr(msr, low, high);
115 if (high&(1<<31)) {
116 /* Clear it */
117 wrmsr(msr, 0UL, 0UL);
118 /* Serialize */
119 wmb();
120 add_taint(TAINT_MACHINE_CHECK);
121 }
122 }
123 mcgstl &= ~(1<<2);
124 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
125}
126
127void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
128{
129 u32 l, h;
130 int i;
131
132 machine_check_vector = intel_machine_check;
133 wmb();
134
135 printk(KERN_INFO "Intel machine check architecture supported.\n");
136 rdmsr(MSR_IA32_MCG_CAP, l, h);
137 if (l & (1<<8)) /* Control register present ? */
138 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
139 nr_mce_banks = l & 0xff;
140
141 for (i = 0; i < nr_mce_banks; i++) {
142 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
143 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
144 }
145
146 set_in_cr4(X86_CR4_MCE);
147 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
148 smp_processor_id());
149
150 /* Check for P4/Xeon extended MCE MSRs */
151 rdmsr(MSR_IA32_MCG_CAP, l, h);
152 if (l & (1<<9)) {/* MCG_EXT_P */
153 mce_num_extended_msrs = (l >> 16) & 0xff;
154 printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
155 " available\n",
156 smp_processor_id(), mce_num_extended_msrs);
157
158#ifdef CONFIG_X86_MCE_P4THERMAL
159 /* Check for P4/Xeon Thermal monitor */
160 intel_init_thermal(c);
161#endif
162 }
163}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
deleted file mode 100644
index 01e4f8178183..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h>
14#include <asm/msr.h>
15
16/* Machine Check Handler For PII/PIII */
17static void intel_machine_check(struct pt_regs *regs, long error_code)
18{
19 u32 alow, ahigh, high, low;
20 u32 mcgstl, mcgsth;
21 int recover = 1;
22 int i;
23
24 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
25 if (mcgstl & (1<<0)) /* Recoverable ? */
26 recover = 0;
27
28 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
29 smp_processor_id(), mcgsth, mcgstl);
30
31 for (i = 0; i < nr_mce_banks; i++) {
32 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
33 if (high & (1<<31)) {
34 char misc[20];
35 char addr[24];
36
37 misc[0] = '\0';
38 addr[0] = '\0';
39
40 if (high & (1<<29))
41 recover |= 1;
42 if (high & (1<<25))
43 recover |= 2;
44 high &= ~(1<<31);
45
46 if (high & (1<<27)) {
47 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
48 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
49 }
50 if (high & (1<<26)) {
51 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
52 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
53 }
54
55 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
56 smp_processor_id(), i, high, low, misc, addr);
57 }
58 }
59
60 if (recover & 2)
61 panic("CPU context corrupt");
62 if (recover & 1)
63 panic("Unable to continue");
64
65 printk(KERN_EMERG "Attempting to continue.\n");
66 /*
67 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
68 * recoverable/continuable.This will allow BIOS to look at the MSRs
69 * for errors if the OS could not log the error:
70 */
71 for (i = 0; i < nr_mce_banks; i++) {
72 unsigned int msr;
73
74 msr = MSR_IA32_MC0_STATUS+i*4;
75 rdmsr(msr, low, high);
76 if (high & (1<<31)) {
77 /* Clear it: */
78 wrmsr(msr, 0UL, 0UL);
79 /* Serialize: */
80 wmb();
81 add_taint(TAINT_MACHINE_CHECK);
82 }
83 }
84 mcgstl &= ~(1<<2);
85 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
86}
87
88/* Set up machine check reporting for processors with Intel style MCE: */
89void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
90{
91 u32 l, h;
92 int i;
93
94 /* Check for MCE support */
95 if (!cpu_has(c, X86_FEATURE_MCE))
96 return;
97
98 /* Check for PPro style MCA */
99 if (!cpu_has(c, X86_FEATURE_MCA))
100 return;
101
102 /* Ok machine check is available */
103 machine_check_vector = intel_machine_check;
104 /* Make sure the vector pointer is visible before we enable MCEs: */
105 wmb();
106
107 printk(KERN_INFO "Intel machine check architecture supported.\n");
108 rdmsr(MSR_IA32_MCG_CAP, l, h);
109 if (l & (1<<8)) /* Control register present ? */
110 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
111 nr_mce_banks = l & 0xff;
112
113 /*
114 * Following the example in IA-32 SDM Vol 3:
115 * - MC0_CTL should not be written
116 * - Status registers on all banks should be cleared on reset
117 */
118 for (i = 1; i < nr_mce_banks; i++)
119 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
120
121 for (i = 0; i < nr_mce_banks; i++)
122 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
123
124 set_in_cr4(X86_CR4_MCE);
125 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
126 smp_processor_id());
127}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 5957a93e5173..63a56d147e4a 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
260 return; 260 return;
261 } 261 }
262 262
263 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
264 tm2 = 1;
265
266 /* Check whether a vector already exists */ 263 /* Check whether a vector already exists */
267 if (h & APIC_VECTOR_MASK) { 264 if (h & APIC_VECTOR_MASK) {
268 printk(KERN_DEBUG 265 printk(KERN_DEBUG
@@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
271 return; 268 return;
272 } 269 }
273 270
271 /* early Pentium M models use different method for enabling TM2 */
272 if (cpu_has(c, X86_FEATURE_TM2)) {
273 if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
274 rdmsr(MSR_THERM2_CTL, l, h);
275 if (l & MSR_THERM2_CTL_TM_SELECT)
276 tm2 = 1;
277 } else if (l & MSR_IA32_MISC_ENABLE_TM2)
278 tm2 = 1;
279 }
280
274 /* We'll mask the thermal vector in the lapic till we're ready: */ 281 /* We'll mask the thermal vector in the lapic till we're ready: */
275 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 282 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
276 apic_write(APIC_LVTTHMR, h); 283 apic_write(APIC_LVTTHMR, h);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b0cdde6932f5..74656d1d4e30 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
104 seq_printf(p, " Threshold APIC interrupts\n"); 104 seq_printf(p, " Threshold APIC interrupts\n");
105# endif 105# endif
106#endif 106#endif
107#ifdef CONFIG_X86_NEW_MCE 107#ifdef CONFIG_X86_MCE
108 seq_printf(p, "%*s: ", prec, "MCE"); 108 seq_printf(p, "%*s: ", prec, "MCE");
109 for_each_online_cpu(j) 109 for_each_online_cpu(j)
110 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); 110 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
200 sum += irq_stats(cpu)->irq_threshold_count; 200 sum += irq_stats(cpu)->irq_threshold_count;
201# endif 201# endif
202#endif 202#endif
203#ifdef CONFIG_X86_NEW_MCE 203#ifdef CONFIG_X86_MCE
204 sum += per_cpu(mce_exception_count, cpu); 204 sum += per_cpu(mce_exception_count, cpu);
205 sum += per_cpu(mce_poll_count, cpu); 205 sum += per_cpu(mce_poll_count, cpu);
206#endif 206#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 92b7703d3d58..ccf8ab54f31a 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -190,7 +190,7 @@ static void __init apic_intr_init(void)
190#ifdef CONFIG_X86_MCE_THRESHOLD 190#ifdef CONFIG_X86_MCE_THRESHOLD
191 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 191 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
192#endif 192#endif
193#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) 193#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
194 alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); 194 alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
195#endif 195#endif
196 196
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 81e58238c4ce..6a44a76055ad 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs)
856void 856void
857do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 857do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
858{ 858{
859#ifdef CONFIG_X86_NEW_MCE 859#ifdef CONFIG_X86_MCE
860 /* notify userspace of pending MCEs */ 860 /* notify userspace of pending MCEs */
861 if (thread_info_flags & _TIF_MCE_NOTIFY) 861 if (thread_info_flags & _TIF_MCE_NOTIFY)
862 mce_notify_process(); 862 mce_notify_process();