Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig                                   |  45
-rw-r--r--  arch/x86/include/asm/entry_arch.h                  |   6
-rw-r--r--  arch/x86/include/asm/hardirq.h                     |   2
-rw-r--r--  arch/x86/include/asm/irq_vectors.h                 |   4
-rw-r--r--  arch/x86/include/asm/mce.h                         |  55
-rw-r--r--  arch/x86/include/asm/msr-index.h                   |   7
-rw-r--r--  arch/x86/kernel/apic/apic.c                        |   4
-rw-r--r--  arch/x86/kernel/apic/nmi.c                         |   2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile                |   9
-rw-r--r--  arch/x86/kernel/cpu/mcheck/k7.c                    |  42
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c            | 127
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c (renamed from arch/x86/kernel/cpu/mcheck/mce_64.c) | 645
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.h                   |  26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_32.c                |  76
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c            | 203
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c             |  74
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c          |  60
-rw-r--r--  arch/x86/kernel/cpu/mcheck/non-fatal.c             |  57
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p4.c                    |  86
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c                    |  48
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p6.c                    |  26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c           |  73
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c             |   2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c               |  17
-rw-r--r--  arch/x86/kernel/entry_64.S                         |   4
-rw-r--r--  arch/x86/kernel/irq.c                              |   4
-rw-r--r--  arch/x86/kernel/irqinit.c                          |  11
-rw-r--r--  arch/x86/kernel/signal.c                           |   4
-rw-r--r--  arch/x86/kernel/traps.c                            |   6
29 files changed, 1031 insertions(+), 694 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b1d3f60525c..afd1168eeef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -783,10 +783,26 @@ config X86_MCE
           to disable it. MCE support simply ignores non-MCE processors like
           the 386 and 486, so nearly everyone can say Y here.
 
+config X86_OLD_MCE
+        depends on X86_32 && X86_MCE
+        bool "Use legacy machine check code (will go away)"
+        default n
+        select X86_ANCIENT_MCE
+        ---help---
+          Use the old i386 machine check code. This is merely intended for
+          testing in a transition period. Try this if you run into any machine
+          check related software problems, but report the problem to
+          linux-kernel. When in doubt say no.
+
+config X86_NEW_MCE
+        depends on X86_MCE
+        bool
+        default y if (!X86_OLD_MCE && X86_32) || X86_64
+
 config X86_MCE_INTEL
         def_bool y
         prompt "Intel MCE features"
-        depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+        depends on X86_NEW_MCE && X86_LOCAL_APIC
         ---help---
           Additional support for intel specific MCE features such as
           the thermal monitor.
@@ -794,19 +810,36 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
         def_bool y
         prompt "AMD MCE features"
-        depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+        depends on X86_NEW_MCE && X86_LOCAL_APIC
         ---help---
           Additional support for AMD specific MCE features such as
           the DRAM Error Threshold.
 
+config X86_ANCIENT_MCE
+        def_bool n
+        depends on X86_32
+        prompt "Support for old Pentium 5 / WinChip machine checks"
+        ---help---
+          Include support for machine check handling on old Pentium 5 or WinChip
+          systems. These typically need to be enabled explicitly on the command
+          line.
+
 config X86_MCE_THRESHOLD
         depends on X86_MCE_AMD || X86_MCE_INTEL
         bool
         default y
 
+config X86_MCE_INJECT
+        depends on X86_NEW_MCE
+        tristate "Machine check injector support"
+        ---help---
+          Provide support for injecting machine checks for testing purposes.
+          If you don't know what a machine check is and you don't do kernel
+          QA it is safe to say n.
+
 config X86_MCE_NONFATAL
         tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
-        depends on X86_32 && X86_MCE
+        depends on X86_OLD_MCE
         ---help---
           Enabling this feature starts a timer that triggers every 5 seconds which
           will look at the machine check registers to see if anything happened.
@@ -819,11 +852,15 @@ config X86_MCE_NONFATAL
 
 config X86_MCE_P4THERMAL
         bool "check for P4 thermal throttling interrupt."
-        depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
+        depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
         ---help---
           Enabling this feature will cause a message to be printed when the P4
           enters thermal throttling.
 
+config X86_THERMAL_VECTOR
+        def_bool y
+        depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+
 config VM86
         bool "Enable VM86 support" if EMBEDDED
         default y
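Under the new symbols, a typical 64-bit build ends up with the following selection (an illustrative .config fragment, not part of the patch; X86_OLD_MCE is only offered on 32-bit, and X86_NEW_MCE defaults to y wherever the legacy code is not chosen):

    CONFIG_X86_MCE=y
    CONFIG_X86_NEW_MCE=y
    CONFIG_X86_MCE_INTEL=y
    CONFIG_X86_MCE_AMD=y
    CONFIG_X86_MCE_THRESHOLD=y
    CONFIG_X86_MCE_INJECT=m
    # CONFIG_X86_OLD_MCE is not set
    # CONFIG_X86_ANCIENT_MCE is not set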
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bedaf25..b2eb9c06684 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -52,8 +52,12 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
 #endif
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_THERMAL_VECTOR
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
+BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
+#endif
+
 #endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 37555e52f98..922ee7c2969 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -20,7 +20,7 @@ typedef struct {
 #endif
 #ifdef CONFIG_X86_MCE
         unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
         unsigned int irq_threshold_count;
 # endif
 #endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 910b5a3d675..8c46b851296 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -87,11 +87,11 @@
 #define CALL_FUNCTION_VECTOR            0xfc
 #define CALL_FUNCTION_SINGLE_VECTOR     0xfb
 #define THERMAL_APIC_VECTOR             0xfa
+#define THRESHOLD_APIC_VECTOR           0xf9
 
 #ifdef CONFIG_X86_32
-/* 0xf8 - 0xf9 : free */
+/* 0xf8 : free */
 #else
-# define THRESHOLD_APIC_VECTOR          0xf9
 # define UV_BAU_MESSAGE                 0xf8
 #endif
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4f8c199584e..ac6e0303bf2 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_MCE_H
 #define _ASM_X86_MCE_H
 
-#ifdef __x86_64__
-
 #include <linux/types.h>
 #include <asm/ioctls.h>
 
@@ -10,21 +8,25 @@
  * Machine Check support for x86
  */
 
-#define MCG_CTL_P        (1UL<<8)    /* MCG_CAP register available */
-#define MCG_EXT_P        (1ULL<<9)   /* Extended registers available */
-#define MCG_CMCI_P       (1ULL<<10)  /* CMCI supported */
-
-#define MCG_STATUS_RIPV  (1UL<<0)    /* restart ip valid */
-#define MCG_STATUS_EIPV  (1UL<<1)    /* ip points to correct instruction */
-#define MCG_STATUS_MCIP  (1UL<<2)    /* machine check in progress */
-
-#define MCI_STATUS_VAL   (1UL<<63)   /* valid error */
-#define MCI_STATUS_OVER  (1UL<<62)   /* previous errors lost */
-#define MCI_STATUS_UC    (1UL<<61)   /* uncorrected error */
-#define MCI_STATUS_EN    (1UL<<60)   /* error enabled */
-#define MCI_STATUS_MISCV (1UL<<59)   /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1UL<<58)   /* addr reg. valid */
-#define MCI_STATUS_PCC   (1UL<<57)   /* processor context corrupt */
+#define MCG_BANKCNT_MASK  0xff       /* Number of Banks */
+#define MCG_CTL_P         (1ULL<<8)  /* MCG_CAP register available */
+#define MCG_EXT_P         (1ULL<<9)  /* Extended registers available */
+#define MCG_CMCI_P        (1ULL<<10) /* CMCI supported */
+#define MCG_EXT_CNT_MASK  0xff0000   /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT 16
+#define MCG_EXT_CNT(c)    (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+
+#define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
+#define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
+#define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
+
+#define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
+#define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC    (1ULL<<61)  /* uncorrected error */
+#define MCI_STATUS_EN    (1ULL<<60)  /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
+#define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
 
 /* Fields are zero when not available */
 struct mce {
@@ -82,19 +84,15 @@ struct mce_log {
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
 
-#endif /* __x86_64__ */
-
 #ifdef __KERNEL__
 
-#ifdef CONFIG_X86_32
 extern int mce_disabled;
-#else /* CONFIG_X86_32 */
 
 #include <asm/atomic.h>
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, device_mce);
+DECLARE_PER_CPU(struct sys_device, mce_dev);
 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 /*
@@ -123,13 +121,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif
 
-extern int mce_available(struct cpuinfo_x86 *c);
+int mce_available(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
 extern atomic_t mce_entry;
 
-extern void do_machine_check(struct pt_regs *, long);
+void do_machine_check(struct pt_regs *, long);
 
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@@ -139,14 +137,15 @@ enum mcp_flags {
         MCP_UC = (1 << 1),          /* log uncorrected errors */
         MCP_DONTLOG = (1 << 2),     /* only clear, don't log */
 };
-extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
-extern int mce_notify_user(void);
+int mce_notify_user(void);
 
-#endif /* !CONFIG_X86_32 */
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;
 
 #ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
 #else
 #define mcheck_init(c) do { } while (0)
 #endif
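The new MCG_* accessors above make the MCG_CAP layout explicit. As a worked example (a hypothetical capability value, not taken from the patch):

    /* Decoding an assumed MCG_CAP value of 0x90306 with the new macros */
    u64 cap = 0x90306;

    unsigned nbanks = cap & MCG_BANKCNT_MASK; /* == 6: six banks (bits 7:0) */
    int has_ctl     = !!(cap & MCG_CTL_P);    /* == 1: MCG_CTL available    */
    int has_ext     = !!(cap & MCG_EXT_P);    /* == 1: extended registers   */
    int has_cmci    = !!(cap & MCG_CMCI_P);   /* == 0: CMCI not supported   */
    unsigned extcnt = MCG_EXT_CNT(cap);       /* == 9: (cap & 0xff0000) >> 16 */

mce_cap_init() in the mce.c hunks further below uses exactly this pattern to decide whether the accurate-RIP MSR can be used (MCG_EXT_CNT(cap) >= 9).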
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc16c16..c8640469508 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -208,7 +208,14 @@
 
 #define MSR_IA32_THERM_CONTROL          0x0000019a
 #define MSR_IA32_THERM_INTERRUPT        0x0000019b
+
+#define THERM_INT_LOW_ENABLE            (1 << 0)
+#define THERM_INT_HIGH_ENABLE           (1 << 1)
+
 #define MSR_IA32_THERM_STATUS           0x0000019c
+
+#define THERM_STATUS_PROCHOT            (1 << 0)
+
 #define MSR_IA32_MISC_ENABLE            0x000001a0
 
 /* MISC_ENABLE bits: architectural */
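A minimal sketch of how these thermal bits are meant to be consumed, modeled on the existing therm_throt/intel_init_thermal code (illustrative usage, not part of this hunk):

    u32 l, h;

    /* Report throttling: PROCHOT is bit 0 of IA32_THERM_STATUS */
    rdmsr(MSR_IA32_THERM_STATUS, l, h);
    therm_throt_process(l & THERM_STATUS_PROCHOT);

    /* Enable interrupts for both temperature thresholds */
    rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
    wrmsr(MSR_IA32_THERM_INTERRUPT,
          l | THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE, h);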
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b0fd26442c4..ee75d2a9b9c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -897,7 +897,7 @@ void clear_local_APIC(void)
         }
 
         /* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
         if (maxlvt >= 5) {
                 v = apic_read(APIC_LVTTHMR);
                 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -2007,7 +2007,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
         apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
         apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
         apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
         if (maxlvt >= 5)
                 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index ce4fbfa315a..c4762276c17 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
 
 static inline int mce_in_progress(void)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#if defined(CONFIG_X86_NEW_MCE)
         return atomic_read(&mce_entry) > 0;
 #endif
         return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index b2f89829bbe..60ee182c6c5 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,7 +1,10 @@
-obj-y                           = mce_$(BITS).o therm_throt.o
+obj-y                           = mce.o therm_throt.o
 
-obj-$(CONFIG_X86_32)            += k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_INTEL)     += mce_intel_64.o
+obj-$(CONFIG_X86_OLD_MCE)       += k7.o p4.o p6.o
+obj-$(CONFIG_X86_ANCIENT_MCE)   += winchip.o p5.o
+obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_INTEL)     += mce_intel_64.o mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)       += mce_amd_64.o
 obj-$(CONFIG_X86_MCE_NONFATAL)  += non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
+obj-$(CONFIG_X86_MCE_INJECT)    += mce-inject.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index dd3af6e7b39..89e51042415 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -2,11 +2,10 @@
  * Athlon specific Machine Check Exception Reporting
  * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
@@ -15,12 +14,12 @@
 
 #include "mce.h"
 
-/* Machine Check Handler For AMD Athlon/Duron */
+/* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
-        int recover = 1;
         u32 alow, ahigh, high, low;
         u32 mcgstl, mcgsth;
+        int recover = 1;
         int i;
 
         rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 
         for (i = 1; i < nr_mce_banks; i++) {
                 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-                if (high&(1<<31)) {
+                if (high & (1<<31)) {
                         char misc[20];
                         char addr[24];
-                        misc[0] = addr[0] = '\0';
+
+                        misc[0] = '\0';
+                        addr[0] = '\0';
+
                         if (high & (1<<29))
                                 recover |= 1;
                         if (high & (1<<25))
                                 recover |= 2;
                         high &= ~(1<<31);
+
                         if (high & (1<<27)) {
                                 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
                                 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
                                 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
                                 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
                         }
+
                         printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
                                 smp_processor_id(), i, high, low, misc, addr);
-                        /* Clear it */
+
+                        /* Clear it: */
                         wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-                        /* Serialize */
+                        /* Serialize: */
                         wmb();
                         add_taint(TAINT_MACHINE_CHECK);
                 }
         }
 
-        if (recover&2)
+        if (recover & 2)
                 panic("CPU context corrupt");
-        if (recover&1)
+        if (recover & 1)
                 panic("Unable to continue");
+
         printk(KERN_EMERG "Attempting to continue.\n");
+
         mcgstl &= ~(1<<2);
         wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
 
-/* AMD K7 machine check is Intel like */
+/* AMD K7 machine check is Intel like: */
 void amd_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 l, h;
@@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
                 return;
 
         machine_check_vector = k7_machine_check;
+        /* Make sure the vector pointer is visible before we enable MCEs: */
         wmb();
 
         printk(KERN_INFO "Intel machine check architecture supported.\n");
+
         rdmsr(MSR_IA32_MCG_CAP, l, h);
         if (l & (1<<8)) /* Control register present ? */
                 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
         nr_mce_banks = l & 0xff;
 
-        /* Clear status for MC index 0 separately, we don't touch CTL,
-         * as some K7 Athlons cause spurious MCEs when its enabled. */
+        /*
+         * Clear status for MC index 0 separately, we don't touch CTL,
+         * as some K7 Athlons cause spurious MCEs when its enabled:
+         */
         if (boot_cpu_data.x86 == 6) {
                 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
                 i = 1;
         } else
                 i = 0;
+
         for (; i < nr_mce_banks; i++) {
                 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
                 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
new file mode 100644
index 00000000000..7b3a5428396
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -0,0 +1,127 @@
+/*
+ * Machine check injection support.
+ * Copyright 2008 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Authors:
+ * Andi Kleen
+ * Ying Huang
+ */
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <asm/mce.h>
+
+/* Update fake mce registers on current CPU. */
+static void inject_mce(struct mce *m)
+{
+        struct mce *i = &per_cpu(injectm, m->cpu);
+
+        /* Make sure no one reads partially written injectm */
+        i->finished = 0;
+        mb();
+        m->finished = 0;
+        /* First set the fields after finished */
+        i->cpu = m->cpu;
+        mb();
+        /* Now write record in order, finished last (except above) */
+        memcpy(i, m, sizeof(struct mce));
+        /* Finally activate it */
+        mb();
+        i->finished = 1;
+}
+
+struct delayed_mce {
+        struct timer_list timer;
+        struct mce m;
+};
+
+/* Inject mce on current CPU */
+static void raise_mce(unsigned long data)
+{
+        struct delayed_mce *dm = (struct delayed_mce *)data;
+        struct mce *m = &dm->m;
+        int cpu = m->cpu;
+
+        inject_mce(m);
+        if (m->status & MCI_STATUS_UC) {
+                struct pt_regs regs;
+                memset(&regs, 0, sizeof(struct pt_regs));
+                regs.ip = m->ip;
+                regs.cs = m->cs;
+                printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+                do_machine_check(&regs, 0);
+                printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+        } else {
+                mce_banks_t b;
+                memset(&b, 0xff, sizeof(mce_banks_t));
+                printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+                machine_check_poll(0, &b);
+                mce_notify_user();
+                printk(KERN_INFO "Finished machine check poll on CPU %d\n",
+                       cpu);
+        }
+        kfree(dm);
+}
+
+/* Error injection interface */
+static ssize_t mce_write(struct file *filp, const char __user *ubuf,
+                         size_t usize, loff_t *off)
+{
+        struct delayed_mce *dm;
+        struct mce m;
+
+        if (!capable(CAP_SYS_ADMIN))
+                return -EPERM;
+        /*
+         * There are some cases where real MSR reads could slip
+         * through.
+         */
+        if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+                return -EIO;
+
+        if ((unsigned long)usize > sizeof(struct mce))
+                usize = sizeof(struct mce);
+        if (copy_from_user(&m, ubuf, usize))
+                return -EFAULT;
+
+        if (m.cpu >= num_possible_cpus() || !cpu_online(m.cpu))
+                return -EINVAL;
+
+        dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
+        if (!dm)
+                return -ENOMEM;
+
+        /*
+         * Need to give user space some time to set everything up,
+         * so do it a jiffie or two later everywhere.
+         * Should we use a hrtimer here for better synchronization?
+         */
+        memcpy(&dm->m, &m, sizeof(struct mce));
+        setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
+        dm->timer.expires = jiffies + 2;
+        add_timer_on(&dm->timer, m.cpu);
+        return usize;
+}
+
+static int inject_init(void)
+{
+        printk(KERN_INFO "Machine check injector initialized\n");
+        mce_chrdev_ops.write = mce_write;
+        return 0;
+}
+
+module_init(inject_init);
+/*
+ * Cannot tolerate unloading currently because we cannot
+ * guarantee all openers of mce_chrdev will get a reference to us.
+ */
+MODULE_LICENSE("GPL");
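The injector works by replacing the write() method of /dev/mcelog, so a test run is just a userspace write of a struct mce record; the timer then raises the event on the target CPU a jiffie or two later. A minimal sketch, assuming struct mce and the MCI_STATUS_* bits are visible via the exported asm/mce.h (Andi Kleen's mce-inject tool automates this from a text format):

    #include <asm/mce.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        struct mce m;
        int fd = open("/dev/mcelog", O_RDWR);   /* error handling omitted */

        memset(&m, 0, sizeof(m));
        m.cpu    = 0;                           /* CPU to inject on */
        m.bank   = 1;
        m.addr   = 0x1234;
        /* UC clear: taken through the machine_check_poll() path above */
        m.status = MCI_STATUS_VAL | MCI_STATUS_EN;

        return write(fd, &m, sizeof(m)) == sizeof(m) ? 0 : 1;
    }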
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6fb0b359d2a..1d0aa9c4e15 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1,46 +1,61 @@
 /*
  * Machine check handler.
+ *
  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
  * Rest from unknown author(s).
  * 2004 Andi Kleen. Rewrote most of it.
  * Copyright 2008 Intel Corporation
  * Author: Andi Kleen
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/capability.h>
+#include <linux/miscdevice.h>
+#include <linux/ratelimit.h>
+#include <linux/kallsyms.h>
+#include <linux/rcupdate.h>
+#include <linux/kobject.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
+#include <linux/percpu.h>
 #include <linux/string.h>
-#include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
 #include <linux/sysdev.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/capability.h>
-#include <linux/cpu.h>
-#include <linux/percpu.h>
-#include <linux/poll.h>
-#include <linux/thread_info.h>
 #include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/kdebug.h>
-#include <linux/kobject.h>
+#include <linux/sched.h>
 #include <linux/sysfs.h>
-#include <linux/ratelimit.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/poll.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+
 #include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
-#include <asm/uaccess.h>
-#include <asm/smp.h>
 #include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
 
-#define MISC_MCELOG_MINOR 227
+#include "mce.h"
 
-atomic_t mce_entry;
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+        printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+               smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+                                                unexpected_machine_check;
 
-static int mce_dont_init;
+int mce_disabled;
+
+#ifdef CONFIG_X86_NEW_MCE
+
+#define MISC_MCELOG_MINOR 227
+
+atomic_t mce_entry;
 
 /*
  * Tolerant levels:
@@ -49,16 +64,17 @@ static int mce_dont_init;
  * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
  * 3: never panic or SIGBUS, log all errors (for testing only)
  */
 static int tolerant = 1;
 static int banks;
 static u64 *bank;
 static unsigned long notify_user;
 static int rip_msr;
 static int mce_bootlog = -1;
-static atomic_t mce_events;
 
 static char trigger[128];
 static char *trigger_argv[2] = { trigger, NULL };
+
+static unsigned long dont_init_banks;
 
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 
@@ -67,6 +83,11 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
         [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
 
+static inline int skip_bank_init(int i)
+{
+        return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
+}
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -75,6 +96,9 @@ void mce_setup(struct mce *m)
         rdtscll(m->tsc);
 }
 
+DEFINE_PER_CPU(struct mce, injectm);
+EXPORT_PER_CPU_SYMBOL_GPL(injectm);
+
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -89,19 +113,23 @@ static struct mce_log mcelog = {
 void mce_log(struct mce *mce)
 {
         unsigned next, entry;
-        atomic_inc(&mce_events);
+
         mce->finished = 0;
         wmb();
         for (;;) {
                 entry = rcu_dereference(mcelog.next);
                 for (;;) {
-                        /* When the buffer fills up discard new entries. Assume
-                           that the earlier errors are the more interesting. */
+                        /*
+                         * When the buffer fills up discard new entries.
+                         * Assume that the earlier errors are the more
+                         * interesting ones:
+                         */
                         if (entry >= MCE_LOG_LEN) {
-                                set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+                                set_bit(MCE_OVERFLOW,
+                                        (unsigned long *)&mcelog.flags);
                                 return;
                         }
-                        /* Old left over entry. Skip. */
+                        /* Old left over entry. Skip: */
                        if (mcelog.entry[entry].finished) {
                                 entry++;
                                 continue;
@@ -147,15 +175,16 @@ static void print_mce(struct mce *m)
                "and contact your hardware vendor\n");
 }
 
-static void mce_panic(char *msg, struct mce *backup, unsigned long start)
+static void mce_panic(char *msg, struct mce *backup, u64 start)
 {
         int i;
 
-        oops_begin();
+        bust_spinlocks(1);
+        console_verbose();
         for (i = 0; i < MCE_LOG_LEN; i++) {
-                unsigned long tsc = mcelog.entry[i].tsc;
+                u64 tsc = mcelog.entry[i].tsc;
 
-                if (time_before(tsc, start))
+                if ((s64)(tsc - start) < 0)
                         continue;
                 print_mce(&mcelog.entry[i]);
                 if (backup && mcelog.entry[i].tsc == backup->tsc)
@@ -166,9 +195,52 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
         panic(msg);
 }
 
+/* Support code for software error injection */
+
+static int msr_to_offset(u32 msr)
+{
+        unsigned bank = __get_cpu_var(injectm.bank);
+        if (msr == rip_msr)
+                return offsetof(struct mce, ip);
+        if (msr == MSR_IA32_MC0_STATUS + bank*4)
+                return offsetof(struct mce, status);
+        if (msr == MSR_IA32_MC0_ADDR + bank*4)
+                return offsetof(struct mce, addr);
+        if (msr == MSR_IA32_MC0_MISC + bank*4)
+                return offsetof(struct mce, misc);
+        if (msr == MSR_IA32_MCG_STATUS)
+                return offsetof(struct mce, mcgstatus);
+        return -1;
+}
+
+/* MSR access wrappers used for error injection */
+static u64 mce_rdmsrl(u32 msr)
+{
+        u64 v;
+        if (__get_cpu_var(injectm).finished) {
+                int offset = msr_to_offset(msr);
+                if (offset < 0)
+                        return 0;
+                return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+        }
+        rdmsrl(msr, v);
+        return v;
+}
+
+static void mce_wrmsrl(u32 msr, u64 v)
+{
+        if (__get_cpu_var(injectm).finished) {
+                int offset = msr_to_offset(msr);
+                if (offset >= 0)
+                        *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
+                return;
+        }
+        wrmsrl(msr, v);
+}
+
 int mce_available(struct cpuinfo_x86 *c)
 {
-        if (mce_dont_init)
+        if (mce_disabled)
                 return 0;
         return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
@@ -185,7 +257,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
         if (rip_msr) {
                 /* Assume the RIP in the MSR is exact. Is this true? */
                 m->mcgstatus |= MCG_STATUS_EIPV;
-                rdmsrl(rip_msr, m->ip);
+                m->ip = mce_rdmsrl(rip_msr);
                 m->cs = 0;
         }
 }
@@ -203,7 +275,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
         mce_setup(&m);
 
-        rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+        m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
         for (i = 0; i < banks; i++) {
                 if (!bank[i] || !test_bit(i, *b))
                         continue;
@@ -214,7 +286,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                 m.tsc = 0;
 
                 barrier();
-                rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+                m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
                 if (!(m.status & MCI_STATUS_VAL))
                         continue;
 
@@ -229,9 +301,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                         continue;
 
                 if (m.status & MCI_STATUS_MISCV)
-                        rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+                        m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
                 if (m.status & MCI_STATUS_ADDRV)
-                        rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+                        m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
 
                 if (!(flags & MCP_TIMESTAMP))
                         m.tsc = 0;
@@ -247,14 +319,17 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                 /*
                  * Clear state for this bank.
                  */
-                wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+                mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
         }
 
         /*
          * Don't clear MCG_STATUS here because it's only defined for
         * exceptions.
          */
+
+        sync_core();
 }
+EXPORT_SYMBOL_GPL(machine_check_poll);
 
 /*
  * The actual machine check handler. This only handles real
@@ -264,12 +339,12 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
  * implies that most kernel services cannot be safely used. Don't even
  * think about putting a printk in there!
  */
-void do_machine_check(struct pt_regs * regs, long error_code)
+void do_machine_check(struct pt_regs *regs, long error_code)
 {
         struct mce m, panicm;
+        int panicm_found = 0;
         u64 mcestart = 0;
         int i;
-        int panicm_found = 0;
         /*
          * If no_way_out gets set, there is no safe way to recover from this
          * MCE. If tolerant is cranked up, we'll try anyway.
@@ -286,13 +361,14 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 
         if (notify_die(DIE_NMI, "machine check", regs, error_code,
                            18, SIGKILL) == NOTIFY_STOP)
-                goto out2;
+                goto out;
         if (!banks)
-                goto out2;
+                goto out;
 
         mce_setup(&m);
 
-        rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+        m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+
         /* if the restart IP is not valid, we're done for */
         if (!(m.mcgstatus & MCG_STATUS_RIPV))
                 no_way_out = 1;
@@ -309,7 +385,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
                 m.addr = 0;
                 m.bank = i;
 
-                rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+                m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
                 if ((m.status & MCI_STATUS_VAL) == 0)
                         continue;
 
@@ -349,30 +425,36 @@ void do_machine_check(struct pt_regs * regs, long error_code)
                 }
 
                 if (m.status & MCI_STATUS_MISCV)
-                        rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+                        m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
                 if (m.status & MCI_STATUS_ADDRV)
-                        rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+                        m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
 
                 mce_get_rip(&m, regs);
                 mce_log(&m);
 
-                /* Did this bank cause the exception? */
-                /* Assume that the bank with uncorrectable errors did it,
-                   and that there is only a single one. */
-                if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
+                /*
+                 * Did this bank cause the exception?
+                 *
+                 * Assume that the bank with uncorrectable errors did it,
+                 * and that there is only a single one:
+                 */
+                if ((m.status & MCI_STATUS_UC) &&
+                                        (m.status & MCI_STATUS_EN)) {
                         panicm = m;
                         panicm_found = 1;
                 }
         }
 
-        /* If we didn't find an uncorrectable error, pick
-           the last one (shouldn't happen, just being safe). */
+        /*
+         * If we didn't find an uncorrectable error, pick
+         * the last one (shouldn't happen, just being safe).
+         */
         if (!panicm_found)
                 panicm = m;
 
         /*
          * If we have decided that we just CAN'T continue, and the user
          * has not set tolerant to an insane level, give up and die.
          */
         if (no_way_out && tolerant < 3)
                 mce_panic("Machine check", &panicm, mcestart);
@@ -414,12 +496,14 @@ void do_machine_check(struct pt_regs * regs, long error_code)
         /* the last thing we do is clear state */
         for (i = 0; i < banks; i++) {
                 if (test_bit(i, toclear))
-                        wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+                        mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
         }
-        wrmsrl(MSR_IA32_MCG_STATUS, 0);
- out2:
+        mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+out:
         atomic_dec(&mce_entry);
+        sync_core();
 }
+EXPORT_SYMBOL_GPL(do_machine_check);
 
 #ifdef CONFIG_X86_MCE_INTEL
 /***
@@ -451,10 +535,9 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-
 static int check_interval = 5 * 60; /* 5 minutes */
+
 static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
-static void mcheck_timer(unsigned long);
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
 static void mcheck_timer(unsigned long data)
@@ -464,20 +547,20 @@ static void mcheck_timer(unsigned long data)
 
         WARN_ON(smp_processor_id() != data);
 
-        if (mce_available(&current_cpu_data))
+        if (mce_available(&current_cpu_data)) {
                 machine_check_poll(MCP_TIMESTAMP,
                                 &__get_cpu_var(mce_poll_banks));
+        }
 
         /*
          * Alert userspace if needed. If we logged an MCE, reduce the
          * polling interval, otherwise increase the polling interval.
          */
         n = &__get_cpu_var(next_interval);
-        if (mce_notify_user()) {
+        if (mce_notify_user())
                 *n = max(*n/2, HZ/100);
-        } else {
+        else
                 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
-        }
 
         t->expires = jiffies + *n;
         add_timer(t);
@@ -501,6 +584,7 @@ int mce_notify_user(void)
         static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
         clear_thread_flag(TIF_MCE_NOTIFY);
+
         if (test_and_clear_bit(0, &notify_user)) {
                 wake_up_interruptible(&mce_wait);
 
@@ -519,39 +603,21 @@
         }
         return 0;
 }
-
-/* see if the idle task needs to notify userspace */
-static int
-mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
-{
-        /* IDLE_END should be safe - interrupts are back on */
-        if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
-                mce_notify_user();
-
-        return NOTIFY_OK;
-}
-
-static struct notifier_block mce_idle_notifier = {
-        .notifier_call = mce_idle_callback,
-};
-
-static __init int periodic_mcheck_init(void)
-{
-        idle_notifier_register(&mce_idle_notifier);
-        return 0;
-}
-__initcall(periodic_mcheck_init);
+EXPORT_SYMBOL_GPL(mce_notify_user);
 
 /*
  * Initialize Machine Checks for a CPU.
  */
 static int mce_cap_init(void)
 {
-        u64 cap;
         unsigned b;
+        u64 cap;
 
         rdmsrl(MSR_IA32_MCG_CAP, cap);
-        b = cap & 0xff;
+
+        b = cap & MCG_BANKCNT_MASK;
+        printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+
         if (b > MAX_NR_BANKS) {
                 printk(KERN_WARNING
                        "MCE: Using only %u machine check banks out of %u\n",
@@ -570,17 +636,17 @@ static int mce_cap_init(void)
         }
 
         /* Use accurate RIP reporting if available. */
-        if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
+        if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
                 rip_msr = MSR_IA32_MCG_EIP;
 
         return 0;
 }
 
-static void mce_init(void *dummy)
+static void mce_init(void)
 {
+        mce_banks_t all_banks;
         u64 cap;
         int i;
-        mce_banks_t all_banks;
 
         /*
          * Log the machine checks left over from the previous reset.
@@ -595,6 +661,8 @@ static void mce_init(void *dummy)
         wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
         for (i = 0; i < banks; i++) {
+                if (skip_bank_init(i))
+                        continue;
                 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
                 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
         }
@@ -605,16 +673,57 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
         /* This should be disabled by the BIOS, but isn't always */
         if (c->x86_vendor == X86_VENDOR_AMD) {
-                if (c->x86 == 15 && banks > 4)
-                        /* disable GART TBL walk error reporting, which trips off
-                           incorrectly with the IOMMU & 3ware & Cerberus. */
+                if (c->x86 == 15 && banks > 4) {
+                        /*
+                         * disable GART TBL walk error reporting, which
+                         * trips off incorrectly with the IOMMU & 3ware
+                         * & Cerberus:
+                         */
                         clear_bit(10, (unsigned long *)&bank[4]);
-                if(c->x86 <= 17 && mce_bootlog < 0)
-                        /* Lots of broken BIOS around that don't clear them
-                           by default and leave crap in there. Don't log. */
+                }
+                if (c->x86 <= 17 && mce_bootlog < 0) {
+                        /*
+                         * Lots of broken BIOS around that don't clear them
+                         * by default and leave crap in there. Don't log:
+                         */
                         mce_bootlog = 0;
+                }
+                /*
+                 * Various K7s with broken bank 0 around. Always disable
+                 * by default.
+                 */
+                if (c->x86 == 6)
+                        bank[0] = 0;
         }
 
+        if (c->x86_vendor == X86_VENDOR_INTEL) {
+                /*
+                 * SDM documents that on family 6 bank 0 should not be written
+                 * because it aliases to another special BIOS controlled
+                 * register.
+                 * But it's not aliased anymore on model 0x1a+
+                 * Don't ignore bank 0 completely because there could be a
+                 * valid event later, merely don't write CTL0.
+                 */
+
+                if (c->x86 == 6 && c->x86_model < 0x1A)
+                        __set_bit(0, &dont_init_banks);
+        }
+}
+
+static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+{
+        if (c->x86 != 5)
+                return;
+        switch (c->x86_vendor) {
+        case X86_VENDOR_INTEL:
+                if (mce_p5_enabled())
+                        intel_p5_mcheck_init(c);
+                break;
+        case X86_VENDOR_CENTAUR:
+                winchip_mcheck_init(c);
+                break;
+        }
 }
 
 static void mce_cpu_features(struct cpuinfo_x86 *c)
@@ -646,20 +755,27 @@ static void mce_init_timer(void)
 
 /*
  * Called for each booted CPU to set up machine checks.
- * Must be called with preempt off.
+ * Must be called with preempt off:
  */
 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 {
+        if (mce_disabled)
+                return;
+
+        mce_ancient_init(c);
+
         if (!mce_available(c))
                 return;
 
         if (mce_cap_init() < 0) {
-                mce_dont_init = 1;
+                mce_disabled = 1;
                 return;
         }
         mce_cpu_quirks(c);
 
-        mce_init(NULL);
+        machine_check_vector = do_machine_check;
+
+        mce_init();
         mce_cpu_features(c);
         mce_init_timer();
 }
@@ -669,17 +785,16 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
  */
 
 static DEFINE_SPINLOCK(mce_state_lock);
 static int open_count;          /* #times opened */
 static int open_exclu;          /* already open exclusive? */
 
 static int mce_open(struct inode *inode, struct file *file)
 {
-        lock_kernel();
         spin_lock(&mce_state_lock);
 
         if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
                 spin_unlock(&mce_state_lock);
-                unlock_kernel();
+
                 return -EBUSY;
         }
 
@@ -688,7 +803,6 @@ static int mce_open(struct inode *inode, struct file *file)
         open_count++;
 
         spin_unlock(&mce_state_lock);
-        unlock_kernel();
 
         return nonseekable_open(inode, file);
 }
@@ -712,13 +826,14 @@ static void collect_tscs(void *data)
         rdtscll(cpu_tsc[smp_processor_id()]);
 }
 
+static DEFINE_MUTEX(mce_read_mutex);
+
 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
                         loff_t *off)
 {
+        char __user *buf = ubuf;
         unsigned long *cpu_tsc;
-        static DEFINE_MUTEX(mce_read_mutex);
         unsigned prev, next;
-        char __user *buf = ubuf;
         int i, err;
 
         cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
@@ -732,6 +847,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
         if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
                 mutex_unlock(&mce_read_mutex);
                 kfree(cpu_tsc);
+
                 return -EINVAL;
         }
 
@@ -770,6 +886,7 @@ timeout:
          * synchronize.
          */
         on_each_cpu(collect_tscs, cpu_tsc, 1);
+
         for (i = next; i < MCE_LOG_LEN; i++) {
                 if (mcelog.entry[i].finished &&
                     mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -782,6 +899,7 @@ timeout:
         }
         mutex_unlock(&mce_read_mutex);
         kfree(cpu_tsc);
+
         return err ? -EFAULT : buf - ubuf;
 }
 
@@ -799,6 +917,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
+
         switch (cmd) {
         case MCE_GET_RECORD_LEN:
                 return put_user(sizeof(struct mce), p);
@@ -810,6 +929,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
                 do {
                         flags = mcelog.flags;
                 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+
                 return put_user(flags, p);
         }
         default:
@@ -817,13 +937,15 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
         }
 }
 
-static const struct file_operations mce_chrdev_ops = {
-        .open = mce_open,
-        .release = mce_release,
-        .read = mce_read,
-        .poll = mce_poll,
-        .unlocked_ioctl = mce_ioctl,
+/* Modified in mce-inject.c, so not static or const */
+struct file_operations mce_chrdev_ops = {
+        .open           = mce_open,
+        .release        = mce_release,
+        .read           = mce_read,
+        .poll           = mce_poll,
+        .unlocked_ioctl = mce_ioctl,
 };
+EXPORT_SYMBOL_GPL(mce_chrdev_ops);
 
 static struct miscdevice mce_log_device = {
         MISC_MCELOG_MINOR,
@@ -832,33 +954,31 @@ static struct miscdevice mce_log_device = {
 };
 
 /*
- * Old style boot options parsing. Only for compatibility.
+ * mce=off disables machine check
+ * mce=TOLERANCELEVEL (number, see above)
+ * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+ * mce=nobootlog Don't log MCEs from before booting.
  */
-static int __init mcheck_disable(char *str)
-{
-        mce_dont_init = 1;
-        return 1;
-}
-
-/* mce=off disables machine check.
-   mce=TOLERANCELEVEL (number, see above)
-   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
-   mce=nobootlog Don't log MCEs from before booting. */
 static int __init mcheck_enable(char *str)
 {
+        if (*str == 0)
+                enable_p5_mce();
+        if (*str == '=')
+                str++;
         if (!strcmp(str, "off"))
-                mce_dont_init = 1;
-        else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
-                mce_bootlog = str[0] == 'b';
+                mce_disabled = 1;
+        else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
+                mce_bootlog = (str[0] == 'b');
         else if (isdigit(str[0]))
                 get_option(&str, &tolerant);
-        else
-                printk("mce= argument %s ignored. Please use /sys", str);
+        else {
+                printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
+                       str);
+                return 0;
+        }
         return 1;
 }
-
-__setup("nomce", mcheck_disable);
-__setup("mce=", mcheck_enable);
+__setup("mce", mcheck_enable);
 
 /*
  * Sysfs support
@@ -872,8 +992,10 @@ static int mce_disable(void)
 {
         int i;
 
-        for (i = 0; i < banks; i++)
-                wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+        for (i = 0; i < banks; i++) {
+                if (!skip_bank_init(i))
+                        wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+        }
         return 0;
 }
 
@@ -887,13 +1009,16 @@ static int mce_shutdown(struct sys_device *dev)
         return mce_disable();
 }
 
-/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
-   Only one CPU is active at this time, the others get readded later using
-   CPU hotplug. */
+/*
+ * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+ * Only one CPU is active at this time, the others get re-added later using
+ * CPU hotplug:
+ */
 static int mce_resume(struct sys_device *dev)
 {
-        mce_init(NULL);
+        mce_init();
         mce_cpu_features(&current_cpu_data);
+
         return 0;
 }
 
@@ -901,7 +1026,7 @@ static void mce_cpu_restart(void *data)
 {
         del_timer_sync(&__get_cpu_var(mce_timer));
         if (mce_available(&current_cpu_data))
-                mce_init(NULL);
+                mce_init();
         mce_init_timer();
 }
 
@@ -912,33 +1037,16 @@ static void mce_restart(void)
 }
 
 static struct sysdev_class mce_sysclass = {
         .suspend        = mce_suspend,
         .shutdown       = mce_shutdown,
         .resume         = mce_resume,
         .name           = "machinecheck",
 };
 
-DEFINE_PER_CPU(struct sys_device, device_mce);
-void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
-
-/* Why are there no generic functions for this? */
-#define ACCESSOR(name, var, start) \
-        static ssize_t show_ ## name(struct sys_device *s, \
-                                     struct sysdev_attribute *attr, \
-                                     char *buf) { \
-                return sprintf(buf, "%lx\n", (unsigned long)var); \
-        } \
-        static ssize_t set_ ## name(struct sys_device *s, \
-                                    struct sysdev_attribute *attr, \
-                                    const char *buf, size_t siz) { \
-                char *end; \
-                unsigned long new = simple_strtoul(buf, &end, 0); \
-                if (end == buf) return -EINVAL; \
-                var = new; \
-                start; \
-                return end-buf; \
-        } \
-        static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
+DEFINE_PER_CPU(struct sys_device, mce_dev);
+
+__cpuinitdata
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 static struct sysdev_attribute *bank_attrs;
 
@@ -946,23 +1054,26 @@ static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
946 char *buf) 1054 char *buf)
947{ 1055{
948 u64 b = bank[attr - bank_attrs]; 1056 u64 b = bank[attr - bank_attrs];
1057
949 return sprintf(buf, "%llx\n", b); 1058 return sprintf(buf, "%llx\n", b);
950} 1059}
951 1060
952static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1061static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
953 const char *buf, size_t siz) 1062 const char *buf, size_t size)
954{ 1063{
955 char *end; 1064 u64 new;
956 u64 new = simple_strtoull(buf, &end, 0); 1065
957 if (end == buf) 1066 if (strict_strtoull(buf, 0, &new) < 0)
958 return -EINVAL; 1067 return -EINVAL;
1068
959 bank[attr - bank_attrs] = new; 1069 bank[attr - bank_attrs] = new;
960 mce_restart(); 1070 mce_restart();
961 return end-buf; 1071
1072 return size;
962} 1073}
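
set_bank() switches from simple_strtoull(), which silently accepts trailing garbage, to strict_strtoull(), which fails unless the whole string (bar a trailing newline) is a number, and the store now returns the full size on success as sysfs expects. The validation can be sketched in user space with strtoull() — a rough analogue, not the kernel implementation:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Rough user-space analogue of strict_strtoull(): the whole buffer
 * must parse as a number, optionally followed by one newline. */
static int strict_strtoull(const char *buf, int base, unsigned long long *res)
{
	char *end;

	errno = 0;
	*res = strtoull(buf, &end, base);
	if (errno || end == buf)
		return -EINVAL;
	if (*end == '\n')
		end++;
	if (*end != '\0')
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned long long v;

	printf("\"0x3f\\n\" -> %d\n", strict_strtoull("0x3f\n", 0, &v)); /* 0, accepted */
	printf("\"12junk\" -> %d\n", strict_strtoull("12junk", 0, &v));  /* -EINVAL */
	return 0;
}
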
963 1074
964static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1075static ssize_t
965 char *buf) 1076show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
966{ 1077{
967 strcpy(buf, trigger); 1078 strcpy(buf, trigger);
968 strcat(buf, "\n"); 1079 strcat(buf, "\n");
@@ -970,29 +1081,48 @@ static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
970} 1081}
971 1082
972static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1083static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
973 const char *buf,size_t siz) 1084 const char *buf, size_t siz)
974{ 1085{
975 char *p; 1086 char *p;
976 int len; 1087 int len;
1088
977 strncpy(trigger, buf, sizeof(trigger)); 1089 strncpy(trigger, buf, sizeof(trigger));
978 trigger[sizeof(trigger)-1] = 0; 1090 trigger[sizeof(trigger)-1] = 0;
979 len = strlen(trigger); 1091 len = strlen(trigger);
980 p = strchr(trigger, '\n'); 1092 p = strchr(trigger, '\n');
981 if (*p) *p = 0; 1093
1094 if (*p)
1095 *p = 0;
1096
982 return len; 1097 return len;
983} 1098}
984 1099
1100static ssize_t store_int_with_restart(struct sys_device *s,
1101 struct sysdev_attribute *attr,
1102 const char *buf, size_t size)
1103{
1104 ssize_t ret = sysdev_store_int(s, attr, buf, size);
1105 mce_restart();
1106 return ret;
1107}
1108
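
store_int_with_restart() is a thin wrapper: reuse the generic sysdev integer store, then kick mce_restart() so the new check_interval takes effect immediately. The wrap-then-apply-side-effect pattern, sketched with stand-in functions (names here are illustrative, not kernel APIs):

#include <stdio.h>

static int check_interval = 5 * 60;	/* seconds, illustrative default */

static int generic_store_int(const char *buf)	/* plays sysdev_store_int() */
{
	return sscanf(buf, "%d", &check_interval) == 1 ? 0 : -1;
}

static void mce_restart(void)
{
	printf("timers rearmed, interval=%d\n", check_interval);
}

/* Delegate the parsing/assignment, then restart unconditionally,
 * exactly as the kernel wrapper does. */
static int store_int_with_restart(const char *buf)
{
	int ret = generic_store_int(buf);
	mce_restart();
	return ret;
}

int main(void) { return store_int_with_restart("120\n"); }
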
985static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1109static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
986static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1110static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
987ACCESSOR(check_interval,check_interval,mce_restart()) 1111
988static struct sysdev_attribute *mce_attributes[] = { 1112static struct sysdev_ext_attribute attr_check_interval = {
989 &attr_tolerant.attr, &attr_check_interval, &attr_trigger, 1113 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
1114 store_int_with_restart),
1115 &check_interval
1116};
1117
1118static struct sysdev_attribute *mce_attrs[] = {
1119 &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
990 NULL 1120 NULL
991}; 1121};
992 1122
993static cpumask_var_t mce_device_initialized; 1123static cpumask_var_t mce_dev_initialized;
994 1124
995/* Per cpu sysdev init. All of the cpus still share the same ctl bank */ 1125/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
996static __cpuinit int mce_create_device(unsigned int cpu) 1126static __cpuinit int mce_create_device(unsigned int cpu)
997{ 1127{
998 int err; 1128 int err;
@@ -1001,40 +1131,36 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1001 if (!mce_available(&boot_cpu_data)) 1131 if (!mce_available(&boot_cpu_data))
1002 return -EIO; 1132 return -EIO;
1003 1133
1004 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); 1134 memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
1005 per_cpu(device_mce,cpu).id = cpu; 1135 per_cpu(mce_dev, cpu).id = cpu;
1006 per_cpu(device_mce,cpu).cls = &mce_sysclass; 1136 per_cpu(mce_dev, cpu).cls = &mce_sysclass;
1007 1137
1008 err = sysdev_register(&per_cpu(device_mce,cpu)); 1138 err = sysdev_register(&per_cpu(mce_dev, cpu));
1009 if (err) 1139 if (err)
1010 return err; 1140 return err;
1011 1141
1012 for (i = 0; mce_attributes[i]; i++) { 1142 for (i = 0; mce_attrs[i]; i++) {
1013 err = sysdev_create_file(&per_cpu(device_mce,cpu), 1143 err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1014 mce_attributes[i]);
1015 if (err) 1144 if (err)
1016 goto error; 1145 goto error;
1017 } 1146 }
1018 for (i = 0; i < banks; i++) { 1147 for (i = 0; i < banks; i++) {
1019 err = sysdev_create_file(&per_cpu(device_mce, cpu), 1148 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1020 &bank_attrs[i]); 1149 &bank_attrs[i]);
1021 if (err) 1150 if (err)
1022 goto error2; 1151 goto error2;
1023 } 1152 }
1024 cpumask_set_cpu(cpu, mce_device_initialized); 1153 cpumask_set_cpu(cpu, mce_dev_initialized);
1025 1154
1026 return 0; 1155 return 0;
1027error2: 1156error2:
1028 while (--i >= 0) { 1157 while (--i >= 0)
1029 sysdev_remove_file(&per_cpu(device_mce, cpu), 1158 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
1030 &bank_attrs[i]);
1031 }
1032error: 1159error:
1033 while (--i >= 0) { 1160 while (--i >= 0)
1034 sysdev_remove_file(&per_cpu(device_mce,cpu), 1161 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1035 mce_attributes[i]); 1162
1036 } 1163 sysdev_unregister(&per_cpu(mce_dev, cpu));
1037 sysdev_unregister(&per_cpu(device_mce,cpu));
1038 1164
1039 return err; 1165 return err;
1040} 1166}
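
mce_create_device() uses the classic staged-unwind idiom: two labels, each rolling back only what was registered before the failure, with while (--i >= 0) walking back over the partially-completed loop. Note that when control falls through from error2 the counter is already -1, so the attribute loop is a no-op on that path and the final unregister tears down the rest. A self-contained sketch of the shape (create/removef are stand-ins):

#include <stdio.h>

#define NFILES 3
#define NBANKS 2

static int create(const char *what, int i, int fail)
{
	if (fail)
		return -1;
	printf("created %s%d\n", what, i);
	return 0;
}

static void removef(const char *what, int i) { printf("removed %s%d\n", what, i); }

static int mce_create_device_like(int fail_at_bank)
{
	int i, err = 0;

	for (i = 0; i < NFILES; i++)
		if ((err = create("attr", i, 0)))
			goto error;
	for (i = 0; i < NBANKS; i++)
		if ((err = create("bank", i, i == fail_at_bank)))
			goto error2;
	return 0;

error2:
	/* Undo the banks created before the failure; afterwards i is -1,
	 * so the loop under error: does nothing on this path. */
	while (--i >= 0)
		removef("bank", i);
error:
	while (--i >= 0)
		removef("attr", i);
	printf("unregistered device\n");
	return err;
}

int main(void) { return mce_create_device_like(1) ? 1 : 0; }
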
@@ -1043,49 +1169,54 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1043{ 1169{
1044 int i; 1170 int i;
1045 1171
1046 if (!cpumask_test_cpu(cpu, mce_device_initialized)) 1172 if (!cpumask_test_cpu(cpu, mce_dev_initialized))
1047 return; 1173 return;
1048 1174
1049 for (i = 0; mce_attributes[i]; i++) 1175 for (i = 0; mce_attrs[i]; i++)
1050 sysdev_remove_file(&per_cpu(device_mce,cpu), 1176 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1051 mce_attributes[i]); 1177
1052 for (i = 0; i < banks; i++) 1178 for (i = 0; i < banks; i++)
1053 sysdev_remove_file(&per_cpu(device_mce, cpu), 1179 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
1054 &bank_attrs[i]); 1180
1055 sysdev_unregister(&per_cpu(device_mce,cpu)); 1181 sysdev_unregister(&per_cpu(mce_dev, cpu));
1056 cpumask_clear_cpu(cpu, mce_device_initialized); 1182 cpumask_clear_cpu(cpu, mce_dev_initialized);
1057} 1183}
1058 1184
1059/* Make sure there are no machine checks on offlined CPUs. */ 1185/* Make sure there are no machine checks on offlined CPUs. */
1060static void mce_disable_cpu(void *h) 1186static void mce_disable_cpu(void *h)
1061{ 1187{
1062 int i;
1063 unsigned long action = *(unsigned long *)h; 1188 unsigned long action = *(unsigned long *)h;
1189 int i;
1064 1190
1065 if (!mce_available(&current_cpu_data)) 1191 if (!mce_available(&current_cpu_data))
1066 return; 1192 return;
1067 if (!(action & CPU_TASKS_FROZEN)) 1193 if (!(action & CPU_TASKS_FROZEN))
1068 cmci_clear(); 1194 cmci_clear();
1069 for (i = 0; i < banks; i++) 1195 for (i = 0; i < banks; i++) {
1070 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1196 if (!skip_bank_init(i))
1197 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1198 }
1071} 1199}
1072 1200
1073static void mce_reenable_cpu(void *h) 1201static void mce_reenable_cpu(void *h)
1074{ 1202{
1075 int i;
1076 unsigned long action = *(unsigned long *)h; 1203 unsigned long action = *(unsigned long *)h;
1204 int i;
1077 1205
1078 if (!mce_available(&current_cpu_data)) 1206 if (!mce_available(&current_cpu_data))
1079 return; 1207 return;
1208
1080 if (!(action & CPU_TASKS_FROZEN)) 1209 if (!(action & CPU_TASKS_FROZEN))
1081 cmci_reenable(); 1210 cmci_reenable();
1082 for (i = 0; i < banks; i++) 1211 for (i = 0; i < banks; i++) {
1083 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); 1212 if (!skip_bank_init(i))
1213 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1214 }
1084} 1215}
1085 1216
1086/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 1217/* Get notified when a cpu comes on/off. Be hotplug friendly. */
1087static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, 1218static int __cpuinit
1088 unsigned long action, void *hcpu) 1219mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
1089{ 1220{
1090 unsigned int cpu = (unsigned long)hcpu; 1221 unsigned int cpu = (unsigned long)hcpu;
1091 struct timer_list *t = &per_cpu(mce_timer, cpu); 1222 struct timer_list *t = &per_cpu(mce_timer, cpu);
@@ -1138,12 +1269,14 @@ static __init int mce_init_banks(void)
1138 1269
1139 for (i = 0; i < banks; i++) { 1270 for (i = 0; i < banks; i++) {
1140 struct sysdev_attribute *a = &bank_attrs[i]; 1271 struct sysdev_attribute *a = &bank_attrs[i];
1141 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); 1272
1273 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
1142 if (!a->attr.name) 1274 if (!a->attr.name)
1143 goto nomem; 1275 goto nomem;
1144 a->attr.mode = 0644; 1276
1145 a->show = show_bank; 1277 a->attr.mode = 0644;
1146 a->store = set_bank; 1278 a->show = show_bank;
1279 a->store = set_bank;
1147 } 1280 }
1148 return 0; 1281 return 0;
1149 1282
@@ -1152,6 +1285,7 @@ nomem:
1152 kfree(bank_attrs[i].attr.name); 1285 kfree(bank_attrs[i].attr.name);
1153 kfree(bank_attrs); 1286 kfree(bank_attrs);
1154 bank_attrs = NULL; 1287 bank_attrs = NULL;
1288
1155 return -ENOMEM; 1289 return -ENOMEM;
1156} 1290}
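
mce_init_banks() allocates one formatted name per bank and, on allocation failure, frees only the names minted so far before dropping the whole array. The same allocate-or-unwind loop in portable C, with kasprintf() approximated by malloc()+snprintf() (an approximation, not the kernel helper):

#include <stdio.h>
#include <stdlib.h>

#define NBANKS 6

static char *kasprintf_like(int i)
{
	char *s = malloc(16);

	if (s)
		snprintf(s, 16, "bank%d", i);
	return s;
}

int main(void)
{
	char *names[NBANKS];
	int i;

	for (i = 0; i < NBANKS; i++) {
		names[i] = kasprintf_like(i);
		if (!names[i])
			goto nomem;
		printf("%s\n", names[i]);
	}
	/* Normal path: names stay live, owned by the attributes. */
	return 0;

nomem:
	while (--i >= 0)	/* free only what was allocated */
		free(names[i]);
	return 1;
}
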
1157 1291
@@ -1163,7 +1297,7 @@ static __init int mce_init_device(void)
1163 if (!mce_available(&boot_cpu_data)) 1297 if (!mce_available(&boot_cpu_data))
1164 return -EIO; 1298 return -EIO;
1165 1299
1166 alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); 1300 alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1167 1301
1168 err = mce_init_banks(); 1302 err = mce_init_banks();
1169 if (err) 1303 if (err)
@@ -1181,7 +1315,64 @@ static __init int mce_init_device(void)
1181 1315
1182 register_hotcpu_notifier(&mce_cpu_notifier); 1316 register_hotcpu_notifier(&mce_cpu_notifier);
1183 misc_register(&mce_log_device); 1317 misc_register(&mce_log_device);
1318
1184 return err; 1319 return err;
1185} 1320}
1186 1321
1187device_initcall(mce_init_device); 1322device_initcall(mce_init_device);
1323
1324#else /* CONFIG_X86_OLD_MCE: */
1325
1326int nr_mce_banks;
1327EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
1328
1329/* This has to be run for each processor */
1330void mcheck_init(struct cpuinfo_x86 *c)
1331{
1332 if (mce_disabled == 1)
1333 return;
1334
1335 switch (c->x86_vendor) {
1336 case X86_VENDOR_AMD:
1337 amd_mcheck_init(c);
1338 break;
1339
1340 case X86_VENDOR_INTEL:
1341 if (c->x86 == 5)
1342 intel_p5_mcheck_init(c);
1343 if (c->x86 == 6)
1344 intel_p6_mcheck_init(c);
1345 if (c->x86 == 15)
1346 intel_p4_mcheck_init(c);
1347 break;
1348
1349 case X86_VENDOR_CENTAUR:
1350 if (c->x86 == 5)
1351 winchip_mcheck_init(c);
1352 break;
1353
1354 default:
1355 break;
1356 }
1357 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
1358}
1359
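
The legacy mcheck_init() kept under CONFIG_X86_OLD_MCE dispatches purely on vendor and family. The same decision can be written as a small routing table, which is one way to exercise the logic in user space — the enum values and table entries below are illustrative, not kernel definitions:

#include <stdio.h>

enum vendor { VENDOR_AMD, VENDOR_INTEL, VENDOR_CENTAUR };

struct route { enum vendor vendor; int family; const char *init; };

static const struct route routes[] = {
	{ VENDOR_AMD,     -1, "amd_mcheck_init"      }, /* any family */
	{ VENDOR_INTEL,    5, "intel_p5_mcheck_init" },
	{ VENDOR_INTEL,    6, "intel_p6_mcheck_init" },
	{ VENDOR_INTEL,   15, "intel_p4_mcheck_init" },
	{ VENDOR_CENTAUR,  5, "winchip_mcheck_init"  },
};

static void mcheck_init_like(enum vendor v, int family)
{
	for (unsigned i = 0; i < sizeof(routes) / sizeof(routes[0]); i++) {
		if (routes[i].vendor == v &&
		    (routes[i].family == -1 || routes[i].family == family)) {
			printf("-> %s\n", routes[i].init);
			return;
		}
	}
	printf("-> no handler\n");
}

int main(void)
{
	mcheck_init_like(VENDOR_INTEL, 6);	/* PII/PIII */
	mcheck_init_like(VENDOR_CENTAUR, 5);	/* WinChip  */
	return 0;
}
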
1360static int __init mcheck_enable(char *str)
1361{
1362 mce_disabled = -1;
1363 return 1;
1364}
1365
1366__setup("mce", mcheck_enable);
1367
1368#endif /* CONFIG_X86_OLD_MCE */
1369
1370/*
1371 * Old style boot options parsing. Only for compatibility.
1372 */
1373static int __init mcheck_disable(char *str)
1374{
1375 mce_disabled = 1;
1376 return 1;
1377}
1378__setup("nomce", mcheck_disable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index ae9f628838f..84a552b458c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -1,14 +1,38 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <asm/mce.h> 2#include <asm/mce.h>
3 3
4#ifdef CONFIG_X86_OLD_MCE
4void amd_mcheck_init(struct cpuinfo_x86 *c); 5void amd_mcheck_init(struct cpuinfo_x86 *c);
5void intel_p4_mcheck_init(struct cpuinfo_x86 *c); 6void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
6void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
7void intel_p6_mcheck_init(struct cpuinfo_x86 *c); 7void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
8#endif
9
10#ifdef CONFIG_X86_ANCIENT_MCE
11void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
8void winchip_mcheck_init(struct cpuinfo_x86 *c); 12void winchip_mcheck_init(struct cpuinfo_x86 *c);
13extern int mce_p5_enable;
14static inline int mce_p5_enabled(void) { return mce_p5_enable; }
15static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
16#else
17static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
18static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
19static inline int mce_p5_enabled(void) { return 0; }
20static inline void enable_p5_mce(void) { }
21#endif
9 22
10/* Call the installed machine check handler for this CPU setup. */ 23/* Call the installed machine check handler for this CPU setup. */
11extern void (*machine_check_vector)(struct pt_regs *, long error_code); 24extern void (*machine_check_vector)(struct pt_regs *, long error_code);
12 25
26#ifdef CONFIG_X86_OLD_MCE
27
13extern int nr_mce_banks; 28extern int nr_mce_banks;
14 29
30void intel_set_thermal_handler(void);
31
32#else
33
34static inline void intel_set_thermal_handler(void) { }
35
36#endif
37
38void intel_init_thermal(struct cpuinfo_x86 *c);
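
The reworked mce.h pairs each config-gated facility with static inline no-op stubs, so callers in common code need no #ifdefs of their own. A minimal demonstration of the pattern — FEATURE_X is a placeholder macro, not a kernel config symbol:

#include <stdio.h>

/* Flip this to 0 to compile the facility out. */
#define FEATURE_X 1

#if FEATURE_X
static int feature_x_state;
static inline int  feature_x_enabled(void) { return feature_x_state; }
static inline void enable_feature_x(void)  { feature_x_state = 1; }
#else
/* Stubs: same signatures, zero cost, no #ifdef at the call sites. */
static inline int  feature_x_enabled(void) { return 0; }
static inline void enable_feature_x(void)  { }
#endif

int main(void)
{
	enable_feature_x();	/* compiles and links either way */
	printf("enabled: %d\n", feature_x_enabled());
	return 0;
}
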
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
deleted file mode 100644
index 3552119b091..00000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/*
2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/smp.h>
11#include <linux/thread_info.h>
12
13#include <asm/processor.h>
14#include <asm/system.h>
15#include <asm/mce.h>
16
17#include "mce.h"
18
19int mce_disabled;
20int nr_mce_banks;
21
22EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
23
24/* Handle unconfigured int18 (should never happen) */
25static void unexpected_machine_check(struct pt_regs *regs, long error_code)
26{
27 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
28}
29
30/* Call the installed machine check handler for this CPU setup. */
31void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
32
33/* This has to be run for each processor */
34void mcheck_init(struct cpuinfo_x86 *c)
35{
36 if (mce_disabled == 1)
37 return;
38
39 switch (c->x86_vendor) {
40 case X86_VENDOR_AMD:
41 amd_mcheck_init(c);
42 break;
43
44 case X86_VENDOR_INTEL:
45 if (c->x86 == 5)
46 intel_p5_mcheck_init(c);
47 if (c->x86 == 6)
48 intel_p6_mcheck_init(c);
49 if (c->x86 == 15)
50 intel_p4_mcheck_init(c);
51 break;
52
53 case X86_VENDOR_CENTAUR:
54 if (c->x86 == 5)
55 winchip_mcheck_init(c);
56 break;
57
58 default:
59 break;
60 }
61}
62
63static int __init mcheck_disable(char *str)
64{
65 mce_disabled = 1;
66 return 1;
67}
68
69static int __init mcheck_enable(char *str)
70{
71 mce_disabled = -1;
72 return 1;
73}
74
75__setup("nomce", mcheck_disable);
76__setup("mce", mcheck_enable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 56dde9c4bc9..ddae21620bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -13,22 +13,22 @@
13 * 13 *
14 * All MC4_MISCi registers are shared between multi-cores 14 * All MC4_MISCi registers are shared between multi-cores
15 */ 15 */
16
17#include <linux/cpu.h>
18#include <linux/errno.h>
19#include <linux/init.h>
20#include <linux/interrupt.h> 16#include <linux/interrupt.h>
21#include <linux/kobject.h>
22#include <linux/notifier.h> 17#include <linux/notifier.h>
23#include <linux/sched.h> 18#include <linux/kobject.h>
24#include <linux/smp.h> 19#include <linux/percpu.h>
25#include <linux/sysdev.h> 20#include <linux/sysdev.h>
21#include <linux/errno.h>
22#include <linux/sched.h>
26#include <linux/sysfs.h> 23#include <linux/sysfs.h>
24#include <linux/init.h>
25#include <linux/cpu.h>
26#include <linux/smp.h>
27
27#include <asm/apic.h> 28#include <asm/apic.h>
29#include <asm/idle.h>
28#include <asm/mce.h> 30#include <asm/mce.h>
29#include <asm/msr.h> 31#include <asm/msr.h>
30#include <asm/percpu.h>
31#include <asm/idle.h>
32 32
33#define PFX "mce_threshold: " 33#define PFX "mce_threshold: "
34#define VERSION "version 1.1.1" 34#define VERSION "version 1.1.1"
@@ -48,26 +48,26 @@
48#define MCG_XBLK_ADDR 0xC0000400 48#define MCG_XBLK_ADDR 0xC0000400
49 49
50struct threshold_block { 50struct threshold_block {
51 unsigned int block; 51 unsigned int block;
52 unsigned int bank; 52 unsigned int bank;
53 unsigned int cpu; 53 unsigned int cpu;
54 u32 address; 54 u32 address;
55 u16 interrupt_enable; 55 u16 interrupt_enable;
56 u16 threshold_limit; 56 u16 threshold_limit;
57 struct kobject kobj; 57 struct kobject kobj;
58 struct list_head miscj; 58 struct list_head miscj;
59}; 59};
60 60
61/* defaults used early on boot */ 61/* defaults used early on boot */
62static struct threshold_block threshold_defaults = { 62static struct threshold_block threshold_defaults = {
63 .interrupt_enable = 0, 63 .interrupt_enable = 0,
64 .threshold_limit = THRESHOLD_MAX, 64 .threshold_limit = THRESHOLD_MAX,
65}; 65};
66 66
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_var_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
86 */ 86 */
87 87
88struct thresh_restart { 88struct thresh_restart {
89 struct threshold_block *b; 89 struct threshold_block *b;
90 int reset; 90 int reset;
91 u16 old_limit; 91 u16 old_limit;
92}; 92};
93 93
94/* must be called with correct cpu affinity */ 94/* must be called with correct cpu affinity */
@@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr)
110 } else if (tr->old_limit) { /* change limit w/o reset */ 110 } else if (tr->old_limit) { /* change limit w/o reset */
111 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 111 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
112 (tr->old_limit - tr->b->threshold_limit); 112 (tr->old_limit - tr->b->threshold_limit);
113
113 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 114 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
114 (new_count & THRESHOLD_MAX); 115 (new_count & THRESHOLD_MAX);
115 } 116 }
@@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr)
125/* cpu init entry point, called from mce.c with preempt off */ 126/* cpu init entry point, called from mce.c with preempt off */
126void mce_amd_feature_init(struct cpuinfo_x86 *c) 127void mce_amd_feature_init(struct cpuinfo_x86 *c)
127{ 128{
128 unsigned int bank, block;
129 unsigned int cpu = smp_processor_id(); 129 unsigned int cpu = smp_processor_id();
130 u8 lvt_off;
131 u32 low = 0, high = 0, address = 0; 130 u32 low = 0, high = 0, address = 0;
131 unsigned int bank, block;
132 struct thresh_restart tr; 132 struct thresh_restart tr;
133 u8 lvt_off;
133 134
134 for (bank = 0; bank < NR_BANKS; ++bank) { 135 for (bank = 0; bank < NR_BANKS; ++bank) {
135 for (block = 0; block < NR_BLOCKS; ++block) { 136 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
140 if (!address) 141 if (!address)
141 break; 142 break;
142 address += MCG_XBLK_ADDR; 143 address += MCG_XBLK_ADDR;
143 } 144 } else
144 else
145 ++address; 145 ++address;
146 146
147 if (rdmsr_safe(address, &low, &high)) 147 if (rdmsr_safe(address, &low, &high))
@@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
193 */ 193 */
194static void amd_threshold_interrupt(void) 194static void amd_threshold_interrupt(void)
195{ 195{
196 u32 low = 0, high = 0, address = 0;
196 unsigned int bank, block; 197 unsigned int bank, block;
197 struct mce m; 198 struct mce m;
198 u32 low = 0, high = 0, address = 0;
199 199
200 mce_setup(&m); 200 mce_setup(&m);
201 201
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
204 if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) 204 if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
205 continue; 205 continue;
206 for (block = 0; block < NR_BLOCKS; ++block) { 206 for (block = 0; block < NR_BLOCKS; ++block) {
207 if (block == 0) 207 if (block == 0) {
208 address = MSR_IA32_MC0_MISC + bank * 4; 208 address = MSR_IA32_MC0_MISC + bank * 4;
209 else if (block == 1) { 209 } else if (block == 1) {
210 address = (low & MASK_BLKPTR_LO) >> 21; 210 address = (low & MASK_BLKPTR_LO) >> 21;
211 if (!address) 211 if (!address)
212 break; 212 break;
213 address += MCG_XBLK_ADDR; 213 address += MCG_XBLK_ADDR;
214 } 214 } else {
215 else
216 ++address; 215 ++address;
216 }
217 217
218 if (rdmsr_safe(address, &low, &high)) 218 if (rdmsr_safe(address, &low, &high))
219 break; 219 break;
@@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void)
229 (high & MASK_LOCKED_HI)) 229 (high & MASK_LOCKED_HI))
230 continue; 230 continue;
231 231
232 /* Log the machine check that caused the threshold 232 /*
233 event. */ 233 * Log the machine check that caused the threshold
234 * event.
235 */
234 machine_check_poll(MCP_TIMESTAMP, 236 machine_check_poll(MCP_TIMESTAMP,
235 &__get_cpu_var(mce_poll_banks)); 237 &__get_cpu_var(mce_poll_banks));
236 238
@@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void)
254 256
255struct threshold_attr { 257struct threshold_attr {
256 struct attribute attr; 258 struct attribute attr;
257 ssize_t(*show) (struct threshold_block *, char *); 259 ssize_t (*show) (struct threshold_block *, char *);
258 ssize_t(*store) (struct threshold_block *, const char *, size_t count); 260 ssize_t (*store) (struct threshold_block *, const char *, size_t count);
259}; 261};
260 262
261#define SHOW_FIELDS(name) \ 263#define SHOW_FIELDS(name) \
262static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ 264static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
263{ \ 265{ \
264 return sprintf(buf, "%lx\n", (unsigned long) b->name); \ 266 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
265} 267}
266SHOW_FIELDS(interrupt_enable) 268SHOW_FIELDS(interrupt_enable)
267SHOW_FIELDS(threshold_limit) 269SHOW_FIELDS(threshold_limit)
268 270
269static ssize_t store_interrupt_enable(struct threshold_block *b, 271static ssize_t
270 const char *buf, size_t count) 272store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
271{ 273{
272 char *end;
273 struct thresh_restart tr; 274 struct thresh_restart tr;
274 unsigned long new = simple_strtoul(buf, &end, 0); 275 unsigned long new;
275 if (end == buf) 276
277 if (strict_strtoul(buf, 0, &new) < 0)
276 return -EINVAL; 278 return -EINVAL;
279
277 b->interrupt_enable = !!new; 280 b->interrupt_enable = !!new;
278 281
279 tr.b = b; 282 tr.b = b;
280 tr.reset = 0; 283 tr.reset = 0;
281 tr.old_limit = 0; 284 tr.old_limit = 0;
285
282 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 286 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
283 287
284 return end - buf; 288 return size;
285} 289}
286 290
287static ssize_t store_threshold_limit(struct threshold_block *b, 291static ssize_t
288 const char *buf, size_t count) 292store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
289{ 293{
290 char *end;
291 struct thresh_restart tr; 294 struct thresh_restart tr;
292 unsigned long new = simple_strtoul(buf, &end, 0); 295 unsigned long new;
293 if (end == buf) 296
297 if (strict_strtoul(buf, 0, &new) < 0)
294 return -EINVAL; 298 return -EINVAL;
299
295 if (new > THRESHOLD_MAX) 300 if (new > THRESHOLD_MAX)
296 new = THRESHOLD_MAX; 301 new = THRESHOLD_MAX;
297 if (new < 1) 302 if (new < 1)
298 new = 1; 303 new = 1;
304
299 tr.old_limit = b->threshold_limit; 305 tr.old_limit = b->threshold_limit;
300 b->threshold_limit = new; 306 b->threshold_limit = new;
301 tr.b = b; 307 tr.b = b;
@@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
303 309
304 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 310 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
305 311
306 return end - buf; 312 return size;
307} 313}
308 314
309struct threshold_block_cross_cpu { 315struct threshold_block_cross_cpu {
310 struct threshold_block *tb; 316 struct threshold_block *tb;
311 long retval; 317 long retval;
312}; 318};
313 319
314static void local_error_count_handler(void *_tbcc) 320static void local_error_count_handler(void *_tbcc)
@@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b,
338 return 1; 344 return 1;
339} 345}
340 346
341#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ 347#define RW_ATTR(val) \
342 .attr = {.name = __stringify(_name), .mode = _mode }, \ 348static struct threshold_attr val = { \
343 .show = _show, \ 349 .attr = {.name = __stringify(val), .mode = 0644 }, \
344 .store = _store, \ 350 .show = show_## val, \
351 .store = store_## val, \
345}; 352};
346 353
347#define RW_ATTR(name) \
348static struct threshold_attr name = \
349 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
350
351RW_ATTR(interrupt_enable); 354RW_ATTR(interrupt_enable);
352RW_ATTR(threshold_limit); 355RW_ATTR(threshold_limit);
353RW_ATTR(error_count); 356RW_ATTR(error_count);
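
RW_ATTR() folds the old two-macro THRESHOLD_ATTR dance into one: the attribute name is stringified for the sysfs file name and token-pasted to pick up show_<name>/store_<name>. The preprocessor mechanics, runnable in isolation (the kernel uses __stringify(); plain #val is the user-space equivalent):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct attr {
	const char *name;
	int (*show)(char *buf);
	int (*store)(const char *buf);
};

static int limit;
static int show_limit(char *buf)        { return sprintf(buf, "%d\n", limit); }
static int store_limit(const char *buf) { limit = atoi(buf); return (int)strlen(buf); }

/* #val gives the file name; ## pastes in the matching handlers. */
#define RW_ATTR(val)			\
static struct attr val##_attr = {	\
	.name  = #val,			\
	.show  = show_##val,		\
	.store = store_##val,		\
}

RW_ATTR(limit);

int main(void)
{
	char buf[16];

	limit_attr.store("42");
	limit_attr.show(buf);
	printf("%s = %s", limit_attr.name, buf);
	return 0;
}
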
@@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = {
359 NULL 362 NULL
360}; 363};
361 364
362#define to_block(k) container_of(k, struct threshold_block, kobj) 365#define to_block(k) container_of(k, struct threshold_block, kobj)
363#define to_attr(a) container_of(a, struct threshold_attr, attr) 366#define to_attr(a) container_of(a, struct threshold_attr, attr)
364 367
365static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 368static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
366{ 369{
367 struct threshold_block *b = to_block(kobj); 370 struct threshold_block *b = to_block(kobj);
368 struct threshold_attr *a = to_attr(attr); 371 struct threshold_attr *a = to_attr(attr);
369 ssize_t ret; 372 ssize_t ret;
373
370 ret = a->show ? a->show(b, buf) : -EIO; 374 ret = a->show ? a->show(b, buf) : -EIO;
375
371 return ret; 376 return ret;
372} 377}
373 378
@@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
377 struct threshold_block *b = to_block(kobj); 382 struct threshold_block *b = to_block(kobj);
378 struct threshold_attr *a = to_attr(attr); 383 struct threshold_attr *a = to_attr(attr);
379 ssize_t ret; 384 ssize_t ret;
385
380 ret = a->store ? a->store(b, buf, count) : -EIO; 386 ret = a->store ? a->store(b, buf, count) : -EIO;
387
381 return ret; 388 return ret;
382} 389}
383 390
384static struct sysfs_ops threshold_ops = { 391static struct sysfs_ops threshold_ops = {
385 .show = show, 392 .show = show,
386 .store = store, 393 .store = store,
387}; 394};
388 395
389static struct kobj_type threshold_ktype = { 396static struct kobj_type threshold_ktype = {
390 .sysfs_ops = &threshold_ops, 397 .sysfs_ops = &threshold_ops,
391 .default_attrs = default_attrs, 398 .default_attrs = default_attrs,
392}; 399};
393 400
394static __cpuinit int allocate_threshold_blocks(unsigned int cpu, 401static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
@@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
396 unsigned int block, 403 unsigned int block,
397 u32 address) 404 u32 address)
398{ 405{
399 int err;
400 u32 low, high;
401 struct threshold_block *b = NULL; 406 struct threshold_block *b = NULL;
407 u32 low, high;
408 int err;
402 409
403 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) 410 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
404 return 0; 411 return 0;
@@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
421 if (!b) 428 if (!b)
422 return -ENOMEM; 429 return -ENOMEM;
423 430
424 b->block = block; 431 b->block = block;
425 b->bank = bank; 432 b->bank = bank;
426 b->cpu = cpu; 433 b->cpu = cpu;
427 b->address = address; 434 b->address = address;
428 b->interrupt_enable = 0; 435 b->interrupt_enable = 0;
429 b->threshold_limit = THRESHOLD_MAX; 436 b->threshold_limit = THRESHOLD_MAX;
430 437
431 INIT_LIST_HEAD(&b->miscj); 438 INIT_LIST_HEAD(&b->miscj);
432 439
433 if (per_cpu(threshold_banks, cpu)[bank]->blocks) 440 if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
434 list_add(&b->miscj, 441 list_add(&b->miscj,
435 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); 442 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
436 else 443 } else {
437 per_cpu(threshold_banks, cpu)[bank]->blocks = b; 444 per_cpu(threshold_banks, cpu)[bank]->blocks = b;
445 }
438 446
439 err = kobject_init_and_add(&b->kobj, &threshold_ktype, 447 err = kobject_init_and_add(&b->kobj, &threshold_ktype,
440 per_cpu(threshold_banks, cpu)[bank]->kobj, 448 per_cpu(threshold_banks, cpu)[bank]->kobj,
@@ -447,8 +455,9 @@ recurse:
447 if (!address) 455 if (!address)
448 return 0; 456 return 0;
449 address += MCG_XBLK_ADDR; 457 address += MCG_XBLK_ADDR;
450 } else 458 } else {
451 ++address; 459 ++address;
460 }
452 461
453 err = allocate_threshold_blocks(cpu, bank, ++block, address); 462 err = allocate_threshold_blocks(cpu, bank, ++block, address);
454 if (err) 463 if (err)
@@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
500 if (!b) 509 if (!b)
501 goto out; 510 goto out;
502 511
503 err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, 512 err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
504 b->kobj, name); 513 b->kobj, name);
505 if (err) 514 if (err)
506 goto out; 515 goto out;
507 516
508 cpumask_copy(b->cpus, cpu_core_mask(cpu)); 517 cpumask_copy(b->cpus, cpu_core_mask(cpu));
509 per_cpu(threshold_banks, cpu)[bank] = b; 518 per_cpu(threshold_banks, cpu)[bank] = b;
519
510 goto out; 520 goto out;
511 } 521 }
512#endif 522#endif
@@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
522 goto out; 532 goto out;
523 } 533 }
524 534
525 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 535 b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
526 if (!b->kobj) 536 if (!b->kobj)
527 goto out_free; 537 goto out_free;
528 538
@@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
542 if (i == cpu) 552 if (i == cpu)
543 continue; 553 continue;
544 554
545 err = sysfs_create_link(&per_cpu(device_mce, i).kobj, 555 err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
546 b->kobj, name); 556 b->kobj, name);
547 if (err) 557 if (err)
548 goto out; 558 goto out;
@@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu,
605 615
606static void threshold_remove_bank(unsigned int cpu, int bank) 616static void threshold_remove_bank(unsigned int cpu, int bank)
607{ 617{
608 int i = 0;
609 struct threshold_bank *b; 618 struct threshold_bank *b;
610 char name[32]; 619 char name[32];
620 int i = 0;
611 621
612 b = per_cpu(threshold_banks, cpu)[bank]; 622 b = per_cpu(threshold_banks, cpu)[bank];
613
614 if (!b) 623 if (!b)
615 return; 624 return;
616
617 if (!b->blocks) 625 if (!b->blocks)
618 goto free_out; 626 goto free_out;
619 627
@@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
622#ifdef CONFIG_SMP 630#ifdef CONFIG_SMP
623 /* sibling symlink */ 631 /* sibling symlink */
624 if (shared_bank[bank] && b->blocks->cpu != cpu) { 632 if (shared_bank[bank] && b->blocks->cpu != cpu) {
625 sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); 633 sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
626 per_cpu(threshold_banks, cpu)[bank] = NULL; 634 per_cpu(threshold_banks, cpu)[bank] = NULL;
635
627 return; 636 return;
628 } 637 }
629#endif 638#endif
@@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
633 if (i == cpu) 642 if (i == cpu)
634 continue; 643 continue;
635 644
636 sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); 645 sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
637 per_cpu(threshold_banks, i)[bank] = NULL; 646 per_cpu(threshold_banks, i)[bank] = NULL;
638 } 647 }
639 648
@@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu)
659} 668}
660 669
661/* get notified when a cpu comes on/off */ 670/* get notified when a cpu comes on/off */
662static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, 671static void __cpuinit
663 unsigned int cpu) 672amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
664{ 673{
665 if (cpu >= NR_CPUS)
666 return;
667
668 switch (action) { 674 switch (action) {
669 case CPU_ONLINE: 675 case CPU_ONLINE:
670 case CPU_ONLINE_FROZEN: 676 case CPU_ONLINE_FROZEN:
@@ -686,11 +692,12 @@ static __init int threshold_init_device(void)
686 /* to hit CPUs online before the notifier is up */ 692 /* to hit CPUs online before the notifier is up */
687 for_each_online_cpu(lcpu) { 693 for_each_online_cpu(lcpu) {
688 int err = threshold_create_device(lcpu); 694 int err = threshold_create_device(lcpu);
695
689 if (err) 696 if (err)
690 return err; 697 return err;
691 } 698 }
692 threshold_cpu_callback = amd_64_threshold_cpu_callback; 699 threshold_cpu_callback = amd_64_threshold_cpu_callback;
700
693 return 0; 701 return 0;
694} 702}
695
696device_initcall(threshold_init_device); 703device_initcall(threshold_init_device);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
new file mode 100644
index 00000000000..2b011d2d857
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -0,0 +1,74 @@
1/*
2 * Common code for Intel machine checks
3 */
4#include <linux/interrupt.h>
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/smp.h>
9
10#include <asm/therm_throt.h>
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/apic.h>
14#include <asm/msr.h>
15
16#include "mce.h"
17
18void intel_init_thermal(struct cpuinfo_x86 *c)
19{
20 unsigned int cpu = smp_processor_id();
21 int tm2 = 0;
22 u32 l, h;
23
 24 /* Thermal monitoring depends on ACPI and clock modulation */
25 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
26 return;
27
28 /*
 29 * First check if it's enabled already, in which case there might
30 * be some SMM goo which handles it, so we can't even put a handler
31 * since it might be delivered via SMI already:
32 */
33 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
34 h = apic_read(APIC_LVTTHMR);
35 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
36 printk(KERN_DEBUG
37 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
38 return;
39 }
40
41 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
42 tm2 = 1;
43
44 /* Check whether a vector already exists */
45 if (h & APIC_VECTOR_MASK) {
46 printk(KERN_DEBUG
47 "CPU%d: Thermal LVT vector (%#x) already installed\n",
48 cpu, (h & APIC_VECTOR_MASK));
49 return;
50 }
51
52 /* We'll mask the thermal vector in the lapic till we're ready: */
53 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
54 apic_write(APIC_LVTTHMR, h);
55
56 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
57 wrmsr(MSR_IA32_THERM_INTERRUPT,
58 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
59
60 intel_set_thermal_handler();
61
62 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
63 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
64
65 /* Unmask the thermal vector: */
66 l = apic_read(APIC_LVTTHMR);
67 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
68
69 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
70 cpu, tm2 ? "TM2" : "TM1");
71
72 /* enable thermal throttle processing */
73 atomic_set(&therm_throt_en, 1);
74}
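
intel_init_thermal() bails out in two cases before touching anything: TM1 already enabled with the thermal LVT routed to SMI (firmware owns the event), or an LVT vector already installed. The decision tree, with the MSR/APIC reads replaced by plain values — the bit positions below are made up for the sketch, only the names mirror the kernel's:

#include <stdio.h>

#define MISC_ENABLE_TM1  (1u << 3)	/* illustrative bit positions */
#define APIC_DM_SMI      (1u << 9)
#define APIC_VECTOR_MASK 0xffu

static const char *thermal_init_decision(unsigned int misc_enable,
					 unsigned int lvtthmr)
{
	if ((misc_enable & MISC_ENABLE_TM1) && (lvtthmr & APIC_DM_SMI))
		return "handled by SMI, leave alone";
	if (lvtthmr & APIC_VECTOR_MASK)
		return "vector already installed, leave alone";
	return "install our handler masked, enable, then unmask";
}

int main(void)
{
	printf("%s\n", thermal_init_decision(MISC_ENABLE_TM1, APIC_DM_SMI));
	printf("%s\n", thermal_init_decision(0, 0x40));
	printf("%s\n", thermal_init_decision(0, 0));
	return 0;
}
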
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index cef3ee30744..eff3740501a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -17,6 +17,8 @@
17#include <asm/therm_throt.h> 17#include <asm/therm_throt.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19 19
20#include "mce.h"
21
20asmlinkage void smp_thermal_interrupt(void) 22asmlinkage void smp_thermal_interrupt(void)
21{ 23{
22 __u64 msr_val; 24 __u64 msr_val;
@@ -27,67 +29,13 @@ asmlinkage void smp_thermal_interrupt(void)
27 irq_enter(); 29 irq_enter();
28 30
29 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 31 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
30 if (therm_throt_process(msr_val & 1)) 32 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
31 mce_log_therm_throt_event(msr_val); 33 mce_log_therm_throt_event(msr_val);
32 34
33 inc_irq_stat(irq_thermal_count); 35 inc_irq_stat(irq_thermal_count);
34 irq_exit(); 36 irq_exit();
35} 37}
36 38
37static void intel_init_thermal(struct cpuinfo_x86 *c)
38{
39 u32 l, h;
40 int tm2 = 0;
41 unsigned int cpu = smp_processor_id();
42
43 if (!cpu_has(c, X86_FEATURE_ACPI))
44 return;
45
46 if (!cpu_has(c, X86_FEATURE_ACC))
47 return;
48
49 /* first check if TM1 is already enabled by the BIOS, in which
50 * case there might be some SMM goo which handles it, so we can't even
51 * put a handler since it might be delivered via SMI already.
52 */
53 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
54 h = apic_read(APIC_LVTTHMR);
55 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
56 printk(KERN_DEBUG
57 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
58 return;
59 }
60
61 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
62 tm2 = 1;
63
64 if (h & APIC_VECTOR_MASK) {
65 printk(KERN_DEBUG
66 "CPU%d: Thermal LVT vector (%#x) already "
67 "installed\n", cpu, (h & APIC_VECTOR_MASK));
68 return;
69 }
70
71 h = THERMAL_APIC_VECTOR;
72 h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
73 apic_write(APIC_LVTTHMR, h);
74
75 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
76 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
77
78 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
79 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
80
81 l = apic_read(APIC_LVTTHMR);
82 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
83 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
84 cpu, tm2 ? "TM2" : "TM1");
85
86 /* enable thermal throttle processing */
87 atomic_set(&therm_throt_en, 1);
88 return;
89}
90
91/* 39/*
92 * Support for Intel Correct Machine Check Interrupts. This allows 40 * Support for Intel Correct Machine Check Interrupts. This allows
93 * the CPU to raise an interrupt when a corrected machine check happened. 41 * the CPU to raise an interrupt when a corrected machine check happened.
@@ -248,7 +196,7 @@ void cmci_rediscover(int dying)
248 return; 196 return;
249 cpumask_copy(old, &current->cpus_allowed); 197 cpumask_copy(old, &current->cpus_allowed);
250 198
251 for_each_online_cpu (cpu) { 199 for_each_online_cpu(cpu) {
252 if (cpu == dying) 200 if (cpu == dying)
253 continue; 201 continue;
254 if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) 202 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index a74af128efc..70b710420f7 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -6,15 +6,14 @@
6 * This file contains routines to check for non-fatal MCEs every 15s 6 * This file contains routines to check for non-fatal MCEs every 15s
7 * 7 *
8 */ 8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/jiffies.h>
14#include <linux/workqueue.h>
15#include <linux/interrupt.h> 9#include <linux/interrupt.h>
16#include <linux/smp.h> 10#include <linux/workqueue.h>
11#include <linux/jiffies.h>
12#include <linux/kernel.h>
17#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/init.h>
16#include <linux/smp.h>
18 17
19#include <asm/processor.h> 18#include <asm/processor.h>
20#include <asm/system.h> 19#include <asm/system.h>
@@ -22,9 +21,9 @@
22 21
23#include "mce.h" 22#include "mce.h"
24 23
25static int firstbank; 24static int firstbank;
26 25
27#define MCE_RATE 15*HZ /* timer rate is 15s */ 26#define MCE_RATE (15*HZ) /* timer rate is 15s */
28 27
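
Wrapping MCE_RATE in parentheses matters the moment the macro meets a neighbouring operator: without them, an expression such as jiffies % MCE_RATE expands to jiffies % 15 * HZ, which binds as (jiffies % 15) * HZ. A two-line demonstration:

#include <stdio.h>

#define HZ 1000
#define MCE_RATE_BAD  15*HZ
#define MCE_RATE_GOOD (15*HZ)

int main(void)
{
	long jiffies = 31000;

	printf("bad:  %ld\n", jiffies % MCE_RATE_BAD);	/* (31000 mod 15) * 1000 = 10000 */
	printf("good: %ld\n", jiffies % MCE_RATE_GOOD);	/* 31000 mod 15000       = 1000  */
	return 0;
}
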
29static void mce_checkregs(void *info) 28static void mce_checkregs(void *info)
30{ 29{
@@ -34,23 +33,24 @@ static void mce_checkregs(void *info)
34 for (i = firstbank; i < nr_mce_banks; i++) { 33 for (i = firstbank; i < nr_mce_banks; i++) {
35 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); 34 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
36 35
37 if (high & (1<<31)) { 36 if (!(high & (1<<31)))
38 printk(KERN_INFO "MCE: The hardware reports a non " 37 continue;
39 "fatal, correctable incident occurred on " 38
40 "CPU %d.\n", 39 printk(KERN_INFO "MCE: The hardware reports a non fatal, "
40 "correctable incident occurred on CPU %d.\n",
41 smp_processor_id()); 41 smp_processor_id());
42 printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); 42
43 43 printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
44 /* 44
45 * Scrub the error so we don't pick it up in MCE_RATE 45 /*
46 * seconds time. 46 * Scrub the error so we don't pick it up in MCE_RATE
47 */ 47 * seconds time:
48 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); 48 */
49 49 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
50 /* Serialize */ 50
51 wmb(); 51 /* Serialize: */
52 add_taint(TAINT_MACHINE_CHECK); 52 wmb();
53 } 53 add_taint(TAINT_MACHINE_CHECK);
54 } 54 }
55} 55}
56 56
@@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void)
77 77
78 /* Some Athlons misbehave when we frob bank 0 */ 78 /* Some Athlons misbehave when we frob bank 0 */
79 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 79 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
80 boot_cpu_data.x86 == 6) 80 boot_cpu_data.x86 == 6)
81 firstbank = 1; 81 firstbank = 1;
82 else 82 else
83 firstbank = 0; 83 firstbank = 0;
84 84
85 /* 85 /*
86 * Check for non-fatal errors every MCE_RATE s 86 * Check for non-fatal errors every MCE_RATE s
87 */ 87 */
88 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); 88 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
89 printk(KERN_INFO "Machine check exception polling timer started.\n"); 89 printk(KERN_INFO "Machine check exception polling timer started.\n");
90
90 return 0; 91 return 0;
91} 92}
92module_init(init_nonfatal_mce_checker); 93module_init(init_nonfatal_mce_checker);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index f53bdcbaf38..82cee108a2d 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -2,18 +2,17 @@
2 * P4 specific Machine Check Exception Reporting 2 * P4 specific Machine Check Exception Reporting
3 */ 3 */
4 4
5#include <linux/init.h>
6#include <linux/types.h>
7#include <linux/kernel.h>
8#include <linux/interrupt.h> 5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h> 9#include <linux/smp.h>
10 10
11#include <asm/therm_throt.h>
11#include <asm/processor.h> 12#include <asm/processor.h>
12#include <asm/system.h> 13#include <asm/system.h>
13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15 15#include <asm/msr.h>
16#include <asm/therm_throt.h>
17 16
18#include "mce.h" 17#include "mce.h"
19 18
@@ -36,6 +35,7 @@ static int mce_num_extended_msrs;
36 35
37 36
38#ifdef CONFIG_X86_MCE_P4THERMAL 37#ifdef CONFIG_X86_MCE_P4THERMAL
38
39static void unexpected_thermal_interrupt(struct pt_regs *regs) 39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{ 40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
@@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
43 add_taint(TAINT_MACHINE_CHECK); 43 add_taint(TAINT_MACHINE_CHECK);
44} 44}
45 45
46/* P4/Xeon Thermal transition interrupt handler */ 46/* P4/Xeon Thermal transition interrupt handler: */
47static void intel_thermal_interrupt(struct pt_regs *regs) 47static void intel_thermal_interrupt(struct pt_regs *regs)
48{ 48{
49 __u64 msr_val; 49 __u64 msr_val;
@@ -51,11 +51,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
51 ack_APIC_irq(); 51 ack_APIC_irq();
52 52
53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 therm_throt_process(msr_val & 0x1); 54 therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
55} 55}
56 56
57/* Thermal interrupt handler for this CPU setup */ 57/* Thermal interrupt handler for this CPU setup: */
58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; 58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
59 unexpected_thermal_interrupt;
59 60
60void smp_thermal_interrupt(struct pt_regs *regs) 61void smp_thermal_interrupt(struct pt_regs *regs)
61{ 62{
@@ -65,67 +66,15 @@ void smp_thermal_interrupt(struct pt_regs *regs)
65 irq_exit(); 66 irq_exit();
66} 67}
67 68
68/* P4/Xeon Thermal regulation detect and init */ 69void intel_set_thermal_handler(void)
69static void intel_init_thermal(struct cpuinfo_x86 *c)
70{ 70{
71 u32 l, h;
72 unsigned int cpu = smp_processor_id();
73
74 /* Thermal monitoring */
75 if (!cpu_has(c, X86_FEATURE_ACPI))
76 return; /* -ENODEV */
77
78 /* Clock modulation */
79 if (!cpu_has(c, X86_FEATURE_ACC))
80 return; /* -ENODEV */
81
82 /* first check if its enabled already, in which case there might
83 * be some SMM goo which handles it, so we can't even put a handler
84 * since it might be delivered via SMI already -zwanem.
85 */
86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR);
88 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
90 cpu);
91 return; /* -EBUSY */
92 }
93
94 /* check whether a vector already exists, temporarily masked? */
95 if (h & APIC_VECTOR_MASK) {
96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
97 "installed\n",
98 cpu, (h & APIC_VECTOR_MASK));
99 return; /* -EBUSY */
100 }
101
102 /* The temperature transition interrupt handler setup */
103 h = THERMAL_APIC_VECTOR; /* our delivery vector */
104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
105 apic_write(APIC_LVTTHMR, h);
106
107 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
108 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
109
110 /* ok we're good to go... */
111 vendor_thermal_interrupt = intel_thermal_interrupt; 71 vendor_thermal_interrupt = intel_thermal_interrupt;
112
113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
115
116 l = apic_read(APIC_LVTTHMR);
117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
118 printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
119
120 /* enable thermal throttle processing */
121 atomic_set(&therm_throt_en, 1);
122 return;
123} 72}
124#endif /* CONFIG_X86_MCE_P4THERMAL */
125 73
74#endif /* CONFIG_X86_MCE_P4THERMAL */
126 75
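
With intel_init_thermal() moved to common code, p4.c keeps only intel_set_thermal_handler(), which swaps the vendor_thermal_interrupt pointer from its "unexpected interrupt" default to the real handler. The default-plus-override shape in miniature (printf stands in for the real work):

#include <stdio.h>

static void unexpected_thermal_interrupt(void)
{
	printf("Unexpected LVT TMR interrupt!\n");
}

static void intel_thermal_interrupt(void)
{
	printf("processing PROCHOT event\n");
}

/* Safe default until a vendor opts in. */
static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt;

static void intel_set_thermal_handler(void)
{
	vendor_thermal_interrupt = intel_thermal_interrupt;
}

int main(void)
{
	vendor_thermal_interrupt();	/* default fires */
	intel_set_thermal_handler();	/* common init code calls this hook */
	vendor_thermal_interrupt();	/* now the real handler */
	return 0;
}
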
127/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ 76/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
128static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) 77static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
129{ 78{
130 u32 h; 79 u32 h;
131 80
@@ -143,9 +92,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
143 92
144static void intel_machine_check(struct pt_regs *regs, long error_code) 93static void intel_machine_check(struct pt_regs *regs, long error_code)
145{ 94{
146 int recover = 1;
147 u32 alow, ahigh, high, low; 95 u32 alow, ahigh, high, low;
148 u32 mcgstl, mcgsth; 96 u32 mcgstl, mcgsth;
97 int recover = 1;
149 int i; 98 int i;
150 99
151 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 100 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -157,7 +106,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
157 106
158 if (mce_num_extended_msrs > 0) { 107 if (mce_num_extended_msrs > 0) {
159 struct intel_mce_extended_msrs dbg; 108 struct intel_mce_extended_msrs dbg;
109
160 intel_get_extended_msrs(&dbg); 110 intel_get_extended_msrs(&dbg);
111
161 printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" 112 printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
162 "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" 113 "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
163 "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", 114 "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
@@ -171,6 +122,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
171 if (high & (1<<31)) { 122 if (high & (1<<31)) {
172 char misc[20]; 123 char misc[20];
173 char addr[24]; 124 char addr[24];
125
174 misc[0] = addr[0] = '\0'; 126 misc[0] = addr[0] = '\0';
175 if (high & (1<<29)) 127 if (high & (1<<29))
176 recover |= 1; 128 recover |= 1;
@@ -196,6 +148,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
196 panic("Unable to continue"); 148 panic("Unable to continue");
197 149
198 printk(KERN_EMERG "Attempting to continue.\n"); 150 printk(KERN_EMERG "Attempting to continue.\n");
151
199 /* 152 /*
200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 153 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
201 * recoverable/continuable. This will allow BIOS to look at the MSRs 154 * recoverable/continuable. This will allow BIOS to look at the MSRs
@@ -217,7 +170,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
217 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 170 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
218} 171}
219 172
220
221void intel_p4_mcheck_init(struct cpuinfo_x86 *c) 173void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
222{ 174{
223 u32 l, h; 175 u32 l, h;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index c9f77ea69ed..015f481ab1b 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -2,11 +2,10 @@
2 * P5 specific Machine Check Exception Reporting 2 * P5 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> 3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */ 4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/interrupt.h> 5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
10#include <linux/smp.h> 9#include <linux/smp.h>
11 10
12#include <asm/processor.h> 11#include <asm/processor.h>
@@ -15,39 +14,58 @@
15 14
16#include "mce.h" 15#include "mce.h"
17 16
18/* Machine check handler for Pentium class Intel */ 17/* By default disabled */
18int mce_p5_enable;
19
20/* Machine check handler for Pentium class Intel CPUs: */
19static void pentium_machine_check(struct pt_regs *regs, long error_code) 21static void pentium_machine_check(struct pt_regs *regs, long error_code)
20{ 22{
21 u32 loaddr, hi, lotype; 23 u32 loaddr, hi, lotype;
24
22 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); 25 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
23 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); 26 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
24 printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); 27
25 if (lotype&(1<<5)) 28 printk(KERN_EMERG
26 printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); 29 "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
30 smp_processor_id(), loaddr, lotype);
31
32 if (lotype & (1<<5)) {
33 printk(KERN_EMERG
34 "CPU#%d: Possible thermal failure (CPU on fire ?).\n",
35 smp_processor_id());
36 }
37
27 add_taint(TAINT_MACHINE_CHECK); 38 add_taint(TAINT_MACHINE_CHECK);
28} 39}
29 40
30/* Set up machine check reporting for processors with Intel style MCE */ 41/* Set up machine check reporting for processors with Intel style MCE: */
31void intel_p5_mcheck_init(struct cpuinfo_x86 *c) 42void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
32{ 43{
33 u32 l, h; 44 u32 l, h;
34 45
35 /*Check for MCE support */ 46 /* Check for MCE support: */
36 if (!cpu_has(c, X86_FEATURE_MCE)) 47 if (!cpu_has(c, X86_FEATURE_MCE))
37 return; 48 return;
38 49
39 /* Default P5 to off as its often misconnected */ 50#ifdef CONFIG_X86_OLD_MCE
51 /* Default P5 to off as its often misconnected: */
40 if (mce_disabled != -1) 52 if (mce_disabled != -1)
41 return; 53 return;
54#endif
55
42 machine_check_vector = pentium_machine_check; 56 machine_check_vector = pentium_machine_check;
57 /* Make sure the vector pointer is visible before we enable MCEs: */
43 wmb(); 58 wmb();
44 59
45 /* Read registers before enabling */ 60 /* Read registers before enabling: */
46 rdmsr(MSR_IA32_P5_MC_ADDR, l, h); 61 rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
47 rdmsr(MSR_IA32_P5_MC_TYPE, l, h); 62 rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
48 printk(KERN_INFO "Intel old style machine check architecture supported.\n"); 63 printk(KERN_INFO
64 "Intel old style machine check architecture supported.\n");
49 65
50 /* Enable MCE */ 66 /* Enable MCE: */
51 set_in_cr4(X86_CR4_MCE); 67 set_in_cr4(X86_CR4_MCE);
52 printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); 68 printk(KERN_INFO
69 "Intel old style machine check reporting enabled on CPU#%d.\n",
70 smp_processor_id());
53} 71}
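
intel_p5_mcheck_init() stores the handler into machine_check_vector and only then executes wmb() and sets CR4.MCE, so a machine check can never observe the enable bit without also seeing the new vector. The analogous publish-before-enable ordering in portable C11 is a release store paired with an acquire load — a sketch of the idea, not the kernel's primitives:

#include <stdatomic.h>
#include <stdio.h>

static void pentium_machine_check(void) { printf("handled MCE\n"); }

static _Atomic(void (*)(void)) machine_check_vector;
static atomic_bool mce_enabled;

static void init_cpu(void)
{
	/* Publish the vector first... */
	atomic_store_explicit(&machine_check_vector, pentium_machine_check,
			      memory_order_release);
	/* ...then flip the enable, playing the role of CR4.MCE. */
	atomic_store_explicit(&mce_enabled, 1, memory_order_release);
}

static void take_exception(void)
{
	if (atomic_load_explicit(&mce_enabled, memory_order_acquire)) {
		void (*vec)(void) =
			atomic_load_explicit(&machine_check_vector,
					     memory_order_acquire);
		vec();	/* non-NULL: its store is ordered before the enable */
	}
}

int main(void)
{
	init_cpu();
	take_exception();
	return 0;
}
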
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 2ac52d7b434..43c24e66745 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -2,11 +2,10 @@
2 * P6 specific Machine Check Exception Reporting 2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> 3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */ 4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/interrupt.h> 5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
10#include <linux/smp.h> 9#include <linux/smp.h>
11 10
12#include <asm/processor.h> 11#include <asm/processor.h>
@@ -18,9 +17,9 @@
18/* Machine Check Handler For PII/PIII */ 17/* Machine Check Handler For PII/PIII */
19static void intel_machine_check(struct pt_regs *regs, long error_code) 18static void intel_machine_check(struct pt_regs *regs, long error_code)
20{ 19{
21 int recover = 1;
22 u32 alow, ahigh, high, low; 20 u32 alow, ahigh, high, low;
23 u32 mcgstl, mcgsth; 21 u32 mcgstl, mcgsth;
22 int recover = 1;
24 int i; 23 int i;
25 24
26 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 25 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
35 if (high & (1<<31)) { 34 if (high & (1<<31)) {
36 char misc[20]; 35 char misc[20];
37 char addr[24]; 36 char addr[24];
38 misc[0] = addr[0] = '\0'; 37
38 misc[0] = '\0';
39 addr[0] = '\0';
40
39 if (high & (1<<29)) 41 if (high & (1<<29))
40 recover |= 1; 42 recover |= 1;
41 if (high & (1<<25)) 43 if (high & (1<<25))
42 recover |= 2; 44 recover |= 2;
43 high &= ~(1<<31); 45 high &= ~(1<<31);
46
44 if (high & (1<<27)) { 47 if (high & (1<<27)) {
45 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); 48 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
46 snprintf(misc, 20, "[%08x%08x]", ahigh, alow); 49 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
                                 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
                                 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
                         }
+
                         printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
                                 smp_processor_id(), i, high, low, misc, addr);
                 }
@@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
         /*
          * Do not clear the MSR_IA32_MCi_STATUS if the error is not
          * recoverable/continuable.This will allow BIOS to look at the MSRs
-         * for errors if the OS could not log the error.
+         * for errors if the OS could not log the error:
          */
         for (i = 0; i < nr_mce_banks; i++) {
                 unsigned int msr;
+
                 msr = MSR_IA32_MC0_STATUS+i*4;
                 rdmsr(msr, low, high);
                 if (high & (1<<31)) {
-                        /* Clear it */
+                        /* Clear it: */
                         wrmsr(msr, 0UL, 0UL);
-                        /* Serialize */
+                        /* Serialize: */
                         wmb();
                         add_taint(TAINT_MACHINE_CHECK);
                 }
@@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
         wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 l, h;
@@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 
         /* Ok machine check is available */
         machine_check_vector = intel_machine_check;
+        /* Make sure the vector pointer is visible before we enable MCEs: */
         wmb();
 
         printk(KERN_INFO "Intel machine check architecture supported.\n");
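The MSR_IA32_MC0_STATUS+i*4 indexing in the handler above works because the x86 MCA register file gives every bank four consecutive MSRs (CTL, STATUS, ADDR, MISC) starting at 0x400. A hedged sketch of that addressing, with hypothetical ex_* helper names:

    #define EX_MC0_CTL 0x400        /* bank 0 base: CTL, STATUS, ADDR, MISC */

    static inline unsigned int ex_mc_status_msr(int bank)
    {
            return EX_MC0_CTL + 4 * bank + 1;   /* STATUS = second of four */
    }

    static inline unsigned int ex_mc_addr_msr(int bank)
    {
            return EX_MC0_CTL + 4 * bank + 2;   /* ADDR = third of four    */
    }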
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d5ae2243f0b..7b1ae2e20ba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,7 +1,7 @@
 /*
- *
  * Thermal throttle event support code (such as syslog messaging and rate
  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ *
  * This allows consistent reporting of CPU thermal throttle events.
  *
  * Maintains a counter in /sys that keeps track of the number of thermal
@@ -13,43 +13,43 @@
  * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
  * Inspired by Ross Biro's and Al Borchers' counter code.
  */
-
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
 #include <linux/percpu.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
-#include <asm/cpu.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
+
 #include <asm/therm_throt.h>
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL (300 * HZ)
 
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+atomic_t therm_throt_en = ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name) \
         static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
 
 #define define_therm_throt_sysdev_show_func(name) \
 static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
                                               struct sysdev_attribute *attr, \
                                               char *buf) \
 { \
         unsigned int cpu = dev->id; \
         ssize_t ret; \
 \
         preempt_disable();      /* CPU hotplug */ \
         if (cpu_online(cpu)) \
                 ret = sprintf(buf, "%lu\n", \
                               per_cpu(thermal_throttle_##name, cpu)); \
         else \
                 ret = 0; \
         preempt_enable(); \
 \
         return ret; \
 }
 
 define_therm_throt_sysdev_show_func(count);
@@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = {
 };
 
 static struct attribute_group thermal_throttle_attr_group = {
-        .attrs = thermal_throttle_attrs,
-        .name = "thermal_throttle"
+        .attrs  = thermal_throttle_attrs,
+        .name   = "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */
 
@@ -110,10 +110,11 @@ int therm_throt_process(int curr)
 }
 
 #ifdef CONFIG_SYSFS
-/* Add/Remove thermal_throttle interface for CPU device */
+/* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 {
-        return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+        return sysfs_create_group(&sys_dev->kobj,
+                                  &thermal_throttle_attr_group);
 }
 
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
@@ -121,19 +122,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
         sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
 }
 
-/* Mutex protecting device creation against CPU hotplug */
+/* Mutex protecting device creation against CPU hotplug: */
 static DEFINE_MUTEX(therm_cpu_lock);
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
-                                                   unsigned long action,
-                                                   void *hcpu)
+static __cpuinit int
+thermal_throttle_cpu_callback(struct notifier_block *nfb,
+                              unsigned long action,
+                              void *hcpu)
 {
         unsigned int cpu = (unsigned long)hcpu;
         struct sys_device *sys_dev;
         int err = 0;
 
         sys_dev = get_cpu_sysdev(cpu);
+
         switch (action) {
         case CPU_UP_PREPARE:
         case CPU_UP_PREPARE_FROZEN:
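The next_check per-CPU variable above carries the whole rate limit: a throttle event is reported at most once per CHECK_INTERVAL on each CPU. A simplified sketch of the idiom, not the actual therm_throt_process body, using the per-CPU accessors of this kernel era (the ex_* names are hypothetical):

    static DEFINE_PER_CPU(__u64, ex_next_report) = INITIAL_JIFFIES;

    static int ex_should_report(void)
    {
            __u64 now = get_jiffies_64();

            if (!time_after64(now, __get_cpu_var(ex_next_report)))
                    return 0;               /* still inside the quiet period */

            __get_cpu_var(ex_next_report) = now + CHECK_INTERVAL;
            return 1;                       /* caller may log this event     */
    }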
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 23ee9e730f7..d746df2909c 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,7 +17,7 @@ static void default_threshold_interrupt(void)
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
-asmlinkage void mce_threshold_interrupt(void)
+asmlinkage void smp_threshold_interrupt(void)
 {
         exit_idle();
         irq_enter();
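The rename leaves threshold.c's dispatch scheme untouched: mce_threshold_vector starts out pointing at a do-nothing default, and CPU-specific init code (the AMD threshold code in this series) later installs the real handler. A generic sketch of the pattern with hypothetical names:

    static void ex_default_handler(void)
    {
            /* no handler installed yet: swallow the event */
    }

    void (*ex_event_vector)(void) = ex_default_handler;

    void ex_install_handler(void (*fn)(void))
    {
            ex_event_vector = fn;   /* interrupt path calls (*ex_event_vector)() */
    }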
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2a043d89811..81b02487090 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -2,11 +2,10 @@
  * IDT Winchip specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -14,7 +13,7 @@
 
 #include "mce.h"
 
-/* Machine check handler for WinChip C6 */
+/* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
         printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
@@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
         u32 lo, hi;
+
         machine_check_vector = winchip_machine_check;
+        /* Make sure the vector pointer is visible before we enable MCEs: */
         wmb();
+
         rdmsr(MSR_IDT_FCR1, lo, hi);
         lo |= (1<<2);   /* Enable EIERRINT (int 18 MCE) */
         lo &= ~(1<<4);  /* Enable MCE */
         wrmsr(MSR_IDT_FCR1, lo, hi);
+
         set_in_cr4(X86_CR4_MCE);
-        printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+
+        printk(KERN_INFO
+               "Winchip machine check reporting enabled on CPU#0.\n");
 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6e843..a31a7f29cff 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1007,7 +1007,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
 #endif
 
 apicinterrupt THRESHOLD_APIC_VECTOR \
-        threshold_interrupt mce_threshold_interrupt
+        threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
         thermal_interrupt smp_thermal_interrupt
 
@@ -1382,7 +1382,7 @@ paranoiderrorentry stack_segment do_stack_segment
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
 
         /*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c1739ac2970..a05660bf029 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -87,7 +87,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
         for_each_online_cpu(j)
                 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
         seq_printf(p, " Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
         seq_printf(p, "%*s: ", prec, "THR");
         for_each_online_cpu(j)
                 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
@@ -174,7 +174,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #endif
 #ifdef CONFIG_X86_MCE
         sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
         sum += irq_stats(cpu)->irq_threshold_count;
 # endif
 #endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 2e08b10ad51..aab3d277766 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -181,8 +181,10 @@ static void __init apic_intr_init(void)
 {
         smp_intr_init();
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_THERMAL_VECTOR
         alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif
+#ifdef CONFIG_X86_THRESHOLD
         alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
@@ -204,13 +206,6 @@ static void __init apic_intr_init(void)
 # endif
 
 #endif
-
-#ifdef CONFIG_X86_32
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
-        /* thermal monitor LVT interrupt */
-        alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-#endif
 }
 
 /**
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 14425166b8e..d0851e3f77e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -25,11 +25,11 @@
 #include <asm/ucontext.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/mce.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
-#include <asm/mce.h>
 #endif /* CONFIG_X86_64 */
 
 #include <asm/syscall.h>
@@ -857,7 +857,7 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#ifdef CONFIG_X86_NEW_MCE
         /* notify userspace of pending MCEs */
         if (thread_info_flags & _TIF_MCE_NOTIFY)
                 mce_notify_user();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2310700faca..f4d683b630b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
 
         return new_kesp;
 }
-#else
+#endif
+
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
 
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
-#endif
 
 /*
  * 'math_state_restore()' saves the current math information in the
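Hoisting the #endif makes the two weak stubs visible to 32-bit builds as well: a weak symbol is a link-time default that any strong definition of the same name overrides. A self-contained user-space illustration of the GCC mechanism (hypothetical names; builds with gcc):

    #include <stdio.h>

    /* Weak default: used only if no strong ex_report() is linked in. */
    void __attribute__((weak)) ex_report(void)
    {
            printf("default (weak) handler\n");
    }

    int main(void)
    {
            ex_report();    /* a strong ex_report() in another object wins */
            return 0;
    }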