diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-13 16:14:51 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-13 16:14:51 -0400 |
commit | a2ee2981ae2a7046b10980feae9f4ab813877106 (patch) | |
tree | ed75db7830b9ef1342659d36d2775954ce96b79f /arch/x86/include | |
parent | 7603ef03a22a33d36d3c75d7c1aca1f957671ad3 (diff) | |
parent | 0d5959723e1db3fd7323c198a50c16cecf96c7a9 (diff) |
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (80 commits)
x86, mce: Add boot options for corrected errors
x86, mce: Fix mce printing
x86, mce: fix for mce counters
x86, mce: support action-optional machine checks
x86, mce: define MCE_VECTOR
x86, mce: rename mce_notify_user to mce_notify_irq
x86: fix panic with interrupts off (needed for MCE)
x86, mce: export MCE severities coverage via debugfs
x86, mce: implement new status bits
x86, mce: print header/footer only once for multiple MCEs
x86, mce: default to panic timeout for machine checks
x86, mce: improve mce_get_rip
x86, mce: make non Monarch panic message "Fatal machine check" too
x86, mce: switch x86 machine check handler to Monarch election.
x86, mce: implement panic synchronization
x86, mce: implement bootstrapping for machine check wakeups
x86, mce: check early in exception handler if panic is needed
x86, mce: add table driven machine check grading
x86, mce: remove TSC print heuristic
x86, mce: log corrected errors when panicing
...
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/entry_arch.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/hardirq.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/hw_irq.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/irq_vectors.h | 17 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 88 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 7 |
6 files changed, 85 insertions, 42 deletions
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index d750a10ccad6..ff8cbfa07851 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) | |||
14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | 14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) |
15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | ||
17 | 18 | ||
18 | BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, | 19 | BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, |
19 | smp_invalidate_interrupt) | 20 | smp_invalidate_interrupt) |
@@ -52,8 +53,16 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) | |||
52 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) | 53 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) |
53 | #endif | 54 | #endif |
54 | 55 | ||
55 | #ifdef CONFIG_X86_MCE_P4THERMAL | 56 | #ifdef CONFIG_X86_THERMAL_VECTOR |
56 | BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) | 57 | BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) |
57 | #endif | 58 | #endif |
58 | 59 | ||
60 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
61 | BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) | ||
62 | #endif | ||
63 | |||
64 | #ifdef CONFIG_X86_NEW_MCE | ||
65 | BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) | ||
66 | #endif | ||
67 | |||
59 | #endif | 68 | #endif |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 9ebc5c255032..82e3e8f01043 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -22,7 +22,7 @@ typedef struct { | |||
22 | #endif | 22 | #endif |
23 | #ifdef CONFIG_X86_MCE | 23 | #ifdef CONFIG_X86_MCE |
24 | unsigned int irq_thermal_count; | 24 | unsigned int irq_thermal_count; |
25 | # ifdef CONFIG_X86_64 | 25 | # ifdef CONFIG_X86_MCE_THRESHOLD |
26 | unsigned int irq_threshold_count; | 26 | unsigned int irq_threshold_count; |
27 | # endif | 27 | # endif |
28 | #endif | 28 | #endif |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 6df45f639666..ba180d93b08c 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -34,6 +34,7 @@ extern void perf_pending_interrupt(void); | |||
34 | extern void spurious_interrupt(void); | 34 | extern void spurious_interrupt(void); |
35 | extern void thermal_interrupt(void); | 35 | extern void thermal_interrupt(void); |
36 | extern void reschedule_interrupt(void); | 36 | extern void reschedule_interrupt(void); |
37 | extern void mce_self_interrupt(void); | ||
37 | 38 | ||
38 | extern void invalidate_interrupt(void); | 39 | extern void invalidate_interrupt(void); |
39 | extern void invalidate_interrupt0(void); | 40 | extern void invalidate_interrupt0(void); |
@@ -46,6 +47,7 @@ extern void invalidate_interrupt6(void); | |||
46 | extern void invalidate_interrupt7(void); | 47 | extern void invalidate_interrupt7(void); |
47 | 48 | ||
48 | extern void irq_move_cleanup_interrupt(void); | 49 | extern void irq_move_cleanup_interrupt(void); |
50 | extern void reboot_interrupt(void); | ||
49 | extern void threshold_interrupt(void); | 51 | extern void threshold_interrupt(void); |
50 | 52 | ||
51 | extern void call_function_interrupt(void); | 53 | extern void call_function_interrupt(void); |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e997be98c9b9..5b21f0ec3df2 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -25,6 +25,7 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #define NMI_VECTOR 0x02 | 27 | #define NMI_VECTOR 0x02 |
28 | #define MCE_VECTOR 0x12 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * IDT vectors usable for external interrupt sources start | 31 | * IDT vectors usable for external interrupt sources start |
@@ -87,13 +88,8 @@ | |||
87 | #define CALL_FUNCTION_VECTOR 0xfc | 88 | #define CALL_FUNCTION_VECTOR 0xfc |
88 | #define CALL_FUNCTION_SINGLE_VECTOR 0xfb | 89 | #define CALL_FUNCTION_SINGLE_VECTOR 0xfb |
89 | #define THERMAL_APIC_VECTOR 0xfa | 90 | #define THERMAL_APIC_VECTOR 0xfa |
90 | 91 | #define THRESHOLD_APIC_VECTOR 0xf9 | |
91 | #ifdef CONFIG_X86_32 | 92 | #define REBOOT_VECTOR 0xf8 |
92 | /* 0xf8 - 0xf9 : free */ | ||
93 | #else | ||
94 | # define THRESHOLD_APIC_VECTOR 0xf9 | ||
95 | # define UV_BAU_MESSAGE 0xf8 | ||
96 | #endif | ||
97 | 93 | ||
98 | /* f0-f7 used for spreading out TLB flushes: */ | 94 | /* f0-f7 used for spreading out TLB flushes: */ |
99 | #define INVALIDATE_TLB_VECTOR_END 0xf7 | 95 | #define INVALIDATE_TLB_VECTOR_END 0xf7 |
@@ -117,6 +113,13 @@ | |||
117 | */ | 113 | */ |
118 | #define LOCAL_PENDING_VECTOR 0xec | 114 | #define LOCAL_PENDING_VECTOR 0xec |
119 | 115 | ||
116 | #define UV_BAU_MESSAGE 0xec | ||
117 | |||
118 | /* | ||
119 | * Self IPI vector for machine checks | ||
120 | */ | ||
121 | #define MCE_SELF_VECTOR 0xeb | ||
122 | |||
120 | /* | 123 | /* |
121 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | 124 | * First APIC vector available to drivers: (vectors 0x30-0xee) we |
122 | * start at 0x31(0x41) to spread out vectors evenly between priority | 125 | * start at 0x31(0x41) to spread out vectors evenly between priority |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4f8c199584e7..540a466e50f5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _ASM_X86_MCE_H | 1 | #ifndef _ASM_X86_MCE_H |
2 | #define _ASM_X86_MCE_H | 2 | #define _ASM_X86_MCE_H |
3 | 3 | ||
4 | #ifdef __x86_64__ | ||
5 | |||
6 | #include <linux/types.h> | 4 | #include <linux/types.h> |
7 | #include <asm/ioctls.h> | 5 | #include <asm/ioctls.h> |
8 | 6 | ||
@@ -10,21 +8,35 @@ | |||
10 | * Machine Check support for x86 | 8 | * Machine Check support for x86 |
11 | */ | 9 | */ |
12 | 10 | ||
13 | #define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ | 11 | #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ |
14 | #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ | 12 | #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ |
15 | #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ | 13 | #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ |
16 | 14 | #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ | |
17 | #define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ | 15 | #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ |
18 | #define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ | 16 | #define MCG_EXT_CNT_SHIFT 16 |
19 | #define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ | 17 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) |
20 | 18 | #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ | |
21 | #define MCI_STATUS_VAL (1UL<<63) /* valid error */ | 19 | |
22 | #define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */ | 20 | #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ |
23 | #define MCI_STATUS_UC (1UL<<61) /* uncorrected error */ | 21 | #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ |
24 | #define MCI_STATUS_EN (1UL<<60) /* error enabled */ | 22 | #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ |
25 | #define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */ | 23 | |
26 | #define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */ | 24 | #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ |
27 | #define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */ | 25 | #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ |
26 | #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ | ||
27 | #define MCI_STATUS_EN (1ULL<<60) /* error enabled */ | ||
28 | #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ | ||
29 | #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ | ||
30 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ | ||
31 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | ||
32 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | ||
33 | |||
34 | /* MISC register defines */ | ||
35 | #define MCM_ADDR_SEGOFF 0 /* segment offset */ | ||
36 | #define MCM_ADDR_LINEAR 1 /* linear address */ | ||
37 | #define MCM_ADDR_PHYS 2 /* physical address */ | ||
38 | #define MCM_ADDR_MEM 3 /* memory address */ | ||
39 | #define MCM_ADDR_GENERIC 7 /* generic */ | ||
28 | 40 | ||
29 | /* Fields are zero when not available */ | 41 | /* Fields are zero when not available */ |
30 | struct mce { | 42 | struct mce { |
@@ -34,13 +46,19 @@ struct mce { | |||
34 | __u64 mcgstatus; | 46 | __u64 mcgstatus; |
35 | __u64 ip; | 47 | __u64 ip; |
36 | __u64 tsc; /* cpu time stamp counter */ | 48 | __u64 tsc; /* cpu time stamp counter */ |
37 | __u64 res1; /* for future extension */ | 49 | __u64 time; /* wall time_t when error was detected */ |
38 | __u64 res2; /* dito. */ | 50 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ |
51 | __u8 pad1; | ||
52 | __u16 pad2; | ||
53 | __u32 cpuid; /* CPUID 1 EAX */ | ||
39 | __u8 cs; /* code segment */ | 54 | __u8 cs; /* code segment */ |
40 | __u8 bank; /* machine check bank */ | 55 | __u8 bank; /* machine check bank */ |
41 | __u8 cpu; /* cpu that raised the error */ | 56 | __u8 cpu; /* cpu number; obsolete; use extcpu now */ |
42 | __u8 finished; /* entry is valid */ | 57 | __u8 finished; /* entry is valid */ |
43 | __u32 pad; | 58 | __u32 extcpu; /* linux cpu number that detected the error */ |
59 | __u32 socketid; /* CPU socket ID */ | ||
60 | __u32 apicid; /* CPU initial apic ID */ | ||
61 | __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ | ||
44 | }; | 62 | }; |
45 | 63 | ||
46 | /* | 64 | /* |
@@ -57,7 +75,7 @@ struct mce_log { | |||
57 | unsigned len; /* = MCE_LOG_LEN */ | 75 | unsigned len; /* = MCE_LOG_LEN */ |
58 | unsigned next; | 76 | unsigned next; |
59 | unsigned flags; | 77 | unsigned flags; |
60 | unsigned pad0; | 78 | unsigned recordlen; /* length of struct mce */ |
61 | struct mce entry[MCE_LOG_LEN]; | 79 | struct mce entry[MCE_LOG_LEN]; |
62 | }; | 80 | }; |
63 | 81 | ||
@@ -82,19 +100,16 @@ struct mce_log { | |||
82 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 100 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
83 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 101 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
84 | 102 | ||
85 | #endif /* __x86_64__ */ | ||
86 | |||
87 | #ifdef __KERNEL__ | 103 | #ifdef __KERNEL__ |
88 | 104 | ||
89 | #ifdef CONFIG_X86_32 | ||
90 | extern int mce_disabled; | 105 | extern int mce_disabled; |
91 | #else /* CONFIG_X86_32 */ | ||
92 | 106 | ||
93 | #include <asm/atomic.h> | 107 | #include <asm/atomic.h> |
108 | #include <linux/percpu.h> | ||
94 | 109 | ||
95 | void mce_setup(struct mce *m); | 110 | void mce_setup(struct mce *m); |
96 | void mce_log(struct mce *m); | 111 | void mce_log(struct mce *m); |
97 | DECLARE_PER_CPU(struct sys_device, device_mce); | 112 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
98 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 113 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
99 | 114 | ||
100 | /* | 115 | /* |
@@ -104,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | |||
104 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) | 119 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) |
105 | 120 | ||
106 | #ifdef CONFIG_X86_MCE_INTEL | 121 | #ifdef CONFIG_X86_MCE_INTEL |
122 | extern int mce_cmci_disabled; | ||
123 | extern int mce_ignore_ce; | ||
107 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | 124 | void mce_intel_feature_init(struct cpuinfo_x86 *c); |
108 | void cmci_clear(void); | 125 | void cmci_clear(void); |
109 | void cmci_reenable(void); | 126 | void cmci_reenable(void); |
@@ -123,13 +140,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c); | |||
123 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } | 140 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } |
124 | #endif | 141 | #endif |
125 | 142 | ||
126 | extern int mce_available(struct cpuinfo_x86 *c); | 143 | int mce_available(struct cpuinfo_x86 *c); |
144 | |||
145 | DECLARE_PER_CPU(unsigned, mce_exception_count); | ||
146 | DECLARE_PER_CPU(unsigned, mce_poll_count); | ||
127 | 147 | ||
128 | void mce_log_therm_throt_event(__u64 status); | 148 | void mce_log_therm_throt_event(__u64 status); |
129 | 149 | ||
130 | extern atomic_t mce_entry; | 150 | extern atomic_t mce_entry; |
131 | 151 | ||
132 | extern void do_machine_check(struct pt_regs *, long); | 152 | void do_machine_check(struct pt_regs *, long); |
133 | 153 | ||
134 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); | 154 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); |
135 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); | 155 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); |
@@ -139,14 +159,16 @@ enum mcp_flags { | |||
139 | MCP_UC = (1 << 1), /* log uncorrected errors */ | 159 | MCP_UC = (1 << 1), /* log uncorrected errors */ |
140 | MCP_DONTLOG = (1 << 2), /* only clear, don't log */ | 160 | MCP_DONTLOG = (1 << 2), /* only clear, don't log */ |
141 | }; | 161 | }; |
142 | extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); | 162 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); |
143 | 163 | ||
144 | extern int mce_notify_user(void); | 164 | int mce_notify_irq(void); |
165 | void mce_notify_process(void); | ||
145 | 166 | ||
146 | #endif /* !CONFIG_X86_32 */ | 167 | DECLARE_PER_CPU(struct mce, injectm); |
168 | extern struct file_operations mce_chrdev_ops; | ||
147 | 169 | ||
148 | #ifdef CONFIG_X86_MCE | 170 | #ifdef CONFIG_X86_MCE |
149 | extern void mcheck_init(struct cpuinfo_x86 *c); | 171 | void mcheck_init(struct cpuinfo_x86 *c); |
150 | #else | 172 | #else |
151 | #define mcheck_init(c) do { } while (0) | 173 | #define mcheck_init(c) do { } while (0) |
152 | #endif | 174 | #endif |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4d58d04fca83..1692fb5050e3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -207,7 +207,14 @@ | |||
207 | 207 | ||
208 | #define MSR_IA32_THERM_CONTROL 0x0000019a | 208 | #define MSR_IA32_THERM_CONTROL 0x0000019a |
209 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b | 209 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b |
210 | |||
211 | #define THERM_INT_LOW_ENABLE (1 << 0) | ||
212 | #define THERM_INT_HIGH_ENABLE (1 << 1) | ||
213 | |||
210 | #define MSR_IA32_THERM_STATUS 0x0000019c | 214 | #define MSR_IA32_THERM_STATUS 0x0000019c |
215 | |||
216 | #define THERM_STATUS_PROCHOT (1 << 0) | ||
217 | |||
211 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | 218 | #define MSR_IA32_MISC_ENABLE 0x000001a0 |
212 | 219 | ||
213 | /* MISC_ENABLE bits: architectural */ | 220 | /* MISC_ENABLE bits: architectural */ |