diff options
| | |
---|---|---|
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-06 19:24:51 -0400 |
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-06 19:24:51 -0400 |
commit | e8779776afbd5f2d5315cf48c4257ca7e9b250fb (patch) | |
tree | a439733c6c71dd7d24ba65557159c30c2e49861a | |
parent | 3cf8ad3394b8675e92a35c438f22341197535531 (diff) | |
parent | a2d7b0d4852536273b65d16fe179c65184fe5e2d (diff) |
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, mce: Use HW_ERR in MCE handler
  x86, mce: Add HW_ERR printk prefix for hardware error logging
  x86, mce: Fix MSR_IA32_MCI_CTL2 CMCI threshold setup
  x86, mce: Rename MSR_IA32_MCx_CTL2 value
-rw-r--r-- | arch/x86/include/asm/mce.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 9 | ||||
-rw-r--r-- | include/linux/kernel.h | 7 |
5 files changed, 26 insertions, 29 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f32a4301c4d4..c62c13cb9788 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -38,6 +38,10 @@ | |||
38 | #define MCM_ADDR_MEM 3 /* memory address */ | 38 | #define MCM_ADDR_MEM 3 /* memory address */ |
39 | #define MCM_ADDR_GENERIC 7 /* generic */ | 39 | #define MCM_ADDR_GENERIC 7 /* generic */ |
40 | 40 | ||
41 | /* CTL2 register defines */ | ||
42 | #define MCI_CTL2_CMCI_EN (1ULL << 30) | ||
43 | #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL | ||
44 | |||
41 | #define MCJ_CTX_MASK 3 | 45 | #define MCJ_CTX_MASK 3 |
42 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) | 46 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) |
43 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ | 47 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 65bbec2093aa..986f7790fdb2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -96,9 +96,6 @@ | |||
96 | #define MSR_IA32_MC0_CTL2 0x00000280 | 96 | #define MSR_IA32_MC0_CTL2 0x00000280 |
97 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) | 97 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) |
98 | 98 | ||
99 | #define CMCI_EN (1ULL << 30) | ||
100 | #define CMCI_THRESHOLD_MASK 0xffffULL | ||
101 | |||
102 | #define MSR_P6_PERFCTR0 0x000000c1 | 99 | #define MSR_P6_PERFCTR0 0x000000c1 |
103 | #define MSR_P6_PERFCTR1 0x000000c2 | 100 | #define MSR_P6_PERFCTR1 0x000000c2 |
104 | #define MSR_P6_EVNTSEL0 0x00000186 | 101 | #define MSR_P6_EVNTSEL0 0x00000186 |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e1269d62c569..ed41562909fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | |||
107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | 107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, |
108 | void *data) | 108 | void *data) |
109 | { | 109 | { |
110 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 110 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); |
111 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 111 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); |
112 | 112 | ||
113 | return NOTIFY_STOP; | 113 | return NOTIFY_STOP; |
114 | } | 114 | } |
@@ -211,11 +211,11 @@ void mce_log(struct mce *mce) | |||
211 | 211 | ||
212 | static void print_mce(struct mce *m) | 212 | static void print_mce(struct mce *m) |
213 | { | 213 | { |
214 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 214 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
215 | m->extcpu, m->mcgstatus, m->bank, m->status); | 215 | m->extcpu, m->mcgstatus, m->bank, m->status); |
216 | 216 | ||
217 | if (m->ip) { | 217 | if (m->ip) { |
218 | pr_emerg("RIP%s %02x:<%016Lx> ", | 218 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", |
219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
220 | m->cs, m->ip); | 220 | m->cs, m->ip); |
221 | 221 | ||
@@ -224,14 +224,14 @@ static void print_mce(struct mce *m) | |||
224 | pr_cont("\n"); | 224 | pr_cont("\n"); |
225 | } | 225 | } |
226 | 226 | ||
227 | pr_emerg("TSC %llx ", m->tsc); | 227 | pr_emerg(HW_ERR "TSC %llx ", m->tsc); |
228 | if (m->addr) | 228 | if (m->addr) |
229 | pr_cont("ADDR %llx ", m->addr); | 229 | pr_cont("ADDR %llx ", m->addr); |
230 | if (m->misc) | 230 | if (m->misc) |
231 | pr_cont("MISC %llx ", m->misc); | 231 | pr_cont("MISC %llx ", m->misc); |
232 | 232 | ||
233 | pr_cont("\n"); | 233 | pr_cont("\n"); |
234 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 234 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | 235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
236 | 236 | ||
237 | /* | 237 | /* |
@@ -241,16 +241,6 @@ static void print_mce(struct mce *m) | |||
241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
242 | } | 242 | } |
243 | 243 | ||
244 | static void print_mce_head(void) | ||
245 | { | ||
246 | pr_emerg("\nHARDWARE ERROR\n"); | ||
247 | } | ||
248 | |||
249 | static void print_mce_tail(void) | ||
250 | { | ||
251 | pr_emerg("This is not a software problem!\n"); | ||
252 | } | ||
253 | |||
254 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 244 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
255 | 245 | ||
256 | static atomic_t mce_paniced; | 246 | static atomic_t mce_paniced; |
@@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
291 | if (atomic_inc_return(&mce_fake_paniced) > 1) | 281 | if (atomic_inc_return(&mce_fake_paniced) > 1) |
292 | return; | 282 | return; |
293 | } | 283 | } |
294 | print_mce_head(); | ||
295 | /* First print corrected ones that are still unlogged */ | 284 | /* First print corrected ones that are still unlogged */ |
296 | for (i = 0; i < MCE_LOG_LEN; i++) { | 285 | for (i = 0; i < MCE_LOG_LEN; i++) { |
297 | struct mce *m = &mcelog.entry[i]; | 286 | struct mce *m = &mcelog.entry[i]; |
@@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
322 | apei_err = apei_write_mce(final); | 311 | apei_err = apei_write_mce(final); |
323 | } | 312 | } |
324 | if (cpu_missing) | 313 | if (cpu_missing) |
325 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 314 | pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); |
326 | print_mce_tail(); | ||
327 | if (exp) | 315 | if (exp) |
328 | printk(KERN_EMERG "Machine check: %s\n", exp); | 316 | pr_emerg(HW_ERR "Machine check: %s\n", exp); |
329 | if (!fake_panic) { | 317 | if (!fake_panic) { |
330 | if (panic_timeout == 0) | 318 | if (panic_timeout == 0) |
331 | panic_timeout = mce_panic_timeout; | 319 | panic_timeout = mce_panic_timeout; |
332 | panic(msg); | 320 | panic(msg); |
333 | } else | 321 | } else |
334 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | 322 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); |
335 | } | 323 | } |
336 | 324 | ||
337 | /* Support code for software error injection */ | 325 | /* Support code for software error injection */ |
@@ -1221,7 +1209,7 @@ int mce_notify_irq(void) | |||
1221 | schedule_work(&mce_trigger_work); | 1209 | schedule_work(&mce_trigger_work); |
1222 | 1210 | ||
1223 | if (__ratelimit(&ratelimit)) | 1211 | if (__ratelimit(&ratelimit)) |
1224 | printk(KERN_INFO "Machine check events logged\n"); | 1212 | pr_info(HW_ERR "Machine check events logged\n"); |
1225 | 1213 | ||
1226 | return 1; | 1214 | return 1; |
1227 | } | 1215 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 62b48e40920a..6fcd0936194f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot) | |||
95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
96 | 96 | ||
97 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
98 | if (val & CMCI_EN) { | 98 | if (val & MCI_CTL2_CMCI_EN) { |
99 | if (test_and_clear_bit(i, owned) && !boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
100 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
102 | continue; | 102 | continue; |
103 | } | 103 | } |
104 | 104 | ||
105 | val |= CMCI_EN | CMCI_THRESHOLD; | 105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | ||
106 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
107 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
108 | 109 | ||
109 | /* Did the enable bit stick? -- the bank supports CMCI */ | 110 | /* Did the enable bit stick? -- the bank supports CMCI */ |
110 | if (val & CMCI_EN) { | 111 | if (val & MCI_CTL2_CMCI_EN) { |
111 | if (!test_and_set_bit(i, owned) && !boot) | 112 | if (!test_and_set_bit(i, owned) && !boot) |
112 | print_update("CMCI", &hdr, i); | 113 | print_update("CMCI", &hdr, i); |
113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
@@ -155,7 +156,7 @@ void cmci_clear(void) | |||
155 | continue; | 156 | continue; |
156 | /* Disable CMCI */ | 157 | /* Disable CMCI */ |
157 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
158 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); |
159 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
160 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
161 | } | 162 | } |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 38e462e00594..7d5b10ff63e0 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -252,6 +252,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp); | |||
252 | #define FW_WARN "[Firmware Warn]: " | 252 | #define FW_WARN "[Firmware Warn]: " |
253 | #define FW_INFO "[Firmware Info]: " | 253 | #define FW_INFO "[Firmware Info]: " |
254 | 254 | ||
255 | /* | ||
256 | * HW_ERR | ||
257 | * Add this to a message for hardware errors, so that user can report | ||
258 | * it to hardware vendor instead of LKML or software vendor. | ||
259 | */ | ||
260 | #define HW_ERR "[Hardware Error]: " | ||
261 | |||
255 | #ifdef CONFIG_PRINTK | 262 | #ifdef CONFIG_PRINTK |
256 | asmlinkage int vprintk(const char *fmt, va_list args) | 263 | asmlinkage int vprintk(const char *fmt, va_list args) |
257 | __attribute__ ((format (printf, 1, 0))); | 264 | __attribute__ ((format (printf, 1, 0))); |