diff options
| -rw-r--r-- | arch/x86/include/asm/mce.h | 4 | ||||
| -rw-r--r-- | arch/x86/include/asm/msr-index.h | 3 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 32 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 9 | ||||
| -rw-r--r-- | include/linux/kernel.h | 7 |
5 files changed, 26 insertions, 29 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f32a4301c4d4..c62c13cb9788 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
| @@ -38,6 +38,10 @@ | |||
| 38 | #define MCM_ADDR_MEM 3 /* memory address */ | 38 | #define MCM_ADDR_MEM 3 /* memory address */ |
| 39 | #define MCM_ADDR_GENERIC 7 /* generic */ | 39 | #define MCM_ADDR_GENERIC 7 /* generic */ |
| 40 | 40 | ||
| 41 | /* CTL2 register defines */ | ||
| 42 | #define MCI_CTL2_CMCI_EN (1ULL << 30) | ||
| 43 | #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL | ||
| 44 | |||
| 41 | #define MCJ_CTX_MASK 3 | 45 | #define MCJ_CTX_MASK 3 |
| 42 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) | 46 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) |
| 43 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ | 47 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 65bbec2093aa..986f7790fdb2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -96,9 +96,6 @@ | |||
| 96 | #define MSR_IA32_MC0_CTL2 0x00000280 | 96 | #define MSR_IA32_MC0_CTL2 0x00000280 |
| 97 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) | 97 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) |
| 98 | 98 | ||
| 99 | #define CMCI_EN (1ULL << 30) | ||
| 100 | #define CMCI_THRESHOLD_MASK 0xffffULL | ||
| 101 | |||
| 102 | #define MSR_P6_PERFCTR0 0x000000c1 | 99 | #define MSR_P6_PERFCTR0 0x000000c1 |
| 103 | #define MSR_P6_PERFCTR1 0x000000c2 | 100 | #define MSR_P6_PERFCTR1 0x000000c2 |
| 104 | #define MSR_P6_EVNTSEL0 0x00000186 | 101 | #define MSR_P6_EVNTSEL0 0x00000186 |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e1269d62c569..ed41562909fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | |||
| 107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | 107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, |
| 108 | void *data) | 108 | void *data) |
| 109 | { | 109 | { |
| 110 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 110 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); |
| 111 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 111 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); |
| 112 | 112 | ||
| 113 | return NOTIFY_STOP; | 113 | return NOTIFY_STOP; |
| 114 | } | 114 | } |
| @@ -211,11 +211,11 @@ void mce_log(struct mce *mce) | |||
| 211 | 211 | ||
| 212 | static void print_mce(struct mce *m) | 212 | static void print_mce(struct mce *m) |
| 213 | { | 213 | { |
| 214 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 214 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
| 215 | m->extcpu, m->mcgstatus, m->bank, m->status); | 215 | m->extcpu, m->mcgstatus, m->bank, m->status); |
| 216 | 216 | ||
| 217 | if (m->ip) { | 217 | if (m->ip) { |
| 218 | pr_emerg("RIP%s %02x:<%016Lx> ", | 218 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", |
| 219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
| 220 | m->cs, m->ip); | 220 | m->cs, m->ip); |
| 221 | 221 | ||
| @@ -224,14 +224,14 @@ static void print_mce(struct mce *m) | |||
| 224 | pr_cont("\n"); | 224 | pr_cont("\n"); |
| 225 | } | 225 | } |
| 226 | 226 | ||
| 227 | pr_emerg("TSC %llx ", m->tsc); | 227 | pr_emerg(HW_ERR "TSC %llx ", m->tsc); |
| 228 | if (m->addr) | 228 | if (m->addr) |
| 229 | pr_cont("ADDR %llx ", m->addr); | 229 | pr_cont("ADDR %llx ", m->addr); |
| 230 | if (m->misc) | 230 | if (m->misc) |
| 231 | pr_cont("MISC %llx ", m->misc); | 231 | pr_cont("MISC %llx ", m->misc); |
| 232 | 232 | ||
| 233 | pr_cont("\n"); | 233 | pr_cont("\n"); |
| 234 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 234 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
| 235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | 235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
| 236 | 236 | ||
| 237 | /* | 237 | /* |
| @@ -241,16 +241,6 @@ static void print_mce(struct mce *m) | |||
| 241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
| 242 | } | 242 | } |
| 243 | 243 | ||
| 244 | static void print_mce_head(void) | ||
| 245 | { | ||
| 246 | pr_emerg("\nHARDWARE ERROR\n"); | ||
| 247 | } | ||
| 248 | |||
| 249 | static void print_mce_tail(void) | ||
| 250 | { | ||
| 251 | pr_emerg("This is not a software problem!\n"); | ||
| 252 | } | ||
| 253 | |||
| 254 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 244 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
| 255 | 245 | ||
| 256 | static atomic_t mce_paniced; | 246 | static atomic_t mce_paniced; |
| @@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 291 | if (atomic_inc_return(&mce_fake_paniced) > 1) | 281 | if (atomic_inc_return(&mce_fake_paniced) > 1) |
| 292 | return; | 282 | return; |
| 293 | } | 283 | } |
| 294 | print_mce_head(); | ||
| 295 | /* First print corrected ones that are still unlogged */ | 284 | /* First print corrected ones that are still unlogged */ |
| 296 | for (i = 0; i < MCE_LOG_LEN; i++) { | 285 | for (i = 0; i < MCE_LOG_LEN; i++) { |
| 297 | struct mce *m = &mcelog.entry[i]; | 286 | struct mce *m = &mcelog.entry[i]; |
| @@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 322 | apei_err = apei_write_mce(final); | 311 | apei_err = apei_write_mce(final); |
| 323 | } | 312 | } |
| 324 | if (cpu_missing) | 313 | if (cpu_missing) |
| 325 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 314 | pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); |
| 326 | print_mce_tail(); | ||
| 327 | if (exp) | 315 | if (exp) |
| 328 | printk(KERN_EMERG "Machine check: %s\n", exp); | 316 | pr_emerg(HW_ERR "Machine check: %s\n", exp); |
| 329 | if (!fake_panic) { | 317 | if (!fake_panic) { |
| 330 | if (panic_timeout == 0) | 318 | if (panic_timeout == 0) |
| 331 | panic_timeout = mce_panic_timeout; | 319 | panic_timeout = mce_panic_timeout; |
| 332 | panic(msg); | 320 | panic(msg); |
| 333 | } else | 321 | } else |
| 334 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | 322 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); |
| 335 | } | 323 | } |
| 336 | 324 | ||
| 337 | /* Support code for software error injection */ | 325 | /* Support code for software error injection */ |
| @@ -1221,7 +1209,7 @@ int mce_notify_irq(void) | |||
| 1221 | schedule_work(&mce_trigger_work); | 1209 | schedule_work(&mce_trigger_work); |
| 1222 | 1210 | ||
| 1223 | if (__ratelimit(&ratelimit)) | 1211 | if (__ratelimit(&ratelimit)) |
| 1224 | printk(KERN_INFO "Machine check events logged\n"); | 1212 | pr_info(HW_ERR "Machine check events logged\n"); |
| 1225 | 1213 | ||
| 1226 | return 1; | 1214 | return 1; |
| 1227 | } | 1215 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 62b48e40920a..6fcd0936194f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
| @@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot) | |||
| 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 96 | 96 | ||
| 97 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
| 98 | if (val & CMCI_EN) { | 98 | if (val & MCI_CTL2_CMCI_EN) { |
| 99 | if (test_and_clear_bit(i, owned) && !boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
| 100 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
| 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| 102 | continue; | 102 | continue; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | val |= CMCI_EN | CMCI_THRESHOLD; | 105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
| 106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | ||
| 106 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 107 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 108 | 109 | ||
| 109 | /* Did the enable bit stick? -- the bank supports CMCI */ | 110 | /* Did the enable bit stick? -- the bank supports CMCI */ |
| 110 | if (val & CMCI_EN) { | 111 | if (val & MCI_CTL2_CMCI_EN) { |
| 111 | if (!test_and_set_bit(i, owned) && !boot) | 112 | if (!test_and_set_bit(i, owned) && !boot) |
| 112 | print_update("CMCI", &hdr, i); | 113 | print_update("CMCI", &hdr, i); |
| 113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| @@ -155,7 +156,7 @@ void cmci_clear(void) | |||
| 155 | continue; | 156 | continue; |
| 156 | /* Disable CMCI */ | 157 | /* Disable CMCI */ |
| 157 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 158 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); |
| 159 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 160 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
| 161 | } | 162 | } |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 38e462e00594..7d5b10ff63e0 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
| @@ -252,6 +252,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp); | |||
| 252 | #define FW_WARN "[Firmware Warn]: " | 252 | #define FW_WARN "[Firmware Warn]: " |
| 253 | #define FW_INFO "[Firmware Info]: " | 253 | #define FW_INFO "[Firmware Info]: " |
| 254 | 254 | ||
| 255 | /* | ||
| 256 | * HW_ERR | ||
| 257 | * Add this to a message for hardware errors, so that user can report | ||
| 258 | * it to hardware vendor instead of LKML or software vendor. | ||
| 259 | */ | ||
| 260 | #define HW_ERR "[Hardware Error]: " | ||
| 261 | |||
| 255 | #ifdef CONFIG_PRINTK | 262 | #ifdef CONFIG_PRINTK |
| 256 | asmlinkage int vprintk(const char *fmt, va_list args) | 263 | asmlinkage int vprintk(const char *fmt, va_list args) |
| 257 | __attribute__ ((format (printf, 1, 0))); | 264 | __attribute__ ((format (printf, 1, 0))); |
