diff options
| -rw-r--r-- | arch/x86/include/asm/mce.h | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 58 | ||||
| -rw-r--r-- | drivers/edac/Makefile | 2 | ||||
| -rw-r--r-- | drivers/edac/edac_mce_amd.c | 15 |
4 files changed, 53 insertions, 24 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b608a64c5814..f1363b72364f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
| @@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | |||
| 133 | static inline void enable_p5_mce(void) {} | 133 | static inline void enable_p5_mce(void) {} |
| 134 | #endif | 134 | #endif |
| 135 | 135 | ||
| 136 | extern void (*x86_mce_decode_callback)(struct mce *m); | ||
| 137 | |||
| 136 | void mce_setup(struct mce *m); | 138 | void mce_setup(struct mce *m); |
| 137 | void mce_log(struct mce *m); | 139 | void mce_log(struct mce *m); |
| 138 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 140 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f4..b1598a9436d0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
| 85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 85 | static DEFINE_PER_CPU(struct mce, mces_seen); |
| 86 | static int cpu_missing; | 86 | static int cpu_missing; |
| 87 | 87 | ||
| 88 | static void default_decode_mce(struct mce *m) | ||
| 89 | { | ||
| 90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | ||
| 91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | ||
| 92 | } | ||
| 93 | |||
| 94 | /* | ||
| 95 | * CPU/chipset specific EDAC code can register a callback here to print | ||
| 96 | * MCE errors in a human-readable form: | ||
| 97 | */ | ||
| 98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
| 99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
| 88 | 100 | ||
| 89 | /* MCA banks polled by the period polling timer for corrected events */ | 101 | /* MCA banks polled by the period polling timer for corrected events */ |
| 90 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
| @@ -165,46 +177,46 @@ void mce_log(struct mce *mce) | |||
| 165 | set_bit(0, &mce_need_notify); | 177 | set_bit(0, &mce_need_notify); |
| 166 | } | 178 | } |
| 167 | 179 | ||
| 168 | void __weak decode_mce(struct mce *m) | ||
| 169 | { | ||
| 170 | return; | ||
| 171 | } | ||
| 172 | |||
| 173 | static void print_mce(struct mce *m) | 180 | static void print_mce(struct mce *m) |
| 174 | { | 181 | { |
| 175 | printk(KERN_EMERG | 182 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
| 176 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
| 177 | m->extcpu, m->mcgstatus, m->bank, m->status); | 183 | m->extcpu, m->mcgstatus, m->bank, m->status); |
| 184 | |||
| 178 | if (m->ip) { | 185 | if (m->ip) { |
| 179 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | 186 | pr_emerg("RIP%s %02x:<%016Lx> ", |
| 180 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 187 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
| 181 | m->cs, m->ip); | 188 | m->cs, m->ip); |
| 189 | |||
| 182 | if (m->cs == __KERNEL_CS) | 190 | if (m->cs == __KERNEL_CS) |
| 183 | print_symbol("{%s}", m->ip); | 191 | print_symbol("{%s}", m->ip); |
| 184 | printk(KERN_CONT "\n"); | 192 | pr_cont("\n"); |
| 185 | } | 193 | } |
| 186 | printk(KERN_EMERG "TSC %llx ", m->tsc); | 194 | |
| 195 | pr_emerg("TSC %llx ", m->tsc); | ||
| 187 | if (m->addr) | 196 | if (m->addr) |
| 188 | printk(KERN_CONT "ADDR %llx ", m->addr); | 197 | pr_cont("ADDR %llx ", m->addr); |
| 189 | if (m->misc) | 198 | if (m->misc) |
| 190 | printk(KERN_CONT "MISC %llx ", m->misc); | 199 | pr_cont("MISC %llx ", m->misc); |
| 191 | printk(KERN_CONT "\n"); | 200 | |
| 192 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 201 | pr_cont("\n"); |
| 193 | m->cpuvendor, m->cpuid, m->time, m->socketid, | 202 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
| 194 | m->apicid); | 203 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
| 195 | 204 | ||
| 196 | decode_mce(m); | 205 | /* |
| 206 | * Print out human-readable details about the MCE error, | ||
| 207 | * (if the CPU has an implementation for that): | ||
| 208 | */ | ||
| 209 | x86_mce_decode_callback(m); | ||
| 197 | } | 210 | } |
| 198 | 211 | ||
| 199 | static void print_mce_head(void) | 212 | static void print_mce_head(void) |
| 200 | { | 213 | { |
| 201 | printk(KERN_EMERG "\nHARDWARE ERROR\n"); | 214 | pr_emerg("\nHARDWARE ERROR\n"); |
| 202 | } | 215 | } |
| 203 | 216 | ||
| 204 | static void print_mce_tail(void) | 217 | static void print_mce_tail(void) |
| 205 | { | 218 | { |
| 206 | printk(KERN_EMERG "This is not a software problem!\n" | 219 | pr_emerg("This is not a software problem!\n"); |
| 207 | "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
| 208 | } | 220 | } |
| 209 | 221 | ||
| 210 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 222 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
| @@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced; | |||
| 218 | static void wait_for_panic(void) | 230 | static void wait_for_panic(void) |
| 219 | { | 231 | { |
| 220 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; | 232 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; |
| 233 | |||
| 221 | preempt_disable(); | 234 | preempt_disable(); |
| 222 | local_irq_enable(); | 235 | local_irq_enable(); |
| 223 | while (timeout-- > 0) | 236 | while (timeout-- > 0) |
| @@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 285 | static int msr_to_offset(u32 msr) | 298 | static int msr_to_offset(u32 msr) |
| 286 | { | 299 | { |
| 287 | unsigned bank = __get_cpu_var(injectm.bank); | 300 | unsigned bank = __get_cpu_var(injectm.bank); |
| 301 | |||
| 288 | if (msr == rip_msr) | 302 | if (msr == rip_msr) |
| 289 | return offsetof(struct mce, ip); | 303 | return offsetof(struct mce, ip); |
| 290 | if (msr == MSR_IA32_MCx_STATUS(bank)) | 304 | if (msr == MSR_IA32_MCx_STATUS(bank)) |
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 7a473bbe8abd..8701cd7ce4e3 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
| @@ -18,7 +18,7 @@ edac_core-objs += edac_pci.o edac_pci_sysfs.o | |||
| 18 | endif | 18 | endif |
| 19 | 19 | ||
| 20 | ifdef CONFIG_CPU_SUP_AMD | 20 | ifdef CONFIG_CPU_SUP_AMD |
| 21 | edac_core-objs += edac_mce_amd.o | 21 | obj-$(CONFIG_X86_MCE) += edac_mce_amd.o |
| 22 | endif | 22 | endif |
| 23 | 23 | ||
| 24 | obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o | 24 | obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o |
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 0c21c370c9dd..83a01a1187d7 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c | |||
| @@ -362,7 +362,7 @@ static inline void amd_decode_err_code(unsigned int ec) | |||
| 362 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); | 362 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); |
| 363 | } | 363 | } |
| 364 | 364 | ||
| 365 | void decode_mce(struct mce *m) | 365 | static void amd_decode_mce(struct mce *m) |
| 366 | { | 366 | { |
| 367 | struct err_regs regs; | 367 | struct err_regs regs; |
| 368 | int node, ecc; | 368 | int node, ecc; |
| @@ -420,3 +420,16 @@ void decode_mce(struct mce *m) | |||
| 420 | 420 | ||
| 421 | amd_decode_err_code(m->status & 0xffff); | 421 | amd_decode_err_code(m->status & 0xffff); |
| 422 | } | 422 | } |
| 423 | |||
| 424 | static int __init mce_amd_init(void) | ||
| 425 | { | ||
| 426 | /* | ||
| 427 | * We can decode MCEs for Opteron and later CPUs: | ||
| 428 | */ | ||
| 429 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | ||
| 430 | (boot_cpu_data.x86 >= 0xf)) | ||
| 431 | x86_mce_decode_callback = amd_decode_mce; | ||
| 432 | |||
| 433 | return 0; | ||
| 434 | } | ||
| 435 | early_initcall(mce_amd_init); | ||
