diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 38 |
1 files changed, 25 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6773610061d..5031814ac94 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/mce.h> | 36 | #include <asm/mce.h> |
37 | #include <asm/msr.h> | 37 | #include <asm/msr.h> |
38 | 38 | ||
39 | #include "mce-internal.h" | ||
39 | #include "mce.h" | 40 | #include "mce.h" |
40 | 41 | ||
41 | /* Handle unconfigured int18 (should never happen) */ | 42 | /* Handle unconfigured int18 (should never happen) */ |
@@ -191,7 +192,7 @@ static void print_mce(struct mce *m) | |||
191 | "and contact your hardware vendor\n"); | 192 | "and contact your hardware vendor\n"); |
192 | } | 193 | } |
193 | 194 | ||
194 | static void mce_panic(char *msg, struct mce *final) | 195 | static void mce_panic(char *msg, struct mce *final, char *exp) |
195 | { | 196 | { |
196 | int i; | 197 | int i; |
197 | 198 | ||
@@ -214,6 +215,8 @@ static void mce_panic(char *msg, struct mce *final) | |||
214 | } | 215 | } |
215 | if (final) | 216 | if (final) |
216 | print_mce(final); | 217 | print_mce(final); |
218 | if (exp) | ||
219 | printk(KERN_EMERG "Machine check: %s\n", exp); | ||
217 | panic(msg); | 220 | panic(msg); |
218 | } | 221 | } |
219 | 222 | ||
@@ -358,6 +361,22 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
358 | EXPORT_SYMBOL_GPL(machine_check_poll); | 361 | EXPORT_SYMBOL_GPL(machine_check_poll); |
359 | 362 | ||
360 | /* | 363 | /* |
364 | * Do a quick check if any of the events requires a panic. | ||
365 | * This decides if we keep the events around or clear them. | ||
366 | */ | ||
367 | static int mce_no_way_out(struct mce *m, char **msg) | ||
368 | { | ||
369 | int i; | ||
370 | |||
371 | for (i = 0; i < banks; i++) { | ||
372 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
373 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | ||
374 | return 1; | ||
375 | } | ||
376 | return 0; | ||
377 | } | ||
378 | |||
379 | /* | ||
361 | * The actual machine check handler. This only handles real | 380 | * The actual machine check handler. This only handles real |
362 | * exceptions when something got corrupted coming in through int 18. | 381 | * exceptions when something got corrupted coming in through int 18. |
363 | * | 382 | * |
@@ -381,6 +400,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
381 | */ | 400 | */ |
382 | int kill_it = 0; | 401 | int kill_it = 0; |
383 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 402 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
403 | char *msg = "Unknown"; | ||
384 | 404 | ||
385 | atomic_inc(&mce_entry); | 405 | atomic_inc(&mce_entry); |
386 | 406 | ||
@@ -395,10 +415,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
395 | mce_setup(&m); | 415 | mce_setup(&m); |
396 | 416 | ||
397 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 417 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
398 | 418 | no_way_out = mce_no_way_out(&m, &msg); | |
399 | /* if the restart IP is not valid, we're done for */ | ||
400 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
401 | no_way_out = 1; | ||
402 | 419 | ||
403 | barrier(); | 420 | barrier(); |
404 | 421 | ||
@@ -430,18 +447,13 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
430 | __set_bit(i, toclear); | 447 | __set_bit(i, toclear); |
431 | 448 | ||
432 | if (m.status & MCI_STATUS_EN) { | 449 | if (m.status & MCI_STATUS_EN) { |
433 | /* if PCC was set, there's no way out */ | ||
434 | no_way_out |= !!(m.status & MCI_STATUS_PCC); | ||
435 | /* | 450 | /* |
436 | * If this error was uncorrectable and there was | 451 | * If this error was uncorrectable and there was |
437 | * an overflow, we're in trouble. If no overflow, | 452 | * an overflow, we're in trouble. If no overflow, |
438 | * we might get away with just killing a task. | 453 | * we might get away with just killing a task. |
439 | */ | 454 | */ |
440 | if (m.status & MCI_STATUS_UC) { | 455 | if (m.status & MCI_STATUS_UC) |
441 | if (tolerant < 1 || m.status & MCI_STATUS_OVER) | ||
442 | no_way_out = 1; | ||
443 | kill_it = 1; | 456 | kill_it = 1; |
444 | } | ||
445 | } else { | 457 | } else { |
446 | /* | 458 | /* |
447 | * Machine check event was not enabled. Clear, but | 459 | * Machine check event was not enabled. Clear, but |
@@ -483,7 +495,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
483 | * has not set tolerant to an insane level, give up and die. | 495 | * has not set tolerant to an insane level, give up and die. |
484 | */ | 496 | */ |
485 | if (no_way_out && tolerant < 3) | 497 | if (no_way_out && tolerant < 3) |
486 | mce_panic("Machine check", &panicm); | 498 | mce_panic("Machine check", &panicm, msg); |
487 | 499 | ||
488 | /* | 500 | /* |
489 | * If the error seems to be unrecoverable, something should be | 501 | * If the error seems to be unrecoverable, something should be |
@@ -511,7 +523,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
511 | if (user_space) { | 523 | if (user_space) { |
512 | force_sig(SIGBUS, current); | 524 | force_sig(SIGBUS, current); |
513 | } else if (panic_on_oops || tolerant < 2) { | 525 | } else if (panic_on_oops || tolerant < 2) { |
514 | mce_panic("Uncorrected machine check", &panicm); | 526 | mce_panic("Uncorrected machine check", &panicm, msg); |
515 | } | 527 | } |
516 | } | 528 | } |
517 | 529 | ||