aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck/mce.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c38
1 files changed, 25 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6773610061d..5031814ac94 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@
36#include <asm/mce.h> 36#include <asm/mce.h>
37#include <asm/msr.h> 37#include <asm/msr.h>
38 38
39#include "mce-internal.h"
39#include "mce.h" 40#include "mce.h"
40 41
41/* Handle unconfigured int18 (should never happen) */ 42/* Handle unconfigured int18 (should never happen) */
@@ -191,7 +192,7 @@ static void print_mce(struct mce *m)
191 "and contact your hardware vendor\n"); 192 "and contact your hardware vendor\n");
192} 193}
193 194
194static void mce_panic(char *msg, struct mce *final) 195static void mce_panic(char *msg, struct mce *final, char *exp)
195{ 196{
196 int i; 197 int i;
197 198
@@ -214,6 +215,8 @@ static void mce_panic(char *msg, struct mce *final)
214 } 215 }
215 if (final) 216 if (final)
216 print_mce(final); 217 print_mce(final);
218 if (exp)
219 printk(KERN_EMERG "Machine check: %s\n", exp);
217 panic(msg); 220 panic(msg);
218} 221}
219 222
@@ -358,6 +361,22 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
358EXPORT_SYMBOL_GPL(machine_check_poll); 361EXPORT_SYMBOL_GPL(machine_check_poll);
359 362
360/* 363/*
364 * Do a quick check if any of the events requires a panic.
365 * This decides if we keep the events around or clear them.
366 */
367static int mce_no_way_out(struct mce *m, char **msg)
368{
369 int i;
370
371 for (i = 0; i < banks; i++) {
372 m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
373 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
374 return 1;
375 }
376 return 0;
377}
378
379/*
361 * The actual machine check handler. This only handles real 380 * The actual machine check handler. This only handles real
362 * exceptions when something got corrupted coming in through int 18. 381 * exceptions when something got corrupted coming in through int 18.
363 * 382 *
@@ -381,6 +400,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
381 */ 400 */
382 int kill_it = 0; 401 int kill_it = 0;
383 DECLARE_BITMAP(toclear, MAX_NR_BANKS); 402 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
403 char *msg = "Unknown";
384 404
385 atomic_inc(&mce_entry); 405 atomic_inc(&mce_entry);
386 406
@@ -395,10 +415,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
395 mce_setup(&m); 415 mce_setup(&m);
396 416
397 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 417 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
398 418 no_way_out = mce_no_way_out(&m, &msg);
399 /* if the restart IP is not valid, we're done for */
400 if (!(m.mcgstatus & MCG_STATUS_RIPV))
401 no_way_out = 1;
402 419
403 barrier(); 420 barrier();
404 421
@@ -430,18 +447,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
430 __set_bit(i, toclear); 447 __set_bit(i, toclear);
431 448
432 if (m.status & MCI_STATUS_EN) { 449 if (m.status & MCI_STATUS_EN) {
433 /* if PCC was set, there's no way out */
434 no_way_out |= !!(m.status & MCI_STATUS_PCC);
435 /* 450 /*
436 * If this error was uncorrectable and there was 451 * If this error was uncorrectable and there was
437 * an overflow, we're in trouble. If no overflow, 452 * an overflow, we're in trouble. If no overflow,
438 * we might get away with just killing a task. 453 * we might get away with just killing a task.
439 */ 454 */
440 if (m.status & MCI_STATUS_UC) { 455 if (m.status & MCI_STATUS_UC)
441 if (tolerant < 1 || m.status & MCI_STATUS_OVER)
442 no_way_out = 1;
443 kill_it = 1; 456 kill_it = 1;
444 }
445 } else { 457 } else {
446 /* 458 /*
447 * Machine check event was not enabled. Clear, but 459 * Machine check event was not enabled. Clear, but
@@ -483,7 +495,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
483 * has not set tolerant to an insane level, give up and die. 495 * has not set tolerant to an insane level, give up and die.
484 */ 496 */
485 if (no_way_out && tolerant < 3) 497 if (no_way_out && tolerant < 3)
486 mce_panic("Machine check", &panicm); 498 mce_panic("Machine check", &panicm, msg);
487 499
488 /* 500 /*
489 * If the error seems to be unrecoverable, something should be 501 * If the error seems to be unrecoverable, something should be
@@ -511,7 +523,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
511 if (user_space) { 523 if (user_space) {
512 force_sig(SIGBUS, current); 524 force_sig(SIGBUS, current);
513 } else if (panic_on_oops || tolerant < 2) { 525 } else if (panic_on_oops || tolerant < 2) {
514 mce_panic("Uncorrected machine check", &panicm); 526 mce_panic("Uncorrected machine check", &panicm, msg);
515 } 527 }
516 } 528 }
517 529