aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2006-04-07 13:49:57 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-04-09 14:53:52 -0400
commit553f265fe883a23502ee351845f09334790f18b8 (patch)
tree1fd60e72d7d5cbed34812eb6763899d2bd37c152
parentbe56db6186999a8571ae480cf2b929578f6dfd68 (diff)
[PATCH] x86_64: Don't run NMI watchdog during machine checks
Machine checks can stall the machine for a long time and it's not good to trigger the nmi watchdog during that. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/x86_64/kernel/mce.c8
-rw-r--r--arch/x86_64/kernel/nmi.c7
-rw-r--r--include/asm-x86_64/mce.h7
3 files changed, 21 insertions, 1 deletions
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 10b3e348fc9..6f0790e8b6d 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -29,6 +29,8 @@
29#define MISC_MCELOG_MINOR 227 29#define MISC_MCELOG_MINOR 227
30#define NR_BANKS 6 30#define NR_BANKS 6
31 31
32atomic_t mce_entry;
33
32static int mce_dont_init; 34static int mce_dont_init;
33 35
34/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, 36/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
@@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
172 int i; 174 int i;
173 int panicm_found = 0; 175 int panicm_found = 0;
174 176
177 atomic_inc(&mce_entry);
178
175 if (regs) 179 if (regs)
176 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); 180 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
177 if (!banks) 181 if (!banks)
178 return; 182 goto out2;
179 183
180 memset(&m, 0, sizeof(struct mce)); 184 memset(&m, 0, sizeof(struct mce));
181 m.cpu = safe_smp_processor_id(); 185 m.cpu = safe_smp_processor_id();
@@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
266 out: 270 out:
267 /* Last thing done in the machine check exception to clear state. */ 271 /* Last thing done in the machine check exception to clear state. */
268 wrmsrl(MSR_IA32_MCG_STATUS, 0); 272 wrmsrl(MSR_IA32_MCG_STATUS, 0);
273 out2:
274 atomic_dec(&mce_entry);
269} 275}
270 276
271/* 277/*
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index d9e4067faf0..4e6357fe0ec 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -34,6 +34,7 @@
34#include <asm/proto.h> 34#include <asm/proto.h>
35#include <asm/kdebug.h> 35#include <asm/kdebug.h>
36#include <asm/local.h> 36#include <asm/local.h>
37#include <asm/mce.h>
37 38
38/* 39/*
39 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 40 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
480 __get_cpu_var(nmi_touch) = 0; 481 __get_cpu_var(nmi_touch) = 0;
481 touched = 1; 482 touched = 1;
482 } 483 }
484#ifdef CONFIG_X86_MCE
485 /* Could check oops_in_progress here too, but it's safer
486 not too */
487 if (atomic_read(&mce_entry) > 0)
488 touched = 1;
489#endif
483 if (!touched && __get_cpu_var(last_irq_sum) == sum) { 490 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
484 /* 491 /*
485 * Ayiee, looks like this CPU is stuck ... 492 * Ayiee, looks like this CPU is stuck ...
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index 5d298b799a9..7229785094e 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -70,6 +70,9 @@ struct mce_log {
70#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ 70#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */
71#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 71#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4
72 72
73#ifdef __KERNEL__
74#include <asm/atomic.h>
75
73void mce_log(struct mce *m); 76void mce_log(struct mce *m);
74#ifdef CONFIG_X86_MCE_INTEL 77#ifdef CONFIG_X86_MCE_INTEL
75void mce_intel_feature_init(struct cpuinfo_x86 *c); 78void mce_intel_feature_init(struct cpuinfo_x86 *c);
@@ -87,4 +90,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
87} 90}
88#endif 91#endif
89 92
93extern atomic_t mce_entry;
94
95#endif
96
90#endif 97#endif