aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-05-27 15:56:56 -0400
committerH. Peter Anvin <hpa@zytor.com>2009-06-03 17:45:33 -0400
commit29b0f591d678838435fbb3e15ef20266f1a9e01d (patch)
treec477e019b6e6b4260a2713dc054304ff1f87777c /arch/x86/kernel
parent1b2797dcc9f0ad89bc382ace26c6baafbc7e33c2 (diff)
x86, mce: default to panic timeout for machine checks
Fatal machine checks can be logged to disk after boot, but only if the system did a warm reboot. That's unfortunately difficult with the default panic behaviour, which waits forever, so the admin has to press the power button because modern systems usually lack a reset button. Power-cycling the machine clears the machine checks in the registers and makes it impossible to log them. This patch changes the default for machine check panic to always reboot after 30s. Then the MCE can be successfully logged after reboot. I believe this will improve the machine check experience for any system running the X server. This is dependent on successful boot logging of MCEs. This currently only works on Intel systems; on AMD there are quite a lot of systems around which leave junk in the machine check registers after boot, so it's disabled here. These systems will continue to default to the endless-waiting panic. v2: Only force panic timeout when it's shorter (H.Seto) v3: Only force timeout when there is no timeout (based on comment H.Seto) [ Fix changelog - HS ] Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c7
1 files changed, 7 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7dc369a9974..79d243145b8f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -82,6 +82,7 @@ static unsigned long notify_user;
82static int rip_msr; 82static int rip_msr;
83static int mce_bootlog = -1; 83static int mce_bootlog = -1;
84static int monarch_timeout = -1; 84static int monarch_timeout = -1;
85static int mce_panic_timeout;
85 86
86static char trigger[128]; 87static char trigger[128];
87static char *trigger_argv[2] = { trigger, NULL }; 88static char *trigger_argv[2] = { trigger, NULL };
@@ -216,6 +217,8 @@ static void wait_for_panic(void)
216 local_irq_enable(); 217 local_irq_enable();
217 while (timeout-- > 0) 218 while (timeout-- > 0)
218 udelay(1); 219 udelay(1);
220 if (panic_timeout == 0)
221 panic_timeout = mce_panic_timeout;
219 panic("Panicing machine check CPU died"); 222 panic("Panicing machine check CPU died");
220} 223}
221 224
@@ -253,6 +256,8 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
253 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); 256 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
254 if (exp) 257 if (exp)
255 printk(KERN_EMERG "Machine check: %s\n", exp); 258 printk(KERN_EMERG "Machine check: %s\n", exp);
259 if (panic_timeout == 0)
260 panic_timeout = mce_panic_timeout;
256 panic(msg); 261 panic(msg);
257} 262}
258 263
@@ -1117,6 +1122,8 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1117 } 1122 }
1118 if (monarch_timeout < 0) 1123 if (monarch_timeout < 0)
1119 monarch_timeout = 0; 1124 monarch_timeout = 0;
1125 if (mce_bootlog != 0)
1126 mce_panic_timeout = 30;
1120} 1127}
1121 1128
1122static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) 1129static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)