aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWim Van Sebroeck <wim@iguana.be>2009-06-08 13:41:51 -0400
committerWim Van Sebroeck <wim@iguana.be>2009-06-18 03:31:58 -0400
commit55e8ddecec6a9dbe35a99d03cc4189fd7c56e600 (patch)
treeb0b13894e0054481601bc173f73facbc0c24fa23
parentde8cd9a3067e25a860c225f794e6b249b73aa6b1 (diff)
[WATCHDOG] iTCO_wdt: Fix ICH7+ reboot issue.
Bugzilla: 9868 & 10195. There seems to be a bug in the SMM code that handles the TCO Timeout SMI. Andriy Gapon found that the code on his DG33TL system does the following: > The handler is quite simple - it tests value in TCO1_CNT against 0x800, i.e. > checks TCO_TMR_HLT. If the bit is set the handler goes into an infinite loop, > apparently to allow the second timeout and reboot. Otherwise it simply clears > TIMEOUT bit in TCO1_STS and that's it. > So the logic seems to be reversed, because it is hard to see how TIMEOUT can > get set to 1 and SMI generated when TCO_TMR_HLT is set (other than a > transitional effect). The only trick we have is to bypass the SMM code by turning off the generation of the SMI#. The trick can only be enabled by setting the vendorsupport module parameter to 911. This trick doesn't work well on laptops. Note: this is a dirty hack. Please handle with care. The only real fix is for the bug in the SMM BIOS code to be fixed. Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
-rw-r--r--drivers/watchdog/iTCO_vendor_support.c82
1 files changed, 76 insertions, 6 deletions
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 843ef626bc50..5133bca5ccbe 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -19,7 +19,7 @@
19 19
20/* Module and version information */ 20/* Module and version information */
21#define DRV_NAME "iTCO_vendor_support" 21#define DRV_NAME "iTCO_vendor_support"
22#define DRV_VERSION "1.03" 22#define DRV_VERSION "1.04"
23#define PFX DRV_NAME ": " 23#define PFX DRV_NAME ": "
24 24
25/* Includes */ 25/* Includes */
@@ -44,11 +44,14 @@
44#define SUPERMICRO_OLD_BOARD 1 44#define SUPERMICRO_OLD_BOARD 1
45/* SuperMicro Pentium 4 / Xeon 4 / EMT64T Era Systems */ 45/* SuperMicro Pentium 4 / Xeon 4 / EMT64T Era Systems */
46#define SUPERMICRO_NEW_BOARD 2 46#define SUPERMICRO_NEW_BOARD 2
47/* Broken BIOS */
48#define BROKEN_BIOS 911
47 49
48static int vendorsupport; 50static int vendorsupport;
49module_param(vendorsupport, int, 0); 51module_param(vendorsupport, int, 0);
50MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default=" 52MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
51 "0 (none), 1=SuperMicro Pent3, 2=SuperMicro Pent4+"); 53 "0 (none), 1=SuperMicro Pent3, 2=SuperMicro Pent4+, "
54 "911=Broken SMI BIOS");
52 55
53/* 56/*
54 * Vendor Specific Support 57 * Vendor Specific Support
@@ -243,25 +246,92 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat)
243} 246}
244 247
245/* 248/*
249 * Vendor Support: 911
250 * Board: Some Intel ICHx based motherboards
251 * iTCO chipset: ICH7+
252 *
253 * Some Intel motherboards have a broken BIOS implementation: i.e.
254 * the SMI handler clears the TIMEOUT bit in the TCO1_STS register
255 * and does not reload the timer. Thus the TCO watchdog does not reboot
256 * the system.
257 *
258 * These are the conclusions of Andriy Gapon <avg@icyb.net.ua> after
259 * debugging: the SMI handler is quite simple - it tests value in
260 * TCO1_CNT against 0x800, i.e. checks TCO_TMR_HLT. If the bit is set
261 * the handler goes into an infinite loop, apparently to allow the
262 * second timeout and reboot. Otherwise it simply clears TIMEOUT bit
263 * in TCO1_STS and that's it.
264 * So the logic seems to be reversed, because it is hard to see how
265 * TIMEOUT can get set to 1 and SMI generated when TCO_TMR_HLT is set
266 * (other than a transitional effect).
267 *
268 * The only fix found to get the motherboard(s) to reboot is to set
269 * the GBL_SMI_EN bit to 0. This is a dirty hack that bypasses the
270 * broken code by disabling Global SMI.
271 *
272 * WARNING: globally disabling SMI could possibly lead to dramatic
273 * problems, especially on laptops! I.e. various ACPI things where
274 * SMI is used for communication between OS and firmware.
275 *
276 * Don't use this fix if you don't need to!!!
277 */
278
279static void broken_bios_start(unsigned long acpibase)
280{
281 unsigned long val32;
282
283 val32 = inl(SMI_EN);
284 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI#
285 Bit 0: GBL_SMI_EN -> 0 = No SMI# will be generated by ICH. */
286 val32 &= 0xffffdffe;
287 outl(val32, SMI_EN);
288}
289
290static void broken_bios_stop(unsigned long acpibase)
291{
292 unsigned long val32;
293
294 val32 = inl(SMI_EN);
295 /* Bit 13: TCO_EN -> 1 = Enables TCO logic generating an SMI#
296 Bit 0: GBL_SMI_EN -> 1 = Turn global SMI on again. */
297 val32 |= 0x00002001;
298 outl(val32, SMI_EN);
299}
300
301/*
246 * Generic Support Functions 302 * Generic Support Functions
247 */ 303 */
248 304
249void iTCO_vendor_pre_start(unsigned long acpibase, 305void iTCO_vendor_pre_start(unsigned long acpibase,
250 unsigned int heartbeat) 306 unsigned int heartbeat)
251{ 307{
252 if (vendorsupport == SUPERMICRO_OLD_BOARD) 308 switch (vendorsupport) {
309 case SUPERMICRO_OLD_BOARD:
253 supermicro_old_pre_start(acpibase); 310 supermicro_old_pre_start(acpibase);
254 else if (vendorsupport == SUPERMICRO_NEW_BOARD) 311 break;
312 case SUPERMICRO_NEW_BOARD:
255 supermicro_new_pre_start(heartbeat); 313 supermicro_new_pre_start(heartbeat);
314 break;
315 case BROKEN_BIOS:
316 broken_bios_start(acpibase);
317 break;
318 }
256} 319}
257EXPORT_SYMBOL(iTCO_vendor_pre_start); 320EXPORT_SYMBOL(iTCO_vendor_pre_start);
258 321
259void iTCO_vendor_pre_stop(unsigned long acpibase) 322void iTCO_vendor_pre_stop(unsigned long acpibase)
260{ 323{
261 if (vendorsupport == SUPERMICRO_OLD_BOARD) 324 switch (vendorsupport) {
325 case SUPERMICRO_OLD_BOARD:
262 supermicro_old_pre_stop(acpibase); 326 supermicro_old_pre_stop(acpibase);
263 else if (vendorsupport == SUPERMICRO_NEW_BOARD) 327 break;
328 case SUPERMICRO_NEW_BOARD:
264 supermicro_new_pre_stop(); 329 supermicro_new_pre_stop();
330 break;
331 case BROKEN_BIOS:
332 broken_bios_stop(acpibase);
333 break;
334 }
265} 335}
266EXPORT_SYMBOL(iTCO_vendor_pre_stop); 336EXPORT_SYMBOL(iTCO_vendor_pre_stop);
267 337