diff options
author | Wim Van Sebroeck <wim@iguana.be> | 2009-06-08 13:41:51 -0400 |
---|---|---|
committer | Wim Van Sebroeck <wim@iguana.be> | 2009-06-18 03:31:58 -0400 |
commit | 55e8ddecec6a9dbe35a99d03cc4189fd7c56e600 (patch) | |
tree | b0b13894e0054481601bc173f73facbc0c24fa23 /drivers/watchdog | |
parent | de8cd9a3067e25a860c225f794e6b249b73aa6b1 (diff) |
[WATCHDOG] iTCO_wdt: Fix ICH7+ reboot issue.
Bugzilla: 9868 & 10195.
There seems to be a bug in the SMM code that handles the TCO Timeout SMI.
Andriy Gapon found that the code on his DG33TL system does the following:
> The handler is quite simple - it tests value in TCO1_CNT against 0x800, i.e.
> checks TCO_TMR_HLT. If the bit is set the handler goes into an infinite loop,
> apparently to allow the second timeout and reboot. Otherwise it simply clears
> TIMEOUT bit in TCO1_STS and that's it.
> So the logic seems to be reversed, because it is hard to see how TIMEOUT can
> get set to 1 and SMI generated when TCO_TMR_HLT is set (other than a
> transitional effect).
The only trick we have is to bypass the SMM code by turning off the generation
of the SMI#. The trick can only be enabled by setting the vendorsupport module
parameter to 911. This trick doesn't work well on laptops.
Note: this is a dirty hack. Please handle with care. The only real fix is for
the bug in the SMM BIOS code to get fixed.
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Diffstat (limited to 'drivers/watchdog')
-rw-r--r-- | drivers/watchdog/iTCO_vendor_support.c | 82 |
1 files changed, 76 insertions, 6 deletions
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c index 843ef626bc50..5133bca5ccbe 100644 --- a/drivers/watchdog/iTCO_vendor_support.c +++ b/drivers/watchdog/iTCO_vendor_support.c | |||
@@ -19,7 +19,7 @@ | |||
19 | 19 | ||
20 | /* Module and version information */ | 20 | /* Module and version information */ |
21 | #define DRV_NAME "iTCO_vendor_support" | 21 | #define DRV_NAME "iTCO_vendor_support" |
22 | #define DRV_VERSION "1.03" | 22 | #define DRV_VERSION "1.04" |
23 | #define PFX DRV_NAME ": " | 23 | #define PFX DRV_NAME ": " |
24 | 24 | ||
25 | /* Includes */ | 25 | /* Includes */ |
@@ -44,11 +44,14 @@ | |||
44 | #define SUPERMICRO_OLD_BOARD 1 | 44 | #define SUPERMICRO_OLD_BOARD 1 |
45 | /* SuperMicro Pentium 4 / Xeon 4 / EMT64T Era Systems */ | 45 | /* SuperMicro Pentium 4 / Xeon 4 / EMT64T Era Systems */ |
46 | #define SUPERMICRO_NEW_BOARD 2 | 46 | #define SUPERMICRO_NEW_BOARD 2 |
47 | /* Broken BIOS */ | ||
48 | #define BROKEN_BIOS 911 | ||
47 | 49 | ||
48 | static int vendorsupport; | 50 | static int vendorsupport; |
49 | module_param(vendorsupport, int, 0); | 51 | module_param(vendorsupport, int, 0); |
50 | MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default=" | 52 | MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default=" |
51 | "0 (none), 1=SuperMicro Pent3, 2=SuperMicro Pent4+"); | 53 | "0 (none), 1=SuperMicro Pent3, 2=SuperMicro Pent4+, " |
54 | "911=Broken SMI BIOS"); | ||
52 | 55 | ||
53 | /* | 56 | /* |
54 | * Vendor Specific Support | 57 | * Vendor Specific Support |
@@ -243,25 +246,92 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat) | |||
243 | } | 246 | } |
244 | 247 | ||
245 | /* | 248 | /* |
249 | * Vendor Support: 911 | ||
250 | * Board: Some Intel ICHx based motherboards | ||
251 | * iTCO chipset: ICH7+ | ||
252 | * | ||
253 | * Some Intel motherboards have a broken BIOS implementation: i.e. | ||
254 | * the SMI handler clears the TIMEOUT bit in the TCO1_STS register | ||
255 | * and does not reload the time. Thus the TCO watchdog does not reboot | ||
256 | * the system. | ||
257 | * | ||
258 | * These are the conclusions of Andriy Gapon <avg@icyb.net.ua> after | ||
259 | * debugging: the SMI handler is quite simple - it tests value in | ||
260 | * TCO1_CNT against 0x800, i.e. checks TCO_TMR_HLT. If the bit is set | ||
261 | * the handler goes into an infinite loop, apparently to allow the | ||
262 | * second timeout and reboot. Otherwise it simply clears TIMEOUT bit | ||
263 | * in TCO1_STS and that's it. | ||
264 | * So the logic seems to be reversed, because it is hard to see how | ||
265 | * TIMEOUT can get set to 1 and SMI generated when TCO_TMR_HLT is set | ||
266 | * (other than a transitional effect). | ||
267 | * | ||
268 | * The only fix found to get the motherboard(s) to reboot is to set | ||
269 | * the GBL_SMI_EN bit to 0. This is a dirty hack that bypasses the | ||
270 | * broken code by disabling Global SMI. | ||
271 | * | ||
272 | * WARNING: globally disabling SMI could possibly lead to dramatic | ||
273 | * problems, especially on laptops! I.e. various ACPI things where | ||
274 | * SMI is used for communication between OS and firmware. | ||
275 | * | ||
276 | * Don't use this fix if you don't need to!!! | ||
277 | */ | ||
278 | |||
279 | static void broken_bios_start(unsigned long acpibase) | ||
280 | { | ||
281 | unsigned long val32; | ||
282 | |||
283 | val32 = inl(SMI_EN); | ||
284 | /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# | ||
285 | Bit 0: GBL_SMI_EN -> 0 = No SMI# will be generated by ICH. */ | ||
286 | val32 &= 0xffffdffe; | ||
287 | outl(val32, SMI_EN); | ||
288 | } | ||
289 | |||
290 | static void broken_bios_stop(unsigned long acpibase) | ||
291 | { | ||
292 | unsigned long val32; | ||
293 | |||
294 | val32 = inl(SMI_EN); | ||
295 | /* Bit 13: TCO_EN -> 1 = Enables TCO logic generating an SMI# | ||
296 | Bit 0: GBL_SMI_EN -> 1 = Turn global SMI on again. */ | ||
297 | val32 |= 0x00002001; | ||
298 | outl(val32, SMI_EN); | ||
299 | } | ||
300 | |||
301 | /* | ||
246 | * Generic Support Functions | 302 | * Generic Support Functions |
247 | */ | 303 | */ |
248 | 304 | ||
249 | void iTCO_vendor_pre_start(unsigned long acpibase, | 305 | void iTCO_vendor_pre_start(unsigned long acpibase, |
250 | unsigned int heartbeat) | 306 | unsigned int heartbeat) |
251 | { | 307 | { |
252 | if (vendorsupport == SUPERMICRO_OLD_BOARD) | 308 | switch (vendorsupport) { |
309 | case SUPERMICRO_OLD_BOARD: | ||
253 | supermicro_old_pre_start(acpibase); | 310 | supermicro_old_pre_start(acpibase); |
254 | else if (vendorsupport == SUPERMICRO_NEW_BOARD) | 311 | break; |
312 | case SUPERMICRO_NEW_BOARD: | ||
255 | supermicro_new_pre_start(heartbeat); | 313 | supermicro_new_pre_start(heartbeat); |
314 | break; | ||
315 | case BROKEN_BIOS: | ||
316 | broken_bios_start(acpibase); | ||
317 | break; | ||
318 | } | ||
256 | } | 319 | } |
257 | EXPORT_SYMBOL(iTCO_vendor_pre_start); | 320 | EXPORT_SYMBOL(iTCO_vendor_pre_start); |
258 | 321 | ||
259 | void iTCO_vendor_pre_stop(unsigned long acpibase) | 322 | void iTCO_vendor_pre_stop(unsigned long acpibase) |
260 | { | 323 | { |
261 | if (vendorsupport == SUPERMICRO_OLD_BOARD) | 324 | switch (vendorsupport) { |
325 | case SUPERMICRO_OLD_BOARD: | ||
262 | supermicro_old_pre_stop(acpibase); | 326 | supermicro_old_pre_stop(acpibase); |
263 | else if (vendorsupport == SUPERMICRO_NEW_BOARD) | 327 | break; |
328 | case SUPERMICRO_NEW_BOARD: | ||
264 | supermicro_new_pre_stop(); | 329 | supermicro_new_pre_stop(); |
330 | break; | ||
331 | case BROKEN_BIOS: | ||
332 | broken_bios_stop(acpibase); | ||
333 | break; | ||
334 | } | ||
265 | } | 335 | } |
266 | EXPORT_SYMBOL(iTCO_vendor_pre_stop); | 336 | EXPORT_SYMBOL(iTCO_vendor_pre_stop); |
267 | 337 | ||