diff options
Diffstat (limited to 'drivers/char')
-rw-r--r-- | drivers/char/ipmi/ipmi_watchdog.c | 136 |
1 files changed, 94 insertions, 42 deletions
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 6b634e8d9519..9e9c5de2e549 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c | |||
@@ -49,9 +49,19 @@ | |||
49 | #include <linux/poll.h> | 49 | #include <linux/poll.h> |
50 | #include <linux/string.h> | 50 | #include <linux/string.h> |
51 | #include <linux/ctype.h> | 51 | #include <linux/ctype.h> |
52 | #include <linux/delay.h> | ||
52 | #include <asm/atomic.h> | 53 | #include <asm/atomic.h> |
53 | #ifdef CONFIG_X86_LOCAL_APIC | 54 | |
54 | #include <asm/apic.h> | 55 | #ifdef CONFIG_X86 |
56 | /* This is ugly, but I've determined that x86 is the only architecture | ||
57 | that can reasonably support the IPMI NMI watchdog timeout at this | ||
58 | time. If another architecture adds this capability somehow, it | ||
59 | will have to be a somewhat different mechanism and I have no idea | ||
60 | how it will work. So in the unlikely event that another | ||
61 | architecture supports this, we can figure out a good generic | ||
62 | mechanism for it at that time. */ | ||
63 | #include <asm/kdebug.h> | ||
64 | #define HAVE_DIE_NMI_POST | ||
55 | #endif | 65 | #endif |
56 | 66 | ||
57 | #define PFX "IPMI Watchdog: " | 67 | #define PFX "IPMI Watchdog: " |
@@ -317,6 +327,11 @@ static unsigned char ipmi_version_minor; | |||
317 | /* If a pretimeout occurs, this is used to allow only one panic to happen. */ | 327 | /* If a pretimeout occurs, this is used to allow only one panic to happen. */ |
318 | static atomic_t preop_panic_excl = ATOMIC_INIT(-1); | 328 | static atomic_t preop_panic_excl = ATOMIC_INIT(-1); |
319 | 329 | ||
330 | #ifdef HAVE_DIE_NMI_POST | ||
331 | static int testing_nmi; | ||
332 | static int nmi_handler_registered; | ||
333 | #endif | ||
334 | |||
320 | static int ipmi_heartbeat(void); | 335 | static int ipmi_heartbeat(void); |
321 | static void panic_halt_ipmi_heartbeat(void); | 336 | static void panic_halt_ipmi_heartbeat(void); |
322 | 337 | ||
@@ -358,6 +373,10 @@ static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg, | |||
358 | int hbnow = 0; | 373 | int hbnow = 0; |
359 | 374 | ||
360 | 375 | ||
376 | /* These can be cleared as we are setting the timeout. */ | ||
377 | ipmi_start_timer_on_heartbeat = 0; | ||
378 | pretimeout_since_last_heartbeat = 0; | ||
379 | |||
361 | data[0] = 0; | 380 | data[0] = 0; |
362 | WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); | 381 | WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); |
363 | 382 | ||
@@ -432,13 +451,12 @@ static int ipmi_set_timeout(int do_heartbeat) | |||
432 | 451 | ||
433 | wait_for_completion(&set_timeout_wait); | 452 | wait_for_completion(&set_timeout_wait); |
434 | 453 | ||
454 | mutex_unlock(&set_timeout_lock); | ||
455 | |||
435 | if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) | 456 | if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) |
436 | || ((send_heartbeat_now) | 457 | || ((send_heartbeat_now) |
437 | && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) | 458 | && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) |
438 | { | ||
439 | rv = ipmi_heartbeat(); | 459 | rv = ipmi_heartbeat(); |
440 | } | ||
441 | mutex_unlock(&set_timeout_lock); | ||
442 | 460 | ||
443 | out: | 461 | out: |
444 | return rv; | 462 | return rv; |
@@ -518,12 +536,10 @@ static int ipmi_heartbeat(void) | |||
518 | int rv; | 536 | int rv; |
519 | struct ipmi_system_interface_addr addr; | 537 | struct ipmi_system_interface_addr addr; |
520 | 538 | ||
521 | if (ipmi_ignore_heartbeat) { | 539 | if (ipmi_ignore_heartbeat) |
522 | return 0; | 540 | return 0; |
523 | } | ||
524 | 541 | ||
525 | if (ipmi_start_timer_on_heartbeat) { | 542 | if (ipmi_start_timer_on_heartbeat) { |
526 | ipmi_start_timer_on_heartbeat = 0; | ||
527 | ipmi_watchdog_state = action_val; | 543 | ipmi_watchdog_state = action_val; |
528 | return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); | 544 | return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); |
529 | } else if (pretimeout_since_last_heartbeat) { | 545 | } else if (pretimeout_since_last_heartbeat) { |
@@ -531,7 +547,6 @@ static int ipmi_heartbeat(void) | |||
531 | We don't want to set the action, though, we want to | 547 | We don't want to set the action, though, we want to |
532 | leave that alone (thus it can't be combined with the | 548 | leave that alone (thus it can't be combined with the |
533 | above operation. */ | 549 | above operation. */ |
534 | pretimeout_since_last_heartbeat = 0; | ||
535 | return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); | 550 | return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); |
536 | } | 551 | } |
537 | 552 | ||
@@ -919,6 +934,45 @@ static void ipmi_register_watchdog(int ipmi_intf) | |||
919 | printk(KERN_CRIT PFX "Unable to register misc device\n"); | 934 | printk(KERN_CRIT PFX "Unable to register misc device\n"); |
920 | } | 935 | } |
921 | 936 | ||
937 | #ifdef HAVE_DIE_NMI_POST | ||
938 | if (nmi_handler_registered) { | ||
939 | int old_pretimeout = pretimeout; | ||
940 | int old_timeout = timeout; | ||
941 | int old_preop_val = preop_val; | ||
942 | |||
943 | /* Set the pretimeout to go off in a second and give | ||
944 | ourselves plenty of time to stop the timer. */ | ||
945 | ipmi_watchdog_state = WDOG_TIMEOUT_RESET; | ||
946 | preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */ | ||
947 | pretimeout = 99; | ||
948 | timeout = 100; | ||
949 | |||
950 | testing_nmi = 1; | ||
951 | |||
952 | rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); | ||
953 | if (rv) { | ||
954 | printk(KERN_WARNING PFX "Error starting timer to" | ||
955 | " test NMI: 0x%x. The NMI pretimeout will" | ||
956 | " likely not work\n", rv); | ||
957 | rv = 0; | ||
958 | goto out_restore; | ||
959 | } | ||
960 | |||
961 | msleep(1500); | ||
962 | |||
963 | if (testing_nmi != 2) { | ||
964 | printk(KERN_WARNING PFX "IPMI NMI didn't seem to" | ||
965 | " occur. The NMI pretimeout will" | ||
966 | " likely not work\n"); | ||
967 | } | ||
968 | out_restore: | ||
969 | testing_nmi = 0; | ||
970 | preop_val = old_preop_val; | ||
971 | pretimeout = old_pretimeout; | ||
972 | timeout = old_timeout; | ||
973 | } | ||
974 | #endif | ||
975 | |||
922 | out: | 976 | out: |
923 | up_write(&register_sem); | 977 | up_write(&register_sem); |
924 | 978 | ||
@@ -928,6 +982,10 @@ static void ipmi_register_watchdog(int ipmi_intf) | |||
928 | ipmi_watchdog_state = action_val; | 982 | ipmi_watchdog_state = action_val; |
929 | ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); | 983 | ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); |
930 | printk(KERN_INFO PFX "Starting now!\n"); | 984 | printk(KERN_INFO PFX "Starting now!\n"); |
985 | } else { | ||
986 | /* Stop the timer now. */ | ||
987 | ipmi_watchdog_state = WDOG_TIMEOUT_NONE; | ||
988 | ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); | ||
931 | } | 989 | } |
932 | } | 990 | } |
933 | 991 | ||
@@ -964,17 +1022,28 @@ static void ipmi_unregister_watchdog(int ipmi_intf) | |||
964 | up_write(&register_sem); | 1022 | up_write(&register_sem); |
965 | } | 1023 | } |
966 | 1024 | ||
967 | #ifdef HAVE_NMI_HANDLER | 1025 | #ifdef HAVE_DIE_NMI_POST |
968 | static int | 1026 | static int |
969 | ipmi_nmi(void *dev_id, int cpu, int handled) | 1027 | ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) |
970 | { | 1028 | { |
1029 | if (val != DIE_NMI_POST) | ||
1030 | return NOTIFY_OK; | ||
1031 | |||
1032 | if (testing_nmi) { | ||
1033 | testing_nmi = 2; | ||
1034 | return NOTIFY_STOP; | ||
1035 | } | ||
1036 | |||
971 | /* If we are not expecting a timeout, ignore it. */ | 1037 | /* If we are not expecting a timeout, ignore it. */ |
972 | if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) | 1038 | if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) |
973 | return NOTIFY_DONE; | 1039 | return NOTIFY_OK; |
1040 | |||
1041 | if (preaction_val != WDOG_PRETIMEOUT_NMI) | ||
1042 | return NOTIFY_OK; | ||
974 | 1043 | ||
975 | /* If no one else handled the NMI, we assume it was the IPMI | 1044 | /* If no one else handled the NMI, we assume it was the IPMI |
976 | watchdog. */ | 1045 | watchdog. */ |
977 | if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) { | 1046 | if (preop_val == WDOG_PREOP_PANIC) { |
978 | /* On some machines, the heartbeat will give | 1047 | /* On some machines, the heartbeat will give |
979 | an error and not work unless we re-enable | 1048 | an error and not work unless we re-enable |
980 | the timer. So do so. */ | 1049 | the timer. So do so. */ |
@@ -983,18 +1052,12 @@ ipmi_nmi(void *dev_id, int cpu, int handled) | |||
983 | panic(PFX "pre-timeout"); | 1052 | panic(PFX "pre-timeout"); |
984 | } | 1053 | } |
985 | 1054 | ||
986 | return NOTIFY_DONE; | 1055 | return NOTIFY_STOP; |
987 | } | 1056 | } |
988 | 1057 | ||
989 | static struct nmi_handler ipmi_nmi_handler = | 1058 | static struct notifier_block ipmi_nmi_handler = { |
990 | { | 1059 | .notifier_call = ipmi_nmi |
991 | .link = LIST_HEAD_INIT(ipmi_nmi_handler.link), | ||
992 | .dev_name = "ipmi_watchdog", | ||
993 | .dev_id = NULL, | ||
994 | .handler = ipmi_nmi, | ||
995 | .priority = 0, /* Call us last. */ | ||
996 | }; | 1060 | }; |
997 | int nmi_handler_registered; | ||
998 | #endif | 1061 | #endif |
999 | 1062 | ||
1000 | static int wdog_reboot_handler(struct notifier_block *this, | 1063 | static int wdog_reboot_handler(struct notifier_block *this, |
@@ -1111,7 +1174,7 @@ static int preaction_op(const char *inval, char *outval) | |||
1111 | preaction_val = WDOG_PRETIMEOUT_NONE; | 1174 | preaction_val = WDOG_PRETIMEOUT_NONE; |
1112 | else if (strcmp(inval, "pre_smi") == 0) | 1175 | else if (strcmp(inval, "pre_smi") == 0) |
1113 | preaction_val = WDOG_PRETIMEOUT_SMI; | 1176 | preaction_val = WDOG_PRETIMEOUT_SMI; |
1114 | #ifdef HAVE_NMI_HANDLER | 1177 | #ifdef HAVE_DIE_NMI_POST |
1115 | else if (strcmp(inval, "pre_nmi") == 0) | 1178 | else if (strcmp(inval, "pre_nmi") == 0) |
1116 | preaction_val = WDOG_PRETIMEOUT_NMI; | 1179 | preaction_val = WDOG_PRETIMEOUT_NMI; |
1117 | #endif | 1180 | #endif |
@@ -1145,7 +1208,7 @@ static int preop_op(const char *inval, char *outval) | |||
1145 | 1208 | ||
1146 | static void check_parms(void) | 1209 | static void check_parms(void) |
1147 | { | 1210 | { |
1148 | #ifdef HAVE_NMI_HANDLER | 1211 | #ifdef HAVE_DIE_NMI_POST |
1149 | int do_nmi = 0; | 1212 | int do_nmi = 0; |
1150 | int rv; | 1213 | int rv; |
1151 | 1214 | ||
@@ -1158,20 +1221,9 @@ static void check_parms(void) | |||
1158 | preop_op("preop_none", NULL); | 1221 | preop_op("preop_none", NULL); |
1159 | do_nmi = 0; | 1222 | do_nmi = 0; |
1160 | } | 1223 | } |
1161 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1162 | if (nmi_watchdog == NMI_IO_APIC) { | ||
1163 | printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC" | ||
1164 | " mode (value is %d), that is incompatible" | ||
1165 | " with using NMI in the IPMI watchdog." | ||
1166 | " Disabling IPMI nmi pretimeout.\n", | ||
1167 | nmi_watchdog); | ||
1168 | preaction_val = WDOG_PRETIMEOUT_NONE; | ||
1169 | do_nmi = 0; | ||
1170 | } | ||
1171 | #endif | ||
1172 | } | 1224 | } |
1173 | if (do_nmi && !nmi_handler_registered) { | 1225 | if (do_nmi && !nmi_handler_registered) { |
1174 | rv = request_nmi(&ipmi_nmi_handler); | 1226 | rv = register_die_notifier(&ipmi_nmi_handler); |
1175 | if (rv) { | 1227 | if (rv) { |
1176 | printk(KERN_WARNING PFX | 1228 | printk(KERN_WARNING PFX |
1177 | "Can't register nmi handler\n"); | 1229 | "Can't register nmi handler\n"); |
@@ -1179,7 +1231,7 @@ static void check_parms(void) | |||
1179 | } else | 1231 | } else |
1180 | nmi_handler_registered = 1; | 1232 | nmi_handler_registered = 1; |
1181 | } else if (!do_nmi && nmi_handler_registered) { | 1233 | } else if (!do_nmi && nmi_handler_registered) { |
1182 | release_nmi(&ipmi_nmi_handler); | 1234 | unregister_die_notifier(&ipmi_nmi_handler); |
1183 | nmi_handler_registered = 0; | 1235 | nmi_handler_registered = 0; |
1184 | } | 1236 | } |
1185 | #endif | 1237 | #endif |
@@ -1215,9 +1267,9 @@ static int __init ipmi_wdog_init(void) | |||
1215 | 1267 | ||
1216 | rv = ipmi_smi_watcher_register(&smi_watcher); | 1268 | rv = ipmi_smi_watcher_register(&smi_watcher); |
1217 | if (rv) { | 1269 | if (rv) { |
1218 | #ifdef HAVE_NMI_HANDLER | 1270 | #ifdef HAVE_DIE_NMI_POST |
1219 | if (preaction_val == WDOG_PRETIMEOUT_NMI) | 1271 | if (nmi_handler_registered) |
1220 | release_nmi(&ipmi_nmi_handler); | 1272 | unregister_die_notifier(&ipmi_nmi_handler); |
1221 | #endif | 1273 | #endif |
1222 | atomic_notifier_chain_unregister(&panic_notifier_list, | 1274 | atomic_notifier_chain_unregister(&panic_notifier_list, |
1223 | &wdog_panic_notifier); | 1275 | &wdog_panic_notifier); |
@@ -1236,9 +1288,9 @@ static void __exit ipmi_wdog_exit(void) | |||
1236 | ipmi_smi_watcher_unregister(&smi_watcher); | 1288 | ipmi_smi_watcher_unregister(&smi_watcher); |
1237 | ipmi_unregister_watchdog(watchdog_ifnum); | 1289 | ipmi_unregister_watchdog(watchdog_ifnum); |
1238 | 1290 | ||
1239 | #ifdef HAVE_NMI_HANDLER | 1291 | #ifdef HAVE_DIE_NMI_POST |
1240 | if (nmi_handler_registered) | 1292 | if (nmi_handler_registered) |
1241 | release_nmi(&ipmi_nmi_handler); | 1293 | unregister_die_notifier(&ipmi_nmi_handler); |
1242 | #endif | 1294 | #endif |
1243 | 1295 | ||
1244 | atomic_notifier_chain_unregister(&panic_notifier_list, | 1296 | atomic_notifier_chain_unregister(&panic_notifier_list, |