diff options
author | Corey Minyard <cminyard@mvista.com> | 2011-12-19 20:12:02 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-12-20 13:25:04 -0500 |
commit | b75d91f7ca513f0a4d0d1ad0942fb90cf5a2bc88 (patch) | |
tree | 3ed9062637e769ab8bd89748baf2b167244bbda8 /drivers/char | |
parent | ff05b6f7ae762b6eb464183eec994b28ea09f6dd (diff) |
ipmi_watchdog: restore settings when BMC reset
If the BMC gets reset, it will return 0x80 response errors.
In less than a week
# grep "Error 80 on cmd 22" /var/log/kernel |wc -l
378681
In this case, it is probably a good idea to restore the IPMI settings.
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/char')
-rw-r--r-- | drivers/char/ipmi/ipmi_watchdog.c | 41 |
1 files changed, 38 insertions, 3 deletions
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index c2917ffad2c2..34767a6d7f42 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c | |||
@@ -139,6 +139,8 @@ | |||
139 | #define IPMI_WDOG_SET_TIMER 0x24 | 139 | #define IPMI_WDOG_SET_TIMER 0x24 |
140 | #define IPMI_WDOG_GET_TIMER 0x25 | 140 | #define IPMI_WDOG_GET_TIMER 0x25 |
141 | 141 | ||
142 | #define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80 | ||
143 | |||
142 | /* These are here until the real ones get into the watchdog.h interface. */ | 144 | /* These are here until the real ones get into the watchdog.h interface. */ |
143 | #ifndef WDIOC_GETTIMEOUT | 145 | #ifndef WDIOC_GETTIMEOUT |
144 | #define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) | 146 | #define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) |
@@ -596,6 +598,7 @@ static int ipmi_heartbeat(void) | |||
596 | struct kernel_ipmi_msg msg; | 598 | struct kernel_ipmi_msg msg; |
597 | int rv; | 599 | int rv; |
598 | struct ipmi_system_interface_addr addr; | 600 | struct ipmi_system_interface_addr addr; |
601 | int timeout_retries = 0; | ||
599 | 602 | ||
600 | if (ipmi_ignore_heartbeat) | 603 | if (ipmi_ignore_heartbeat) |
601 | return 0; | 604 | return 0; |
@@ -616,6 +619,7 @@ static int ipmi_heartbeat(void) | |||
616 | 619 | ||
617 | mutex_lock(&heartbeat_lock); | 620 | mutex_lock(&heartbeat_lock); |
618 | 621 | ||
622 | restart: | ||
619 | atomic_set(&heartbeat_tofree, 2); | 623 | atomic_set(&heartbeat_tofree, 2); |
620 | 624 | ||
621 | /* | 625 | /* |
@@ -653,7 +657,33 @@ static int ipmi_heartbeat(void) | |||
653 | /* Wait for the heartbeat to be sent. */ | 657 | /* Wait for the heartbeat to be sent. */ |
654 | wait_for_completion(&heartbeat_wait); | 658 | wait_for_completion(&heartbeat_wait); |
655 | 659 | ||
656 | if (heartbeat_recv_msg.msg.data[0] != 0) { | 660 | if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) { |
661 | timeout_retries++; | ||
662 | if (timeout_retries > 3) { | ||
663 | printk(KERN_ERR PFX ": Unable to restore the IPMI" | ||
664 | " watchdog's settings, giving up.\n"); | ||
665 | rv = -EIO; | ||
666 | goto out_unlock; | ||
667 | } | ||
668 | |||
669 | /* | ||
670 | * The timer was not initialized, that means the BMC was | ||
671 | * probably reset and lost the watchdog information. Attempt | ||
672 | * to restore the timer's info. Note that we still hold | ||
673 | * the heartbeat lock, to keep a heartbeat from happening | ||
674 | * in this process, so must say no heartbeat to avoid a | ||
675 | * deadlock on this mutex. | ||
676 | */ | ||
677 | rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); | ||
678 | if (rv) { | ||
679 | printk(KERN_ERR PFX ": Unable to send the command to" | ||
680 | " set the watchdog's settings, giving up.\n"); | ||
681 | goto out_unlock; | ||
682 | } | ||
683 | |||
684 | /* We might need a new heartbeat, so do it now */ | ||
685 | goto restart; | ||
686 | } else if (heartbeat_recv_msg.msg.data[0] != 0) { | ||
657 | /* | 687 | /* |
658 | * Got an error in the heartbeat response. It was already | 688 | * Got an error in the heartbeat response. It was already |
659 | * reported in ipmi_wdog_msg_handler, but we should return | 689 | * reported in ipmi_wdog_msg_handler, but we should return |
@@ -662,6 +692,7 @@ static int ipmi_heartbeat(void) | |||
662 | rv = -EINVAL; | 692 | rv = -EINVAL; |
663 | } | 693 | } |
664 | 694 | ||
695 | out_unlock: | ||
665 | mutex_unlock(&heartbeat_lock); | 696 | mutex_unlock(&heartbeat_lock); |
666 | 697 | ||
667 | return rv; | 698 | return rv; |
@@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = { | |||
922 | static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, | 953 | static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, |
923 | void *handler_data) | 954 | void *handler_data) |
924 | { | 955 | { |
925 | if (msg->msg.data[0] != 0) { | 956 | if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER && |
957 | msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) | ||
958 | printk(KERN_INFO PFX "response: The IPMI controller appears" | ||
959 | " to have been reset, will attempt to reinitialize" | ||
960 | " the watchdog timer\n"); | ||
961 | else if (msg->msg.data[0] != 0) | ||
926 | printk(KERN_ERR PFX "response: Error %x on cmd %x\n", | 962 | printk(KERN_ERR PFX "response: Error %x on cmd %x\n", |
927 | msg->msg.data[0], | 963 | msg->msg.data[0], |
928 | msg->msg.cmd); | 964 | msg->msg.cmd); |
929 | } | ||
930 | 965 | ||
931 | ipmi_free_recv_msg(msg); | 966 | ipmi_free_recv_msg(msg); |
932 | } | 967 | } |