aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/char/ipmi/ipmi_watchdog.c
diff options
context:
space:
mode:
author Corey Minyard <cminyard@mvista.com> 2011-12-19 20:12:02 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-12-20 13:25:04 -0500
commit b75d91f7ca513f0a4d0d1ad0942fb90cf5a2bc88 (patch)
tree 3ed9062637e769ab8bd89748baf2b167244bbda8 /drivers/char/ipmi/ipmi_watchdog.c
parent ff05b6f7ae762b6eb464183eec994b28ea09f6dd (diff)
ipmi_watchdog: restore settings when BMC reset
If the BMC gets reset, it will return 0x80 response errors.

In less than a week:

    # grep "Error 80 on cmd 22" /var/log/kernel |wc -l
    378681

In this case, it is probably a good idea to restore the IPMI settings.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/char/ipmi/ipmi_watchdog.c')
-rw-r--r-- drivers/char/ipmi/ipmi_watchdog.c | 41
1 files changed, 38 insertions, 3 deletions
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index c2917ffad2c2..34767a6d7f42 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -139,6 +139,8 @@
139#define IPMI_WDOG_SET_TIMER 0x24 139#define IPMI_WDOG_SET_TIMER 0x24
140#define IPMI_WDOG_GET_TIMER 0x25 140#define IPMI_WDOG_GET_TIMER 0x25
141 141
142#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80
143
142/* These are here until the real ones get into the watchdog.h interface. */ 144/* These are here until the real ones get into the watchdog.h interface. */
143#ifndef WDIOC_GETTIMEOUT 145#ifndef WDIOC_GETTIMEOUT
144#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) 146#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
@@ -596,6 +598,7 @@ static int ipmi_heartbeat(void)
596 struct kernel_ipmi_msg msg; 598 struct kernel_ipmi_msg msg;
597 int rv; 599 int rv;
598 struct ipmi_system_interface_addr addr; 600 struct ipmi_system_interface_addr addr;
601 int timeout_retries = 0;
599 602
600 if (ipmi_ignore_heartbeat) 603 if (ipmi_ignore_heartbeat)
601 return 0; 604 return 0;
@@ -616,6 +619,7 @@ static int ipmi_heartbeat(void)
616 619
617 mutex_lock(&heartbeat_lock); 620 mutex_lock(&heartbeat_lock);
618 621
622restart:
619 atomic_set(&heartbeat_tofree, 2); 623 atomic_set(&heartbeat_tofree, 2);
620 624
621 /* 625 /*
@@ -653,7 +657,33 @@ static int ipmi_heartbeat(void)
653 /* Wait for the heartbeat to be sent. */ 657 /* Wait for the heartbeat to be sent. */
654 wait_for_completion(&heartbeat_wait); 658 wait_for_completion(&heartbeat_wait);
655 659
656 if (heartbeat_recv_msg.msg.data[0] != 0) { 660 if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
661 timeout_retries++;
662 if (timeout_retries > 3) {
663 printk(KERN_ERR PFX ": Unable to restore the IPMI"
664 " watchdog's settings, giving up.\n");
665 rv = -EIO;
666 goto out_unlock;
667 }
668
669 /*
670 * The timer was not initialized, that means the BMC was
671 * probably reset and lost the watchdog information. Attempt
672 * to restore the timer's info. Note that we still hold
673 * the heartbeat lock, to keep a heartbeat from happening
674 * in this process, so must say no heartbeat to avoid a
675 * deadlock on this mutex.
676 */
677 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
678 if (rv) {
679 printk(KERN_ERR PFX ": Unable to send the command to"
680 " set the watchdog's settings, giving up.\n");
681 goto out_unlock;
682 }
683
684 /* We might need a new heartbeat, so do it now */
685 goto restart;
686 } else if (heartbeat_recv_msg.msg.data[0] != 0) {
657 /* 687 /*
658 * Got an error in the heartbeat response. It was already 688 * Got an error in the heartbeat response. It was already
659 * reported in ipmi_wdog_msg_handler, but we should return 689 * reported in ipmi_wdog_msg_handler, but we should return
@@ -662,6 +692,7 @@ static int ipmi_heartbeat(void)
662 rv = -EINVAL; 692 rv = -EINVAL;
663 } 693 }
664 694
695out_unlock:
665 mutex_unlock(&heartbeat_lock); 696 mutex_unlock(&heartbeat_lock);
666 697
667 return rv; 698 return rv;
@@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = {
922static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, 953static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
923 void *handler_data) 954 void *handler_data)
924{ 955{
925 if (msg->msg.data[0] != 0) { 956 if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
957 msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
958 printk(KERN_INFO PFX "response: The IPMI controller appears"
959 " to have been reset, will attempt to reinitialize"
960 " the watchdog timer\n");
961 else if (msg->msg.data[0] != 0)
926 printk(KERN_ERR PFX "response: Error %x on cmd %x\n", 962 printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
927 msg->msg.data[0], 963 msg->msg.data[0],
928 msg->msg.cmd); 964 msg->msg.cmd);
929 }
930 965
931 ipmi_free_recv_msg(msg); 966 ipmi_free_recv_msg(msg);
932} 967}