about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Guenter Roeck <linux@roeck-us.net> 2016-02-28 16:12:15 -0500
committer Wim Van Sebroeck <wim@iguana.be> 2016-03-16 16:11:14 -0400
commit 664a39236e718f9f03fa73fc01006da9ced04efc (patch)
tree 116bfdc2660493a14cc08769d23236c1070b533a
parent fb32e9b9deeb5df2913deb7d2ae8c36f4f66ecf3 (diff)
watchdog: Introduce hardware maximum heartbeat in watchdog core
Introduce an optional hardware maximum heartbeat in the watchdog core. The hardware maximum heartbeat can be lower than the maximum timeout. Drivers can set the maximum hardware heartbeat value in the watchdog data structure. If the configured timeout exceeds the maximum hardware heartbeat, the watchdog core enables a timer function to assist sending keepalive requests to the watchdog driver.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
-rw-r--r-- Documentation/watchdog/watchdog-kernel-api.txt 19
-rw-r--r-- drivers/watchdog/watchdog_dev.c 129
-rw-r--r-- include/linux/watchdog.h 28
3 files changed, 158 insertions, 18 deletions
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
index dd8f912c0576..15a02595ade1 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -52,6 +52,7 @@ struct watchdog_device {
52 unsigned int timeout; 52 unsigned int timeout;
53 unsigned int min_timeout; 53 unsigned int min_timeout;
54 unsigned int max_timeout; 54 unsigned int max_timeout;
55 unsigned int max_hw_heartbeat_ms;
55 struct notifier_block reboot_nb; 56 struct notifier_block reboot_nb;
56 struct notifier_block restart_nb; 57 struct notifier_block restart_nb;
57 void *driver_data; 58 void *driver_data;
@@ -73,8 +74,18 @@ It contains following fields:
73 additional information about the watchdog timer itself. (Like it's unique name) 74 additional information about the watchdog timer itself. (Like it's unique name)
74* ops: a pointer to the list of watchdog operations that the watchdog supports. 75* ops: a pointer to the list of watchdog operations that the watchdog supports.
75* timeout: the watchdog timer's timeout value (in seconds). 76* timeout: the watchdog timer's timeout value (in seconds).
77 This is the time after which the system will reboot if user space does
78 not send a heartbeat request if WDOG_ACTIVE is set.
76* min_timeout: the watchdog timer's minimum timeout value (in seconds). 79* min_timeout: the watchdog timer's minimum timeout value (in seconds).
77* max_timeout: the watchdog timer's maximum timeout value (in seconds). 80 If set, the minimum configurable value for 'timeout'.
81* max_timeout: the watchdog timer's maximum timeout value (in seconds),
82 as seen from userspace. If set, the maximum configurable value for
83 'timeout'. Not used if max_hw_heartbeat_ms is non-zero.
84* max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds.
85 If set, the infrastructure will send heartbeats to the watchdog driver
86 if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE
87 is set and userspace failed to send a heartbeat for at least 'timeout'
88 seconds.
78* reboot_nb: notifier block that is registered for reboot notifications, for 89* reboot_nb: notifier block that is registered for reboot notifications, for
79 internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core 90 internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core
80 will stop the watchdog on such notifications. 91 will stop the watchdog on such notifications.
@@ -153,7 +164,11 @@ they are supported. These optional routines/operations are:
153 and -EIO for "could not write value to the watchdog". On success this 164 and -EIO for "could not write value to the watchdog". On success this
154 routine should set the timeout value of the watchdog_device to the 165 routine should set the timeout value of the watchdog_device to the
155 achieved timeout value (which may be different from the requested one 166 achieved timeout value (which may be different from the requested one
156 because the watchdog does not necessarily has a 1 second resolution). 167 because the watchdog does not necessarily have a 1 second resolution).
168 Drivers implementing max_hw_heartbeat_ms set the hardware watchdog heartbeat
169 to the minimum of timeout and max_hw_heartbeat_ms. Those drivers set the
170 timeout value of the watchdog_device either to the requested timeout value
171 (if it is larger than max_hw_heartbeat_ms), or to the achieved timeout value.
157 (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the 172 (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
158 watchdog's info structure). 173 watchdog's info structure).
159 If the watchdog driver does not have to perform any action but setting the 174 If the watchdog driver does not have to perform any action but setting the
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index b5e700186ae0..e668a9e8b648 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -36,6 +36,7 @@
36#include <linux/errno.h> /* For the -ENODEV/... values */ 36#include <linux/errno.h> /* For the -ENODEV/... values */
37#include <linux/fs.h> /* For file operations */ 37#include <linux/fs.h> /* For file operations */
38#include <linux/init.h> /* For __init/__exit/... */ 38#include <linux/init.h> /* For __init/__exit/... */
39#include <linux/jiffies.h> /* For timeout functions */
39#include <linux/kernel.h> /* For printk/panic/... */ 40#include <linux/kernel.h> /* For printk/panic/... */
40#include <linux/kref.h> /* For data references */ 41#include <linux/kref.h> /* For data references */
41#include <linux/miscdevice.h> /* For handling misc devices */ 42#include <linux/miscdevice.h> /* For handling misc devices */
@@ -44,6 +45,7 @@
44#include <linux/slab.h> /* For memory functions */ 45#include <linux/slab.h> /* For memory functions */
45#include <linux/types.h> /* For standard types (like size_t) */ 46#include <linux/types.h> /* For standard types (like size_t) */
46#include <linux/watchdog.h> /* For watchdog specific items */ 47#include <linux/watchdog.h> /* For watchdog specific items */
48#include <linux/workqueue.h> /* For workqueue */
47#include <linux/uaccess.h> /* For copy_to_user/put_user/... */ 49#include <linux/uaccess.h> /* For copy_to_user/put_user/... */
48 50
49#include "watchdog_core.h" 51#include "watchdog_core.h"
@@ -61,6 +63,8 @@ struct watchdog_core_data {
61 struct cdev cdev; 63 struct cdev cdev;
62 struct watchdog_device *wdd; 64 struct watchdog_device *wdd;
63 struct mutex lock; 65 struct mutex lock;
66 unsigned long last_keepalive;
67 struct delayed_work work;
64 unsigned long status; /* Internal status bits */ 68 unsigned long status; /* Internal status bits */
65#define _WDOG_DEV_OPEN 0 /* Opened ? */ 69#define _WDOG_DEV_OPEN 0 /* Opened ? */
66#define _WDOG_ALLOW_RELEASE 1 /* Did we receive the magic char ? */ 70#define _WDOG_ALLOW_RELEASE 1 /* Did we receive the magic char ? */
@@ -71,6 +75,76 @@ static dev_t watchdog_devt;
71/* Reference to watchdog device behind /dev/watchdog */ 75/* Reference to watchdog device behind /dev/watchdog */
72static struct watchdog_core_data *old_wd_data; 76static struct watchdog_core_data *old_wd_data;
73 77
78static struct workqueue_struct *watchdog_wq;
79
80static inline bool watchdog_need_worker(struct watchdog_device *wdd)
81{
82 /* All variables in milli-seconds */
83 unsigned int hm = wdd->max_hw_heartbeat_ms;
84 unsigned int t = wdd->timeout * 1000;
85
86 /*
87 * A worker to generate heartbeat requests is needed if all of the
88 * following conditions are true.
89 * - Userspace activated the watchdog.
90 * - The driver provided a value for the maximum hardware timeout, and
91 * thus is aware that the framework supports generating heartbeat
92 * requests.
93 * - Userspace requests a longer timeout than the hardware can handle.
94 */
95 return watchdog_active(wdd) && hm && t > hm;
96}
97
98static long watchdog_next_keepalive(struct watchdog_device *wdd)
99{
100 struct watchdog_core_data *wd_data = wdd->wd_data;
101 unsigned int timeout_ms = wdd->timeout * 1000;
102 unsigned long keepalive_interval;
103 unsigned long last_heartbeat;
104 unsigned long virt_timeout;
105 unsigned int hw_heartbeat_ms;
106
107 virt_timeout = wd_data->last_keepalive + msecs_to_jiffies(timeout_ms);
108 hw_heartbeat_ms = min(timeout_ms, wdd->max_hw_heartbeat_ms);
109 keepalive_interval = msecs_to_jiffies(hw_heartbeat_ms / 2);
110
111 /*
112 * To ensure that the watchdog times out wdd->timeout seconds
113 * after the most recent ping from userspace, the last
114 * worker ping has to come in hw_heartbeat_ms before this timeout.
115 */
116 last_heartbeat = virt_timeout - msecs_to_jiffies(hw_heartbeat_ms);
117 return min_t(long, last_heartbeat - jiffies, keepalive_interval);
118}
119
120static inline void watchdog_update_worker(struct watchdog_device *wdd)
121{
122 struct watchdog_core_data *wd_data = wdd->wd_data;
123
124 if (watchdog_need_worker(wdd)) {
125 long t = watchdog_next_keepalive(wdd);
126
127 if (t > 0)
128 mod_delayed_work(watchdog_wq, &wd_data->work, t);
129 } else {
130 cancel_delayed_work(&wd_data->work);
131 }
132}
133
134static int __watchdog_ping(struct watchdog_device *wdd)
135{
136 int err;
137
138 if (wdd->ops->ping)
139 err = wdd->ops->ping(wdd); /* ping the watchdog */
140 else
141 err = wdd->ops->start(wdd); /* restart watchdog */
142
143 watchdog_update_worker(wdd);
144
145 return err;
146}
147
74/* 148/*
75 * watchdog_ping: ping the watchdog. 149 * watchdog_ping: ping the watchdog.
76 * @wdd: the watchdog device to ping 150 * @wdd: the watchdog device to ping
@@ -85,17 +159,28 @@ static struct watchdog_core_data *old_wd_data;
85 159
86static int watchdog_ping(struct watchdog_device *wdd) 160static int watchdog_ping(struct watchdog_device *wdd)
87{ 161{
88 int err; 162 struct watchdog_core_data *wd_data = wdd->wd_data;
89 163
90 if (!watchdog_active(wdd)) 164 if (!watchdog_active(wdd))
91 return 0; 165 return 0;
92 166
93 if (wdd->ops->ping) 167 wd_data->last_keepalive = jiffies;
94 err = wdd->ops->ping(wdd); /* ping the watchdog */ 168 return __watchdog_ping(wdd);
95 else 169}
96 err = wdd->ops->start(wdd); /* restart watchdog */
97 170
98 return err; 171static void watchdog_ping_work(struct work_struct *work)
172{
173 struct watchdog_core_data *wd_data;
174 struct watchdog_device *wdd;
175
176 wd_data = container_of(to_delayed_work(work), struct watchdog_core_data,
177 work);
178
179 mutex_lock(&wd_data->lock);
180 wdd = wd_data->wdd;
181 if (wdd && watchdog_active(wdd))
182 __watchdog_ping(wdd);
183 mutex_unlock(&wd_data->lock);
99} 184}
100 185
101/* 186/*
@@ -111,14 +196,20 @@ static int watchdog_ping(struct watchdog_device *wdd)
111 196
112static int watchdog_start(struct watchdog_device *wdd) 197static int watchdog_start(struct watchdog_device *wdd)
113{ 198{
199 struct watchdog_core_data *wd_data = wdd->wd_data;
200 unsigned long started_at;
114 int err; 201 int err;
115 202
116 if (watchdog_active(wdd)) 203 if (watchdog_active(wdd))
117 return 0; 204 return 0;
118 205
206 started_at = jiffies;
119 err = wdd->ops->start(wdd); 207 err = wdd->ops->start(wdd);
120 if (err == 0) 208 if (err == 0) {
121 set_bit(WDOG_ACTIVE, &wdd->status); 209 set_bit(WDOG_ACTIVE, &wdd->status);
210 wd_data->last_keepalive = started_at;
211 watchdog_update_worker(wdd);
212 }
122 213
123 return err; 214 return err;
124} 215}
@@ -137,6 +228,7 @@ static int watchdog_start(struct watchdog_device *wdd)
137 228
138static int watchdog_stop(struct watchdog_device *wdd) 229static int watchdog_stop(struct watchdog_device *wdd)
139{ 230{
231 struct watchdog_core_data *wd_data = wdd->wd_data;
140 int err; 232 int err;
141 233
142 if (!watchdog_active(wdd)) 234 if (!watchdog_active(wdd))
@@ -149,8 +241,10 @@ static int watchdog_stop(struct watchdog_device *wdd)
149 } 241 }
150 242
151 err = wdd->ops->stop(wdd); 243 err = wdd->ops->stop(wdd);
152 if (err == 0) 244 if (err == 0) {
153 clear_bit(WDOG_ACTIVE, &wdd->status); 245 clear_bit(WDOG_ACTIVE, &wdd->status);
246 cancel_delayed_work(&wd_data->work);
247 }
154 248
155 return err; 249 return err;
156} 250}
@@ -196,6 +290,8 @@ static int watchdog_set_timeout(struct watchdog_device *wdd,
196 else 290 else
197 wdd->timeout = timeout; 291 wdd->timeout = timeout;
198 292
293 watchdog_update_worker(wdd);
294
199 return err; 295 return err;
200} 296}
201 297
@@ -616,6 +712,8 @@ static int watchdog_release(struct inode *inode, struct file *file)
616 watchdog_ping(wdd); 712 watchdog_ping(wdd);
617 } 713 }
618 714
715 cancel_delayed_work_sync(&wd_data->work);
716
619 /* make sure that /dev/watchdog can be re-opened */ 717 /* make sure that /dev/watchdog can be re-opened */
620 clear_bit(_WDOG_DEV_OPEN, &wd_data->status); 718 clear_bit(_WDOG_DEV_OPEN, &wd_data->status);
621 719
@@ -665,6 +763,11 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
665 wd_data->wdd = wdd; 763 wd_data->wdd = wdd;
666 wdd->wd_data = wd_data; 764 wdd->wd_data = wd_data;
667 765
766 if (!watchdog_wq)
767 return -ENODEV;
768
769 INIT_DELAYED_WORK(&wd_data->work, watchdog_ping_work);
770
668 if (wdd->id == 0) { 771 if (wdd->id == 0) {
669 old_wd_data = wd_data; 772 old_wd_data = wd_data;
670 watchdog_miscdev.parent = wdd->parent; 773 watchdog_miscdev.parent = wdd->parent;
@@ -722,6 +825,8 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
722 wdd->wd_data = NULL; 825 wdd->wd_data = NULL;
723 mutex_unlock(&wd_data->lock); 826 mutex_unlock(&wd_data->lock);
724 827
828 cancel_delayed_work_sync(&wd_data->work);
829
725 kref_put(&wd_data->kref, watchdog_core_data_release); 830 kref_put(&wd_data->kref, watchdog_core_data_release);
726} 831}
727 832
@@ -787,6 +892,13 @@ int __init watchdog_dev_init(void)
787{ 892{
788 int err; 893 int err;
789 894
895 watchdog_wq = alloc_workqueue("watchdogd",
896 WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
897 if (!watchdog_wq) {
898 pr_err("Failed to create watchdog workqueue\n");
899 return -ENOMEM;
900 }
901
790 err = class_register(&watchdog_class); 902 err = class_register(&watchdog_class);
791 if (err < 0) { 903 if (err < 0) {
792 pr_err("couldn't register class\n"); 904 pr_err("couldn't register class\n");
@@ -813,4 +925,5 @@ void __exit watchdog_dev_exit(void)
813{ 925{
814 unregister_chrdev_region(watchdog_devt, MAX_DOGS); 926 unregister_chrdev_region(watchdog_devt, MAX_DOGS);
815 class_unregister(&watchdog_class); 927 class_unregister(&watchdog_class);
928 destroy_workqueue(watchdog_wq);
816} 929}
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 0b565f2ad242..8e82daecb7d3 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -10,8 +10,9 @@
10 10
11 11
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/device.h>
14#include <linux/cdev.h> 13#include <linux/cdev.h>
14#include <linux/device.h>
15#include <linux/kernel.h>
15#include <linux/notifier.h> 16#include <linux/notifier.h>
16#include <uapi/linux/watchdog.h> 17#include <uapi/linux/watchdog.h>
17 18
@@ -61,14 +62,19 @@ struct watchdog_ops {
61 * @bootstatus: Status of the watchdog device at boot. 62 * @bootstatus: Status of the watchdog device at boot.
62 * @timeout: The watchdog devices timeout value (in seconds). 63 * @timeout: The watchdog devices timeout value (in seconds).
63 * @min_timeout:The watchdog devices minimum timeout value (in seconds). 64 * @min_timeout:The watchdog devices minimum timeout value (in seconds).
64 * @max_timeout:The watchdog devices maximum timeout value (in seconds). 65 * @max_timeout:The watchdog devices maximum timeout value (in seconds)
66 * as configurable from user space. Only relevant if
67 * max_hw_heartbeat_ms is not provided.
68 * @max_hw_heartbeat_ms:
69 * Hardware limit for maximum timeout, in milli-seconds.
70 * Replaces max_timeout if specified.
65 * @reboot_nb: The notifier block to stop watchdog on reboot. 71 * @reboot_nb: The notifier block to stop watchdog on reboot.
66 * @restart_nb: The notifier block to register a restart function. 72 * @restart_nb: The notifier block to register a restart function.
67 * @driver_data:Pointer to the drivers private data. 73 * @driver_data:Pointer to the drivers private data.
68 * @wd_data: Pointer to watchdog core internal data. 74 * @wd_data: Pointer to watchdog core internal data.
69 * @status: Field that contains the devices internal status bits. 75 * @status: Field that contains the devices internal status bits.
70 * @deferred: entry in wtd_deferred_reg_list which is used to 76 * @deferred: Entry in wtd_deferred_reg_list which is used to
71 * register early initialized watchdogs. 77 * register early initialized watchdogs.
72 * 78 *
73 * The watchdog_device structure contains all information about a 79 * The watchdog_device structure contains all information about a
74 * watchdog timer device. 80 * watchdog timer device.
@@ -89,6 +95,7 @@ struct watchdog_device {
89 unsigned int timeout; 95 unsigned int timeout;
90 unsigned int min_timeout; 96 unsigned int min_timeout;
91 unsigned int max_timeout; 97 unsigned int max_timeout;
98 unsigned int max_hw_heartbeat_ms;
92 struct notifier_block reboot_nb; 99 struct notifier_block reboot_nb;
93 struct notifier_block restart_nb; 100 struct notifier_block restart_nb;
94 void *driver_data; 101 void *driver_data;
@@ -128,13 +135,18 @@ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigne
128{ 135{
129 /* 136 /*
130 * The timeout is invalid if 137 * The timeout is invalid if
138 * - the requested value is larger than UINT_MAX / 1000
139 * (since internal calculations are done in milli-seconds),
140 * or
131 * - the requested value is smaller than the configured minimum timeout, 141 * - the requested value is smaller than the configured minimum timeout,
132 * or 142 * or
133 * - a maximum timeout is configured, and the requested value is larger 143 * - a maximum hardware timeout is not configured, a maximum timeout
134 * than the maximum timeout. 144 * is configured, and the requested value is larger than the
145 * configured maximum timeout.
135 */ 146 */
136 return t < wdd->min_timeout || 147 return t > UINT_MAX / 1000 || t < wdd->min_timeout ||
137 (wdd->max_timeout && t > wdd->max_timeout); 148 (!wdd->max_hw_heartbeat_ms && wdd->max_timeout &&
149 t > wdd->max_timeout);
138} 150}
139 151
140/* Use the following functions to manipulate watchdog driver specific data */ 152/* Use the following functions to manipulate watchdog driver specific data */