aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Philipp Reisner <philipp.reisner@linbit.com> 2011-06-29 08:06:08 -0400
committer: Philipp Reisner <philipp.reisner@linbit.com> 2012-05-09 09:10:45 -0400
commit: dfa8bedbfe881caf6676704ab1aae18dfe8e430a (patch)
tree: 416102d8cd59c6be45b072c2ce123c6cb8f63f52
parent: 02ee8f95fadf7c94b3d28df436a095152f6392b2 (diff)
drbd: Implemented the disk-timeout option
When the disk-timeout is active and it expires for a single request, we consider the local disk as D_FAILED.

Note: With this change, I made both timeout-based state transitions HARD state transitions.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- drivers/block/drbd/drbd_main.c 5
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 2
-rw-r--r-- drivers/block/drbd/drbd_req.c 32
-rw-r--r-- include/linux/drbd_limits.h 5
-rw-r--r-- include/linux/drbd_nl.h 1
5 files changed, 31 insertions, 14 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index bc8a8a7556da..4bd636524dd1 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1404,6 +1404,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1404 /* Here we have the actions that are performed after a 1404 /* Here we have the actions that are performed after a
1405 state change. This function might sleep */ 1405 state change. This function might sleep */
1406 1406
1407 if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
1408 mod_timer(&mdev->request_timer, jiffies + HZ);
1409
1407 nsm.i = -1; 1410 nsm.i = -1;
1408 if (ns.susp_nod) { 1411 if (ns.susp_nod) {
1409 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) 1412 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
@@ -3318,6 +3321,8 @@ static void drbd_delete_device(unsigned int minor)
3318 if (!mdev) 3321 if (!mdev)
3319 return; 3322 return;
3320 3323
3324 del_timer_sync(&mdev->request_timer);
3325
3321 /* paranoia asserts */ 3326 /* paranoia asserts */
3322 if (mdev->open_cnt != 0) 3327 if (mdev->open_cnt != 0)
3323 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, 3328 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e7ed0aa93a16..a85bbe1bbc2b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3803,8 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3803 atomic_set(&mdev->rs_pending_cnt, 0); 3803 atomic_set(&mdev->rs_pending_cnt, 0);
3804 wake_up(&mdev->misc_wait); 3804 wake_up(&mdev->misc_wait);
3805 3805
3806 del_timer(&mdev->request_timer);
3807
3808 /* make sure syncer is stopped and w_resume_next_sg queued */ 3806 /* make sure syncer is stopped and w_resume_next_sg queued */
3809 del_timer_sync(&mdev->resync_timer); 3807 del_timer_sync(&mdev->resync_timer);
3810 resync_timer_fn((unsigned long)mdev); 3808 resync_timer_fn((unsigned long)mdev);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1a8aac4b0c2f..ef145e33a647 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1208,13 +1208,19 @@ void request_timer_fn(unsigned long data)
1208 struct drbd_conf *mdev = (struct drbd_conf *) data; 1208 struct drbd_conf *mdev = (struct drbd_conf *) data;
1209 struct drbd_request *req; /* oldest request */ 1209 struct drbd_request *req; /* oldest request */
1210 struct list_head *le; 1210 struct list_head *le;
1211 unsigned long et = 0; /* effective timeout = ko_count * timeout */ 1211 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
1212 1212
1213 if (get_net_conf(mdev)) { 1213 if (get_net_conf(mdev)) {
1214 et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; 1214 ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
1215 put_net_conf(mdev); 1215 put_net_conf(mdev);
1216 } 1216 }
1217 if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) 1217 if (get_ldev(mdev)) {
1218 dt = mdev->ldev->dc.disk_timeout * HZ / 10;
1219 put_ldev(mdev);
1220 }
1221 et = min_not_zero(dt, ent);
1222
1223 if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
1218 return; /* Recurring timer stopped */ 1224 return; /* Recurring timer stopped */
1219 1225
1220 spin_lock_irq(&mdev->req_lock); 1226 spin_lock_irq(&mdev->req_lock);
@@ -1227,17 +1233,19 @@ void request_timer_fn(unsigned long data)
1227 1233
1228 le = le->prev; 1234 le = le->prev;
1229 req = list_entry(le, struct drbd_request, tl_requests); 1235 req = list_entry(le, struct drbd_request, tl_requests);
1230 if (time_is_before_eq_jiffies(req->start_time + et)) { 1236 if (ent && req->rq_state & RQ_NET_PENDING) {
1231 if (req->rq_state & RQ_NET_PENDING) { 1237 if (time_is_before_eq_jiffies(req->start_time + ent)) {
1232 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); 1238 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
1233 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); 1239 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
1234 } else {
1235 dev_warn(DEV, "Local backing block device frozen?\n");
1236 mod_timer(&mdev->request_timer, jiffies + et);
1237 } 1240 }
1238 } else {
1239 mod_timer(&mdev->request_timer, req->start_time + et);
1240 } 1241 }
1241 1242 if (dt && req->rq_state & RQ_LOCAL_PENDING) {
1243 if (time_is_before_eq_jiffies(req->start_time + dt)) {
1244 dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
1245 __drbd_chk_io_error(mdev, 1);
1246 }
1247 }
1248 nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
1242 spin_unlock_irq(&mdev->req_lock); 1249 spin_unlock_irq(&mdev->req_lock);
1250 mod_timer(&mdev->request_timer, nt);
1243} 1251}
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 928c84dfaf42..fb670bf603f7 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -48,6 +48,11 @@
48#define DRBD_TIMEOUT_MAX 600 48#define DRBD_TIMEOUT_MAX 600
49#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ 49#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
50 50
51 /* If backing disk takes longer than disk_timeout, mark the disk as failed */
52#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
53#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
54#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
55
51 /* active connection retries when C_WF_CONNECTION */ 56 /* active connection retries when C_WF_CONNECTION */
52#define DRBD_CONNECT_INT_MIN 1 57#define DRBD_CONNECT_INT_MIN 1
53#define DRBD_CONNECT_INT_MAX 120 58#define DRBD_CONNECT_INT_MAX 120
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index 7203c9ead233..a8706f08ab36 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -31,6 +31,7 @@ NL_PACKET(disk_conf, 3,
31 NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) 31 NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
32 NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) 32 NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
33 NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) 33 NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
34 NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
34) 35)
35 36
36NL_PACKET(detach, 4, 37NL_PACKET(detach, 4,