diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2011-06-29 08:06:08 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-05-09 09:10:45 -0400 |
commit | dfa8bedbfe881caf6676704ab1aae18dfe8e430a (patch) | |
tree | 416102d8cd59c6be45b072c2ce123c6cb8f63f52 | |
parent | 02ee8f95fadf7c94b3d28df436a095152f6392b2 (diff) |
drbd: Implemented the disk-timeout option
When the disk-timeout is active and it expires for a single request,
we consider the local disk to be D_FAILED. Note: With this change,
I made both timeout-based state transitions HARD state transitions.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 5 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 32 | ||||
-rw-r--r-- | include/linux/drbd_limits.h | 5 | ||||
-rw-r--r-- | include/linux/drbd_nl.h | 1 |
5 files changed, 31 insertions, 14 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bc8a8a7556da..4bd636524dd1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -1404,6 +1404,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1404 | /* Here we have the actions that are performed after a | 1404 | /* Here we have the actions that are performed after a |
1405 | state change. This function might sleep */ | 1405 | state change. This function might sleep */ |
1406 | 1406 | ||
1407 | if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING) | ||
1408 | mod_timer(&mdev->request_timer, jiffies + HZ); | ||
1409 | |||
1407 | nsm.i = -1; | 1410 | nsm.i = -1; |
1408 | if (ns.susp_nod) { | 1411 | if (ns.susp_nod) { |
1409 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) | 1412 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) |
@@ -3318,6 +3321,8 @@ static void drbd_delete_device(unsigned int minor) | |||
3318 | if (!mdev) | 3321 | if (!mdev) |
3319 | return; | 3322 | return; |
3320 | 3323 | ||
3324 | del_timer_sync(&mdev->request_timer); | ||
3325 | |||
3321 | /* paranoia asserts */ | 3326 | /* paranoia asserts */ |
3322 | if (mdev->open_cnt != 0) | 3327 | if (mdev->open_cnt != 0) |
3323 | dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, | 3328 | dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e7ed0aa93a16..a85bbe1bbc2b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -3803,8 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3803 | atomic_set(&mdev->rs_pending_cnt, 0); | 3803 | atomic_set(&mdev->rs_pending_cnt, 0); |
3804 | wake_up(&mdev->misc_wait); | 3804 | wake_up(&mdev->misc_wait); |
3805 | 3805 | ||
3806 | del_timer(&mdev->request_timer); | ||
3807 | |||
3808 | /* make sure syncer is stopped and w_resume_next_sg queued */ | 3806 | /* make sure syncer is stopped and w_resume_next_sg queued */ |
3809 | del_timer_sync(&mdev->resync_timer); | 3807 | del_timer_sync(&mdev->resync_timer); |
3810 | resync_timer_fn((unsigned long)mdev); | 3808 | resync_timer_fn((unsigned long)mdev); |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1a8aac4b0c2f..ef145e33a647 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -1208,13 +1208,19 @@ void request_timer_fn(unsigned long data) | |||
1208 | struct drbd_conf *mdev = (struct drbd_conf *) data; | 1208 | struct drbd_conf *mdev = (struct drbd_conf *) data; |
1209 | struct drbd_request *req; /* oldest request */ | 1209 | struct drbd_request *req; /* oldest request */ |
1210 | struct list_head *le; | 1210 | struct list_head *le; |
1211 | unsigned long et = 0; /* effective timeout = ko_count * timeout */ | 1211 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ |
1212 | 1212 | ||
1213 | if (get_net_conf(mdev)) { | 1213 | if (get_net_conf(mdev)) { |
1214 | et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; | 1214 | ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; |
1215 | put_net_conf(mdev); | 1215 | put_net_conf(mdev); |
1216 | } | 1216 | } |
1217 | if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) | 1217 | if (get_ldev(mdev)) { |
1218 | dt = mdev->ldev->dc.disk_timeout * HZ / 10; | ||
1219 | put_ldev(mdev); | ||
1220 | } | ||
1221 | et = min_not_zero(dt, ent); | ||
1222 | |||
1223 | if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) | ||
1218 | return; /* Recurring timer stopped */ | 1224 | return; /* Recurring timer stopped */ |
1219 | 1225 | ||
1220 | spin_lock_irq(&mdev->req_lock); | 1226 | spin_lock_irq(&mdev->req_lock); |
@@ -1227,17 +1233,19 @@ void request_timer_fn(unsigned long data) | |||
1227 | 1233 | ||
1228 | le = le->prev; | 1234 | le = le->prev; |
1229 | req = list_entry(le, struct drbd_request, tl_requests); | 1235 | req = list_entry(le, struct drbd_request, tl_requests); |
1230 | if (time_is_before_eq_jiffies(req->start_time + et)) { | 1236 | if (ent && req->rq_state & RQ_NET_PENDING) { |
1231 | if (req->rq_state & RQ_NET_PENDING) { | 1237 | if (time_is_before_eq_jiffies(req->start_time + ent)) { |
1232 | dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); | 1238 | dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); |
1233 | _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); | 1239 | _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); |
1234 | } else { | ||
1235 | dev_warn(DEV, "Local backing block device frozen?\n"); | ||
1236 | mod_timer(&mdev->request_timer, jiffies + et); | ||
1237 | } | 1240 | } |
1238 | } else { | ||
1239 | mod_timer(&mdev->request_timer, req->start_time + et); | ||
1240 | } | 1241 | } |
1241 | 1242 | if (dt && req->rq_state & RQ_LOCAL_PENDING) { | |
1243 | if (time_is_before_eq_jiffies(req->start_time + dt)) { | ||
1244 | dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); | ||
1245 | __drbd_chk_io_error(mdev, 1); | ||
1246 | } | ||
1247 | } | ||
1248 | nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; | ||
1242 | spin_unlock_irq(&mdev->req_lock); | 1249 | spin_unlock_irq(&mdev->req_lock); |
1250 | mod_timer(&mdev->request_timer, nt); | ||
1243 | } | 1251 | } |
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 928c84dfaf42..fb670bf603f7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h | |||
@@ -48,6 +48,11 @@ | |||
48 | #define DRBD_TIMEOUT_MAX 600 | 48 | #define DRBD_TIMEOUT_MAX 600 |
49 | #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ | 49 | #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ |
50 | 50 | ||
51 | /* If backing disk takes longer than disk_timeout, mark the disk as failed */ | ||
52 | #define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ | ||
53 | #define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ | ||
54 | #define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ | ||
55 | |||
51 | /* active connection retries when C_WF_CONNECTION */ | 56 | /* active connection retries when C_WF_CONNECTION */ |
52 | #define DRBD_CONNECT_INT_MIN 1 | 57 | #define DRBD_CONNECT_INT_MIN 1 |
53 | #define DRBD_CONNECT_INT_MAX 120 | 58 | #define DRBD_CONNECT_INT_MAX 120 |
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 7203c9ead233..a8706f08ab36 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h | |||
@@ -31,6 +31,7 @@ NL_PACKET(disk_conf, 3, | |||
31 | NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) | 31 | NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) |
32 | NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) | 32 | NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) |
33 | NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) | 33 | NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) |
34 | NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout) | ||
34 | ) | 35 | ) |
35 | 36 | ||
36 | NL_PACKET(detach, 4, | 37 | NL_PACKET(detach, 4, |