aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2012-04-25 05:46:14 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-05-09 09:16:58 -0400
commitba280c092e6eca8a70c502e4510061535fdce382 (patch)
tree70bf9476e9667ec5afec09f5a2c8f4b9cc91e75c /drivers/block/drbd
parent5de738272e38f7051c7a44c42631b71a0e2a1e80 (diff)
drbd: fix resend/resubmit of frozen IO
DRBD can freeze IO, due to fencing policy (fencing resource-and-stonith), or because we lost access to data (on-no-data-accessible suspend-io). Resuming from there (re-connect, or re-attach, or explicit admin intervention) should "just work". Unfortunately, if the re-attach/re-connect did not happen within the timeout, then — since the commit "drbd: Implemented real timeout checking for request processing time" — if so configured, the request_timer_fn() would timeout and detach/disconnect virtually immediately. This change tracks the most recent attach and connect, and does not timeout within <configured timeout interval> after attach/connect. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c9
-rw-r--r--drivers/block/drbd/drbd_req.c52
3 files changed, 48 insertions, 15 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f215ad430bb..302a6e786f7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1049,6 +1049,8 @@ struct drbd_conf {
1049 struct crypto_hash *csums_tfm; 1049 struct crypto_hash *csums_tfm;
1050 struct crypto_hash *verify_tfm; 1050 struct crypto_hash *verify_tfm;
1051 1051
1052 unsigned long last_reattach_jif;
1053 unsigned long last_reconnect_jif;
1052 struct drbd_thread receiver; 1054 struct drbd_thread receiver;
1053 struct drbd_thread worker; 1055 struct drbd_thread worker;
1054 struct drbd_thread asender; 1056 struct drbd_thread asender;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index deccff3af77..ab501b23b50 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1326,6 +1326,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1326 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) 1326 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1327 drbd_resume_al(mdev); 1327 drbd_resume_al(mdev);
1328 1328
1329 /* remember last connect and attach times so request_timer_fn() won't
1330 * kill newly established sessions while we are still trying to thaw
1331 * previously frozen IO */
1332 if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
1333 mdev->last_reconnect_jif = jiffies;
1334 if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
1335 ns.disk > D_NEGOTIATING)
1336 mdev->last_reattach_jif = jiffies;
1337
1329 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); 1338 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1330 if (ascw) { 1339 if (ascw) {
1331 ascw->os = os; 1340 ascw->os = os;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 340d57b9856..4a642ce62ba 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1200,43 +1200,65 @@ void request_timer_fn(unsigned long data)
1200 struct drbd_request *req; /* oldest request */ 1200 struct drbd_request *req; /* oldest request */
1201 struct list_head *le; 1201 struct list_head *le;
1202 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ 1202 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
1203 unsigned long now;
1203 1204
1204 if (get_net_conf(mdev)) { 1205 if (get_net_conf(mdev)) {
1205 ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; 1206 if (mdev->state.conn >= C_WF_REPORT_PARAMS)
1207 ent = mdev->net_conf->timeout*HZ/10
1208 * mdev->net_conf->ko_count;
1206 put_net_conf(mdev); 1209 put_net_conf(mdev);
1207 } 1210 }
1208 if (get_ldev(mdev)) { 1211 if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
1209 dt = mdev->ldev->dc.disk_timeout * HZ / 10; 1212 dt = mdev->ldev->dc.disk_timeout * HZ / 10;
1210 put_ldev(mdev); 1213 put_ldev(mdev);
1211 } 1214 }
1212 et = min_not_zero(dt, ent); 1215 et = min_not_zero(dt, ent);
1213 1216
1214 if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) 1217 if (!et)
1215 return; /* Recurring timer stopped */ 1218 return; /* Recurring timer stopped */
1216 1219
1220 now = jiffies;
1221
1217 spin_lock_irq(&mdev->req_lock); 1222 spin_lock_irq(&mdev->req_lock);
1218 le = &mdev->oldest_tle->requests; 1223 le = &mdev->oldest_tle->requests;
1219 if (list_empty(le)) { 1224 if (list_empty(le)) {
1220 spin_unlock_irq(&mdev->req_lock); 1225 spin_unlock_irq(&mdev->req_lock);
1221 mod_timer(&mdev->request_timer, jiffies + et); 1226 mod_timer(&mdev->request_timer, now + et);
1222 return; 1227 return;
1223 } 1228 }
1224 1229
1225 le = le->prev; 1230 le = le->prev;
1226 req = list_entry(le, struct drbd_request, tl_requests); 1231 req = list_entry(le, struct drbd_request, tl_requests);
1227 if (ent && req->rq_state & RQ_NET_PENDING) { 1232
1228 if (time_is_before_eq_jiffies(req->start_time + ent)) { 1233 /* The request is considered timed out, if
1229 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); 1234 * - we have some effective timeout from the configuration,
1230 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); 1235 * with above state restrictions applied,
1231 } 1236 * - the oldest request is waiting for a response from the network
1237 * resp. the local disk,
1238 * - the oldest request is in fact older than the effective timeout,
1239 * - the connection was established (resp. disk was attached)
1240 * for longer than the timeout already.
1241 * Note that for 32bit jiffies and very stable connections/disks,
1242 * we may have a wrap around, which is catched by
1243 * !time_in_range(now, last_..._jif, last_..._jif + timeout).
1244 *
1245 * Side effect: once per 32bit wrap-around interval, which means every
1246 * ~198 days with 250 HZ, we have a window where the timeout would need
1247 * to expire twice (worst case) to become effective. Good enough.
1248 */
1249 if (ent && req->rq_state & RQ_NET_PENDING &&
1250 time_after(now, req->start_time + ent) &&
1251 !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
1252 dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
1253 _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
1232 } 1254 }
1233 if (dt && req->rq_state & RQ_LOCAL_PENDING) { 1255 if (dt && req->rq_state & RQ_LOCAL_PENDING &&
1234 if (time_is_before_eq_jiffies(req->start_time + dt)) { 1256 time_after(now, req->start_time + dt) &&
1235 dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); 1257 !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
1236 __drbd_chk_io_error(mdev, 1); 1258 dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
1237 } 1259 __drbd_chk_io_error(mdev, 1);
1238 } 1260 }
1239 nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; 1261 nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
1240 spin_unlock_irq(&mdev->req_lock); 1262 spin_unlock_irq(&mdev->req_lock);
1241 mod_timer(&mdev->request_timer, nt); 1263 mod_timer(&mdev->request_timer, nt);
1242} 1264}