aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_main.c
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2012-07-24 04:12:36 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-08 10:58:37 -0500
commit9a278a7906066a1b4f37fff9b5e27a92af0ca3ce (patch)
tree8dd98edd53bc8e0c17f2681e4418b96ffb7af736 /drivers/block/drbd/drbd_main.c
parent934722a2dbf87b43d39c787441e511157d5add94 (diff)
drbd: allow read requests to be retried after force-detach
Sometimes, a lower level block device turns into a tar-pit, not completing requests at all, not even doing error completion. We can force-detach from such a tar-pit block device, either by disk-timeout, or by drbdadm detach --force. Queueing for retry only from the request destruction path (kref hit 0) makes it impossible to retry affected read requests from the peer, until the local IO completion happened, as the locally submitted bio holds a reference on the drbd request object. If we can only complete READs when the local completion finally happens, we would not need to force-detach in the first place. Instead, queue for retry where we otherwise had done the error completion. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r--drivers/block/drbd/drbd_main.c23
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index d07cb31a36ea..c0acd86c8415 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2216,12 +2216,25 @@ static void do_retry(struct work_struct *ws)
2216 struct drbd_conf *mdev = req->w.mdev; 2216 struct drbd_conf *mdev = req->w.mdev;
2217 struct bio *bio = req->master_bio; 2217 struct bio *bio = req->master_bio;
2218 unsigned long start_time = req->start_time; 2218 unsigned long start_time = req->start_time;
2219 2219 bool expected;
2220 /* We have exclusive access to this request object. 2220
2221 * If it had not been RQ_POSTPONED, the code path which queued 2221 expected =
2222 * it here would have completed and freed it already. 2222 expect(atomic_read(&req->completion_ref) == 0) &&
2223 expect(req->rq_state & RQ_POSTPONED) &&
2224 expect((req->rq_state & RQ_LOCAL_PENDING) == 0 ||
2225 (req->rq_state & RQ_LOCAL_ABORTED) != 0);
2226
2227 if (!expected)
2228 dev_err(DEV, "req=%p completion_ref=%d rq_state=%x\n",
2229 req, atomic_read(&req->completion_ref),
2230 req->rq_state);
2231
2232 /* We still need to put one kref associated with the
2233 * "completion_ref" going zero in the code path that queued it
2234 * here. The request object may still be referenced by a
2235 * frozen local req->private_bio, in case we force-detached.
2223 */ 2236 */
2224 mempool_free(req, drbd_request_mempool); 2237 kref_put(&req->kref, drbd_req_destroy);
2225 2238
2226 /* A single suspended or otherwise blocking device may stall 2239 /* A single suspended or otherwise blocking device may stall
2227 * all others as well. Fortunately, this code path is to 2240 * all others as well. Fortunately, this code path is to