author     Lars Ellenberg <lars.ellenberg@linbit.com>    2012-07-24 04:12:36 -0400
committer  Philipp Reisner <philipp.reisner@linbit.com>  2012-11-08 10:58:37 -0500
commit     9a278a7906066a1b4f37fff9b5e27a92af0ca3ce (patch)
tree       8dd98edd53bc8e0c17f2681e4418b96ffb7af736
parent     934722a2dbf87b43d39c787441e511157d5add94 (diff)
drbd: allow read requests to be retried after force-detach
Sometimes, a lower level block device turns into a tar-pit, not completing
requests at all, not even doing error completion.

We can force-detach from such a tar-pit block device, either by disk-timeout,
or by drbdadm detach --force.

Queueing for retry only from the request destruction path (kref hits 0) makes
it impossible to retry affected read requests from the peer until the local IO
completion happens, as the locally submitted bio holds a reference on the drbd
request object. If we could only complete READs once the local completion
finally happens, we would not need to force-detach in the first place.

Instead, queue for retry where we otherwise had done the error completion.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
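As an illustration of the lifecycle described above, here is a minimal
user-space sketch, not DRBD code: the type and helper names (model_request,
put_completion_ref, do_retry_model, kref_put_model, destroy) are invented
stand-ins for struct drbd_request with its completion_ref and kref,
drbd_req_put_completion_ref(), do_retry() and
kref_put(&req->kref, drbd_req_destroy). It models only the new behaviour:
the retry decision is taken when the completion reference drops to zero,
while the object itself stays alive for a possibly frozen local bio.

#include <stdio.h>
#include <stdlib.h>

struct model_request {
        int kref;            /* object lifetime; a frozen local bio may hold one reference */
        int completion_ref;  /* completions the upper layer is still waiting for */
        int postponed;       /* marked for retry, like RQ_POSTPONED */
};

static void destroy(struct model_request *req)
{
        printf("request freed\n");
        free(req);
}

static void kref_put_model(struct model_request *req)
{
        if (--req->kref == 0)
                destroy(req);
}

/* models do_retry() after the patch: re-issue the master bio towards the
 * peer, then drop the kref handed over by the completion path; a still
 * frozen local bio keeps the object alive beyond this point */
static void do_retry_model(struct model_request *req)
{
        printf("master bio re-queued, READ can now be served by the peer\n");
        kref_put_model(req);
}

/* models drbd_req_put_completion_ref() after the patch: the retry decision
 * is made when the completion ref drops, not when the object is destroyed */
static void put_completion_ref(struct model_request *req)
{
        if (--req->completion_ref != 0)
                return;
        if (req->postponed) {
                do_retry_model(req);    /* retry now, even if local IO is stuck */
                return;
        }
        kref_put_model(req);
}

int main(void)
{
        struct model_request *req = calloc(1, sizeof(*req));

        if (!req)
                return 1;
        req->kref = 2;           /* completion path + frozen local bio */
        req->completion_ref = 1; /* only one completion still outstanding */
        req->postponed = 1;      /* force-detach marked the request for retry */

        put_completion_ref(req); /* retry happens here; req stays allocated */

        /* much later: the tar-pit device finally completes (or gives up)
         * and the frozen bio's reference is dropped, freeing the request */
        kref_put_model(req);
        return 0;
}

Under the old scheme the retry was instead queued from the destroy path, so
with the kref still held by the frozen local bio the READ could not be retried
from the peer until the stuck local IO completed, which for a tar-pit device
may be never.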
-rw-r--r--  drivers/block/drbd/drbd_main.c  23
-rw-r--r--  drivers/block/drbd/drbd_req.c   19
-rw-r--r--  drivers/block/drbd/drbd_req.h    1
3 files changed, 29 insertions(+), 14 deletions(-)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index d07cb31a36ea..c0acd86c8415 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2216,12 +2216,25 @@ static void do_retry(struct work_struct *ws)
                 struct drbd_conf *mdev = req->w.mdev;
                 struct bio *bio = req->master_bio;
                 unsigned long start_time = req->start_time;
+                bool expected;
 
-                /* We have exclusive access to this request object.
-                 * If it had not been RQ_POSTPONED, the code path which queued
-                 * it here would have completed and freed it already.
+                expected =
+                        expect(atomic_read(&req->completion_ref) == 0) &&
+                        expect(req->rq_state & RQ_POSTPONED) &&
+                        expect((req->rq_state & RQ_LOCAL_PENDING) == 0 ||
+                                (req->rq_state & RQ_LOCAL_ABORTED) != 0);
+
+                if (!expected)
+                        dev_err(DEV, "req=%p completion_ref=%d rq_state=%x\n",
+                                req, atomic_read(&req->completion_ref),
+                                req->rq_state);
+
+                /* We still need to put one kref associated with the
+                 * "completion_ref" going zero in the code path that queued it
+                 * here. The request object may still be referenced by a
+                 * frozen local req->private_bio, in case we force-detached.
                  */
-                mempool_free(req, drbd_request_mempool);
+                kref_put(&req->kref, drbd_req_destroy);
 
                 /* A single suspended or otherwise blocking device may stall
                  * all others as well. Fortunately, this code path is to
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f2ba43e78377..c45479aaff8e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -92,7 +92,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
         return req;
 }
 
-static void drbd_req_destroy(struct kref *kref)
+void drbd_req_destroy(struct kref *kref)
 {
         struct drbd_request *req = container_of(kref, struct drbd_request, kref);
         struct drbd_conf *mdev = req->w.mdev;
@@ -152,10 +152,7 @@ static void drbd_req_destroy(struct kref *kref)
                 }
         }
 
-        if (s & RQ_POSTPONED)
-                drbd_restart_request(req);
-        else
-                mempool_free(req, drbd_request_mempool);
+        mempool_free(req, drbd_request_mempool);
 }
 
 static void wake_all_senders(struct drbd_tconn *tconn) {
@@ -292,10 +289,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
                 m->error = ok ? 0 : (error ?: -EIO);
                 m->bio = req->master_bio;
                 req->master_bio = NULL;
-        } else {
-                /* Assert that this will be drbd_req_destroy()ed
-                 * with this very invokation. */
-                D_ASSERT(atomic_read(&req->kref.refcount) == 1);
         }
 }
 
@@ -320,6 +313,14 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
 
         /* else */
         drbd_req_complete(req, m);
+
+        if (req->rq_state & RQ_POSTPONED) {
+                /* don't destroy the req object just yet,
+                 * but queue it for retry */
+                drbd_restart_request(req);
+                return 0;
+        }
+
         return 1;
 }
 
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 90e5a1eea727..9611713c3580 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -267,6 +267,7 @@ struct bio_and_error {
         int error;
 };
 
+extern void drbd_req_destroy(struct kref *kref);
 extern void _req_may_be_done(struct drbd_request *req,
                 struct bio_and_error *m);
 extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,