aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2010-05-27 03:45:45 -0400
committer: Jens Axboe <jaxboe@fusionio.com> 2010-06-01 05:12:27 -0400
commit: d255e5ff5fc6cc6c60dd014d1261448a7bbc8134 (patch)
tree: af933be5ddb81b1e47be2f91ff558c5c792f2f0f /drivers/block
parent: 32fa7e91f923d8b2578c42016ff3a94efc9968a2 (diff)
drbd: fix hang on local read errors while disconnected

"Canceled" w_read_retry_remote work items never completed if they were canceled after the drbd_disconnect connection teardown cleanup had already run (or while we were not connected anyway). Fixed by not queueing a remote retry if we already know it won't work (pdsk not up to date), and by cleaning up ourselves on "cancel", in case we hit a race with drbd_disconnect.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/drbd/drbd_req.c 22
-rw-r--r-- drivers/block/drbd/drbd_req.h 1
-rw-r--r-- drivers/block/drbd/drbd_worker.c 6
3 files changed, 16 insertions, 13 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index e6c4d579eaba..8915644af722 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -452,20 +452,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
452 452
453 dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", 453 dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
454 (unsigned long long)req->sector, req->size); 454 (unsigned long long)req->sector, req->size);
455 /* _req_mod(req,to_be_send); oops, recursion... */
456 D_ASSERT(!(req->rq_state & RQ_NET_MASK)); 455 D_ASSERT(!(req->rq_state & RQ_NET_MASK));
457 req->rq_state |= RQ_NET_PENDING;
458 inc_ap_pending(mdev);
459 456
460 __drbd_chk_io_error(mdev, FALSE); 457 __drbd_chk_io_error(mdev, FALSE);
461 put_ldev(mdev); 458 put_ldev(mdev);
462 /* NOTE: if we have no connection,
463 * or know the peer has no good data either,
464 * then we don't actually need to "queue_for_net_read",
465 * but we do so anyways, since the drbd_io_error()
466 * and the potential state change to "Diskless"
467 * needs to be done from process context */
468 459
460 /* no point in retrying if there is no good remote data,
461 * or we have no connection. */
462 if (mdev->state.pdsk != D_UP_TO_DATE) {
463 _req_may_be_done(req, m);
464 break;
465 }
466
467 /* _req_mod(req,to_be_send); oops, recursion... */
468 req->rq_state |= RQ_NET_PENDING;
469 inc_ap_pending(mdev);
469 /* fall through: _req_mod(req,queue_for_net_read); */ 470 /* fall through: _req_mod(req,queue_for_net_read); */
470 471
471 case queue_for_net_read: 472 case queue_for_net_read:
@@ -575,6 +576,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
575 _req_may_be_done(req, m); 576 _req_may_be_done(req, m);
576 break; 577 break;
577 578
579 case read_retry_remote_canceled:
580 req->rq_state &= ~RQ_NET_QUEUED;
581 /* fall through, in case we raced with drbd_disconnect */
578 case connection_lost_while_pending: 582 case connection_lost_while_pending:
579 /* transfer log cleanup after connection loss */ 583 /* transfer log cleanup after connection loss */
580 /* assert something? */ 584 /* assert something? */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 16119d7056cc..02d575d24518 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -91,6 +91,7 @@ enum drbd_req_event {
91 send_failed, 91 send_failed,
92 handed_over_to_network, 92 handed_over_to_network,
93 connection_lost_while_pending, 93 connection_lost_while_pending,
94 read_retry_remote_canceled,
94 recv_acked_by_peer, 95 recv_acked_by_peer,
95 write_acked_by_peer, 96 write_acked_by_peer,
96 write_acked_by_peer_and_sis, /* and set_in_sync */ 97 write_acked_by_peer_and_sis, /* and set_in_sync */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a12b447bafbd..67371fcbc5aa 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -266,10 +266,8 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
266 * to give the disk the chance to relocate that block */ 266 * to give the disk the chance to relocate that block */
267 267
268 spin_lock_irq(&mdev->req_lock); 268 spin_lock_irq(&mdev->req_lock);
269 if (cancel || 269 if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
270 mdev->state.conn < C_CONNECTED || 270 _req_mod(req, read_retry_remote_canceled);
271 mdev->state.pdsk <= D_INCONSISTENT) {
272 _req_mod(req, send_canceled);
273 spin_unlock_irq(&mdev->req_lock); 271 spin_unlock_irq(&mdev->req_lock);
274 dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); 272 dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
275 return 1; 273 return 1;