diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2010-05-27 03:45:45 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-06-01 05:12:27 -0400 |
commit | d255e5ff5fc6cc6c60dd014d1261448a7bbc8134 (patch) | |
tree | af933be5ddb81b1e47be2f91ff558c5c792f2f0f /drivers/block | |
parent | 32fa7e91f923d8b2578c42016ff3a94efc9968a2 (diff) |
drbd: fix hang on local read errors while disconnected
"Canceled" w_read_retry_remote work items never completed if they were
canceled after the drbd_disconnect connection teardown cleanup had already
run (or if we were not connected in the first place).
Fixed by not queueing a remote retry if we already know it won't work
(pdsk not UpToDate), and by cleaning up ourselves on "cancel", in case we
hit a race with drbd_disconnect.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 22 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 6 |
3 files changed, 16 insertions, 13 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e6c4d579eaba..8915644af722 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -452,20 +452,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
452 | 452 | ||
453 | dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", | 453 | dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", |
454 | (unsigned long long)req->sector, req->size); | 454 | (unsigned long long)req->sector, req->size); |
455 | /* _req_mod(req,to_be_send); oops, recursion... */ | ||
456 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); | 455 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); |
457 | req->rq_state |= RQ_NET_PENDING; | ||
458 | inc_ap_pending(mdev); | ||
459 | 456 | ||
460 | __drbd_chk_io_error(mdev, FALSE); | 457 | __drbd_chk_io_error(mdev, FALSE); |
461 | put_ldev(mdev); | 458 | put_ldev(mdev); |
462 | /* NOTE: if we have no connection, | ||
463 | * or know the peer has no good data either, | ||
464 | * then we don't actually need to "queue_for_net_read", | ||
465 | * but we do so anyways, since the drbd_io_error() | ||
466 | * and the potential state change to "Diskless" | ||
467 | * needs to be done from process context */ | ||
468 | 459 | ||
460 | /* no point in retrying if there is no good remote data, | ||
461 | * or we have no connection. */ | ||
462 | if (mdev->state.pdsk != D_UP_TO_DATE) { | ||
463 | _req_may_be_done(req, m); | ||
464 | break; | ||
465 | } | ||
466 | |||
467 | /* _req_mod(req,to_be_send); oops, recursion... */ | ||
468 | req->rq_state |= RQ_NET_PENDING; | ||
469 | inc_ap_pending(mdev); | ||
469 | /* fall through: _req_mod(req,queue_for_net_read); */ | 470 | /* fall through: _req_mod(req,queue_for_net_read); */ |
470 | 471 | ||
471 | case queue_for_net_read: | 472 | case queue_for_net_read: |
@@ -575,6 +576,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
575 | _req_may_be_done(req, m); | 576 | _req_may_be_done(req, m); |
576 | break; | 577 | break; |
577 | 578 | ||
579 | case read_retry_remote_canceled: | ||
580 | req->rq_state &= ~RQ_NET_QUEUED; | ||
581 | /* fall through, in case we raced with drbd_disconnect */ | ||
578 | case connection_lost_while_pending: | 582 | case connection_lost_while_pending: |
579 | /* transfer log cleanup after connection loss */ | 583 | /* transfer log cleanup after connection loss */ |
580 | /* assert something? */ | 584 | /* assert something? */ |
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 16119d7056cc..02d575d24518 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -91,6 +91,7 @@ enum drbd_req_event { | |||
91 | send_failed, | 91 | send_failed, |
92 | handed_over_to_network, | 92 | handed_over_to_network, |
93 | connection_lost_while_pending, | 93 | connection_lost_while_pending, |
94 | read_retry_remote_canceled, | ||
94 | recv_acked_by_peer, | 95 | recv_acked_by_peer, |
95 | write_acked_by_peer, | 96 | write_acked_by_peer, |
96 | write_acked_by_peer_and_sis, /* and set_in_sync */ | 97 | write_acked_by_peer_and_sis, /* and set_in_sync */ |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a12b447bafbd..67371fcbc5aa 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -266,10 +266,8 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
266 | * to give the disk the chance to relocate that block */ | 266 | * to give the disk the chance to relocate that block */ |
267 | 267 | ||
268 | spin_lock_irq(&mdev->req_lock); | 268 | spin_lock_irq(&mdev->req_lock); |
269 | if (cancel || | 269 | if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { |
270 | mdev->state.conn < C_CONNECTED || | 270 | _req_mod(req, read_retry_remote_canceled); |
271 | mdev->state.pdsk <= D_INCONSISTENT) { | ||
272 | _req_mod(req, send_canceled); | ||
273 | spin_unlock_irq(&mdev->req_lock); | 271 | spin_unlock_irq(&mdev->req_lock); |
274 | dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); | 272 | dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); |
275 | return 1; | 273 | return 1; |