diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2011-12-14 12:01:21 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-05-09 09:16:30 -0400 |
commit | 7caacb69ac468ea713e8e8ba77be8040d8fe7bbe (patch) | |
tree | a2cfa89a0b77fe22160ca51767c37269c677b263 /drivers/block | |
parent | 4afc433cf8066c112bd2bdd949d78ff8e8b4ba3f (diff) |
drbd: Consider the disk-timeout also for meta-data IO operations
If the backing device was already frozen during attach, we failed
to recognize that. The current disk-timeout code works on top
of drbd_request objects. During attach we do not allow IO
and therefore never generate a drbd_request object; instead we
block before that, in drbd_make_request().
This patch adds the timeout to all drbd_md_sync_page_io() calls.
Before this patch we used to go from D_ATTACHING directly
to D_DISKLESS if IO failed during attach. We can no longer
do this, since we have to stay in D_FAILED until all IO
ops issued to the backing device have returned.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 13 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 3 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 64 |
4 files changed, 45 insertions, 39 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 601ad9ef0437..08bd7c1b36e1 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -88,9 +88,16 @@ static bool md_io_allowed(struct drbd_conf *mdev) | |||
88 | return ds >= D_NEGOTIATING || ds == D_ATTACHING; | 88 | return ds >= D_NEGOTIATING || ds == D_ATTACHING; |
89 | } | 89 | } |
90 | 90 | ||
91 | void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) | 91 | void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, |
92 | unsigned int *done) | ||
92 | { | 93 | { |
93 | wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); | 94 | long dt = bdev->dc.disk_timeout * HZ / 10; |
95 | if (dt == 0) | ||
96 | dt = MAX_SCHEDULE_TIMEOUT; | ||
97 | |||
98 | dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); | ||
99 | if (dt == 0) | ||
100 | dev_err(DEV, "meta-data IO operation timed out\n"); | ||
94 | } | 101 | } |
95 | 102 | ||
96 | static int _drbd_md_sync_page_io(struct drbd_conf *mdev, | 103 | static int _drbd_md_sync_page_io(struct drbd_conf *mdev, |
@@ -130,7 +137,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, | |||
130 | bio_endio(bio, -EIO); | 137 | bio_endio(bio, -EIO); |
131 | else | 138 | else |
132 | submit_bio(rw, bio); | 139 | submit_bio(rw, bio); |
133 | wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); | 140 | wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); |
134 | ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0; | 141 | ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0; |
135 | 142 | ||
136 | out: | 143 | out: |
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 9611db43cc7a..49603bc67fe4 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c | |||
@@ -1088,7 +1088,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1088 | * "in_flight reached zero, all done" event. | 1088 | * "in_flight reached zero, all done" event. |
1089 | */ | 1089 | */ |
1090 | if (!atomic_dec_and_test(&ctx->in_flight)) | 1090 | if (!atomic_dec_and_test(&ctx->in_flight)) |
1091 | wait_until_done_or_disk_failure(mdev, &ctx->done); | 1091 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1092 | else | 1092 | else |
1093 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | 1093 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); |
1094 | 1094 | ||
@@ -1195,7 +1195,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc | |||
1195 | } | 1195 | } |
1196 | 1196 | ||
1197 | bm_page_io_async(ctx, idx, WRITE_SYNC); | 1197 | bm_page_io_async(ctx, idx, WRITE_SYNC); |
1198 | wait_until_done_or_disk_failure(mdev, &ctx->done); | 1198 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1199 | 1199 | ||
1200 | if (ctx->error) | 1200 | if (ctx->error) |
1201 | drbd_chk_io_error(mdev, 1, true); | 1201 | drbd_chk_io_error(mdev, 1, true); |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 31dee20f3411..fe5797f73d88 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -1541,7 +1541,8 @@ extern void *drbd_md_get_buffer(struct drbd_conf *mdev); | |||
1541 | extern void drbd_md_put_buffer(struct drbd_conf *mdev); | 1541 | extern void drbd_md_put_buffer(struct drbd_conf *mdev); |
1542 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, | 1542 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, |
1543 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | 1543 | struct drbd_backing_dev *bdev, sector_t sector, int rw); |
1544 | extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); | 1544 | extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, |
1545 | unsigned int *done); | ||
1545 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); | 1546 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); |
1546 | extern void drbd_rs_controller_reset(struct drbd_conf *mdev); | 1547 | extern void drbd_rs_controller_reset(struct drbd_conf *mdev); |
1547 | 1548 | ||
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b2c0e5f0d52c..8ca8925520ad 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -916,11 +916,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
916 | if (ns.disk == D_FAILED && os.disk == D_DISKLESS) | 916 | if (ns.disk == D_FAILED && os.disk == D_DISKLESS) |
917 | ns.disk = D_DISKLESS; | 917 | ns.disk = D_DISKLESS; |
918 | 918 | ||
919 | /* if we are only D_ATTACHING yet, | ||
920 | * we can (and should) go directly to D_DISKLESS. */ | ||
921 | if (ns.disk == D_FAILED && os.disk == D_ATTACHING) | ||
922 | ns.disk = D_DISKLESS; | ||
923 | |||
924 | /* After C_DISCONNECTING only C_STANDALONE may follow */ | 919 | /* After C_DISCONNECTING only C_STANDALONE may follow */ |
925 | if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) | 920 | if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) |
926 | ns.conn = os.conn; | 921 | ns.conn = os.conn; |
@@ -1592,35 +1587,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1592 | /* first half of local IO error, failure to attach, | 1587 | /* first half of local IO error, failure to attach, |
1593 | * or administrative detach */ | 1588 | * or administrative detach */ |
1594 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { | 1589 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { |
1595 | enum drbd_io_error_p eh; | 1590 | enum drbd_io_error_p eh = EP_PASS_ON; |
1596 | int was_io_error; | 1591 | int was_io_error = 0; |
1597 | /* corresponding get_ldev was in __drbd_set_state, to serialize | 1592 | /* corresponding get_ldev was in __drbd_set_state, to serialize |
1598 | * our cleanup here with the transition to D_DISKLESS, | 1593 | * our cleanup here with the transition to D_DISKLESS. |
1599 | * so it is safe to dreference ldev here. */ | 1594 | * But is is still not save to dreference ldev here, since |
1600 | eh = mdev->ldev->dc.on_io_error; | 1595 | * we might come from an failed Attach before ldev was set. */ |
1601 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); | 1596 | if (mdev->ldev) { |
1602 | 1597 | eh = mdev->ldev->dc.on_io_error; | |
1603 | /* Immediately allow completion of all application IO, that waits | 1598 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); |
1604 | for completion from the local disk. */ | 1599 | |
1605 | tl_abort_disk_io(mdev); | 1600 | /* Immediately allow completion of all application IO, that waits |
1606 | 1601 | for completion from the local disk. */ | |
1607 | /* current state still has to be D_FAILED, | 1602 | tl_abort_disk_io(mdev); |
1608 | * there is only one way out: to D_DISKLESS, | 1603 | |
1609 | * and that may only happen after our put_ldev below. */ | 1604 | /* current state still has to be D_FAILED, |
1610 | if (mdev->state.disk != D_FAILED) | 1605 | * there is only one way out: to D_DISKLESS, |
1611 | dev_err(DEV, | 1606 | * and that may only happen after our put_ldev below. */ |
1612 | "ASSERT FAILED: disk is %s during detach\n", | 1607 | if (mdev->state.disk != D_FAILED) |
1613 | drbd_disk_str(mdev->state.disk)); | 1608 | dev_err(DEV, |
1614 | 1609 | "ASSERT FAILED: disk is %s during detach\n", | |
1615 | if (ns.conn >= C_CONNECTED) | 1610 | drbd_disk_str(mdev->state.disk)); |
1616 | drbd_send_state(mdev, ns); | 1611 | |
1617 | 1612 | if (ns.conn >= C_CONNECTED) | |
1618 | drbd_rs_cancel_all(mdev); | 1613 | drbd_send_state(mdev, ns); |
1619 | 1614 | ||
1620 | /* In case we want to get something to stable storage still, | 1615 | drbd_rs_cancel_all(mdev); |
1621 | * this may be the last chance. | 1616 | |
1622 | * Following put_ldev may transition to D_DISKLESS. */ | 1617 | /* In case we want to get something to stable storage still, |
1623 | drbd_md_sync(mdev); | 1618 | * this may be the last chance. |
1619 | * Following put_ldev may transition to D_DISKLESS. */ | ||
1620 | drbd_md_sync(mdev); | ||
1621 | } | ||
1624 | put_ldev(mdev); | 1622 | put_ldev(mdev); |
1625 | 1623 | ||
1626 | if (was_io_error && eh == EP_CALL_HELPER) | 1624 | if (was_io_error && eh == EP_CALL_HELPER) |