diff options
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 39 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 20 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 9 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 4 |
5 files changed, 63 insertions, 11 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eeab868f056b..32a9ab67a5f6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -682,7 +682,8 @@ enum { | |||
682 | once no more io in flight, start bitmap io */ | 682 | once no more io in flight, start bitmap io */ |
683 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | 683 | BITMAP_IO_QUEUED, /* Started bitmap IO */ |
684 | GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ | 684 | GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ |
685 | WAS_IO_ERROR, /* Local disk failed returned IO error */ | 685 | WAS_IO_ERROR, /* Local disk failed, returned IO error */ |
686 | WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ | ||
686 | FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ | 687 | FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ |
687 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ | 688 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ |
688 | RESIZE_PENDING, /* Size change detected locally, waiting for the response from | 689 | RESIZE_PENDING, /* Size change detected locally, waiting for the response from |
@@ -1142,6 +1143,9 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, | |||
1142 | extern int drbd_bitmap_io(struct drbd_conf *mdev, | 1143 | extern int drbd_bitmap_io(struct drbd_conf *mdev, |
1143 | int (*io_fn)(struct drbd_conf *), | 1144 | int (*io_fn)(struct drbd_conf *), |
1144 | char *why, enum bm_flag flags); | 1145 | char *why, enum bm_flag flags); |
1146 | extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, | ||
1147 | int (*io_fn)(struct drbd_conf *), | ||
1148 | char *why, enum bm_flag flags); | ||
1145 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); | 1149 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); |
1146 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); | 1150 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); |
1147 | extern void drbd_go_diskless(struct drbd_conf *mdev); | 1151 | extern void drbd_go_diskless(struct drbd_conf *mdev); |
@@ -1661,14 +1665,15 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) | |||
1661 | } | 1665 | } |
1662 | 1666 | ||
1663 | enum drbd_force_detach_flags { | 1667 | enum drbd_force_detach_flags { |
1664 | DRBD_IO_ERROR, | 1668 | DRBD_READ_ERROR, |
1669 | DRBD_WRITE_ERROR, | ||
1665 | DRBD_META_IO_ERROR, | 1670 | DRBD_META_IO_ERROR, |
1666 | DRBD_FORCE_DETACH, | 1671 | DRBD_FORCE_DETACH, |
1667 | }; | 1672 | }; |
1668 | 1673 | ||
1669 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) | 1674 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) |
1670 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, | 1675 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, |
1671 | enum drbd_force_detach_flags forcedetach, | 1676 | enum drbd_force_detach_flags df, |
1672 | const char *where) | 1677 | const char *where) |
1673 | { | 1678 | { |
1674 | enum drbd_io_error_p ep; | 1679 | enum drbd_io_error_p ep; |
@@ -1678,18 +1683,40 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, | |||
1678 | rcu_read_unlock(); | 1683 | rcu_read_unlock(); |
1679 | switch (ep) { | 1684 | switch (ep) { |
1680 | case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ | 1685 | case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ |
1681 | if (forcedetach == DRBD_IO_ERROR) { | 1686 | if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { |
1682 | if (__ratelimit(&drbd_ratelimit_state)) | 1687 | if (__ratelimit(&drbd_ratelimit_state)) |
1683 | dev_err(DEV, "Local IO failed in %s.\n", where); | 1688 | dev_err(DEV, "Local IO failed in %s.\n", where); |
1684 | if (mdev->state.disk > D_INCONSISTENT) | 1689 | if (mdev->state.disk > D_INCONSISTENT) |
1685 | _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); | 1690 | _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); |
1686 | break; | 1691 | break; |
1687 | } | 1692 | } |
1688 | /* NOTE fall through to detach case if forcedetach set */ | 1693 | /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */ |
1689 | case EP_DETACH: | 1694 | case EP_DETACH: |
1690 | case EP_CALL_HELPER: | 1695 | case EP_CALL_HELPER: |
1696 | /* Remember whether we saw a READ or WRITE error. | ||
1697 | * | ||
1698 | * Recovery of the affected area for WRITE failure is covered | ||
1699 | * by the activity log. | ||
1700 | * READ errors may fall outside that area though. Certain READ | ||
1701 | * errors can be "healed" by writing good data to the affected | ||
1702 | * blocks, which triggers block re-allocation in lower layers. | ||
1703 | * | ||
1704 | * If we can not write the bitmap after a READ error, | ||
1705 | * we may need to trigger a full sync (see w_go_diskless()). | ||
1706 | * | ||
1707 | * Force-detach is not really an IO error, but rather a | ||
1708 | * desperate measure to try to deal with a completely | ||
1709 | * unresponsive lower level IO stack. | ||
1710 | * Still it should be treated as a WRITE error. | ||
1711 | * | ||
1712 | * Meta IO error is always WRITE error: | ||
1713 | * we read meta data only once during attach, | ||
1714 | * which will fail in case of errors. | ||
1715 | */ | ||
1691 | set_bit(WAS_IO_ERROR, &mdev->flags); | 1716 | set_bit(WAS_IO_ERROR, &mdev->flags); |
1692 | if (forcedetach == DRBD_FORCE_DETACH) | 1717 | if (df == DRBD_READ_ERROR) |
1718 | set_bit(WAS_READ_ERROR, &mdev->flags); | ||
1719 | if (df == DRBD_FORCE_DETACH) | ||
1693 | set_bit(FORCE_DETACH, &mdev->flags); | 1720 | set_bit(FORCE_DETACH, &mdev->flags); |
1694 | if (mdev->state.disk > D_FAILED) { | 1721 | if (mdev->state.disk > D_FAILED) { |
1695 | _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); | 1722 | _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5e5a6abb2819..0f73e157dee0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -3226,6 +3226,26 @@ static int w_go_diskless(struct drbd_work *w, int unused) | |||
3226 | * inc/dec it frequently. Once we are D_DISKLESS, no one will touch | 3226 | * inc/dec it frequently. Once we are D_DISKLESS, no one will touch |
3227 | * the protected members anymore, though, so once put_ldev reaches zero | 3227 | * the protected members anymore, though, so once put_ldev reaches zero |
3228 | * again, it will be safe to free them. */ | 3228 | * again, it will be safe to free them. */ |
3229 | |||
3230 | /* Try to write changed bitmap pages, read errors may have just | ||
3231 | * set some bits outside the area covered by the activity log. | ||
3232 | * | ||
3233 | * If we have an IO error during the bitmap writeout, | ||
3234 | * we will want a full sync next time, just in case. | ||
3235 | * (Do we want a specific meta data flag for this?) | ||
3236 | * | ||
3237 | * If that does not make it to stable storage either, | ||
3238 | * we cannot do anything about that anymore. */ | ||
3239 | if (mdev->bitmap) { | ||
3240 | if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write, | ||
3241 | "detach", BM_LOCKED_MASK)) { | ||
3242 | if (test_bit(WAS_READ_ERROR, &mdev->flags)) { | ||
3243 | drbd_md_set_flag(mdev, MDF_FULL_SYNC); | ||
3244 | drbd_md_sync(mdev); | ||
3245 | } | ||
3246 | } | ||
3247 | } | ||
3248 | |||
3229 | drbd_force_state(mdev, NS(disk, D_DISKLESS)); | 3249 | drbd_force_state(mdev, NS(disk, D_DISKLESS)); |
3230 | return 0; | 3250 | return 0; |
3231 | } | 3251 | } |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 476491ffdabc..52258867222a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -1294,6 +1294,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1294 | 1294 | ||
1295 | /* make sure there is no leftover from previous force-detach attempts */ | 1295 | /* make sure there is no leftover from previous force-detach attempts */ |
1296 | clear_bit(FORCE_DETACH, &mdev->flags); | 1296 | clear_bit(FORCE_DETACH, &mdev->flags); |
1297 | clear_bit(WAS_IO_ERROR, &mdev->flags); | ||
1298 | clear_bit(WAS_READ_ERROR, &mdev->flags); | ||
1297 | 1299 | ||
1298 | /* and no leftover from previously aborted resync or verify, either */ | 1300 | /* and no leftover from previously aborted resync or verify, either */ |
1299 | mdev->rs_total = 0; | 1301 | mdev->rs_total = 0; |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e307890e6afe..97a9e69dd239 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -492,11 +492,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
492 | mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED); | 492 | mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED); |
493 | break; | 493 | break; |
494 | 494 | ||
495 | case WRITE_COMPLETED_WITH_ERROR: | ||
496 | __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); | ||
497 | mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); | ||
498 | break; | ||
499 | |||
495 | case READ_COMPLETED_WITH_ERROR: | 500 | case READ_COMPLETED_WITH_ERROR: |
496 | drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); | 501 | drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); |
497 | /* fall through. */ | 502 | __drbd_chk_io_error(mdev, DRBD_READ_ERROR); |
498 | case WRITE_COMPLETED_WITH_ERROR: | ||
499 | __drbd_chk_io_error(mdev, DRBD_IO_ERROR); | ||
500 | /* fall through. */ | 503 | /* fall through. */ |
501 | case READ_AHEAD_COMPLETED_WITH_ERROR: | 504 | case READ_AHEAD_COMPLETED_WITH_ERROR: |
502 | /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ | 505 | /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d348260301f1..64a7305c678a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo | |||
106 | if (list_empty(&mdev->read_ee)) | 106 | if (list_empty(&mdev->read_ee)) |
107 | wake_up(&mdev->ee_wait); | 107 | wake_up(&mdev->ee_wait); |
108 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) | 108 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) |
109 | __drbd_chk_io_error(mdev, DRBD_IO_ERROR); | 109 | __drbd_chk_io_error(mdev, DRBD_READ_ERROR); |
110 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | 110 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); |
111 | 111 | ||
112 | drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); | 112 | drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); |
@@ -147,7 +147,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel | |||
147 | do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); | 147 | do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); |
148 | 148 | ||
149 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) | 149 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) |
150 | __drbd_chk_io_error(mdev, DRBD_IO_ERROR); | 150 | __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); |
151 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | 151 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); |
152 | 152 | ||
153 | if (block_id == ID_SYNCER) | 153 | if (block_id == ID_SYNCER) |