author    Lars Ellenberg <lars.ellenberg@linbit.com>	2012-09-27 09:18:21 -0400
committer Philipp Reisner <philipp.reisner@linbit.com>	2012-11-09 08:11:41 -0500
commit    edc9f5eb7afa3d832f540fcfe10e3e1087e6f527
tree      eba63d771575a42a6aa81bd55a59f7d6253d18ea /drivers/block/drbd
parent    e34b677d09ce375a87acd0360537cbed33881b0c
drbd: always write bitmap on detach
If we detach due to a local read-error (which sets a bit in the bitmap), stay Primary, and then re-attach (which re-reads the bitmap from disk), we potentially lost the "out-of-sync" (or, "bad block") information in the bitmap.

Always (try to) write out the changed bitmap pages before going diskless.

That way, we don't lose the bit for the bad block; the next resync will fetch it from the peer and rewrite it locally, which may result in block re-allocation in some lower layer (or the hardware), and thereby "heal" the bad blocks.

If the bitmap writeout errors out as well, we will (again: try to) mark the "we need a full sync" bit in our super block, if it was a READ error; writes are covered by the activity log already.

If that superblock does not make it to disk either, we are sorry.

Maybe we just lost an entire disk or controller (or iSCSI connection), and there actually are no bad blocks at all, so we don't need to re-fetch from the peer; there is no "auto-healing" necessary.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
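For illustration only, the detach-time fallback this patch adds can be condensed into a small standalone sketch in plain C. The device struct and the try_write_bitmap() / try_set_full_sync_flag() helpers below are made-up stand-ins for DRBD's mdev flags, drbd_bitmap_io_from_worker() and drbd_md_set_flag()/drbd_md_sync(); they are not the kernel API shown in the diff.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the relevant per-device state (hypothetical, not DRBD's). */
struct device {
	bool bitmap_present;	/* models the mdev->bitmap != NULL check */
	bool was_read_error;	/* models the new WAS_READ_ERROR flag    */
};

/* Stub writeout helpers: return false to simulate an I/O error. */
static bool try_write_bitmap(struct device *dev)       { (void)dev; return false; }
static bool try_set_full_sync_flag(struct device *dev) { (void)dev; return true;  }

/* Sketch of the logic added to w_go_diskless():
 * 1. before going diskless, try to flush the changed bitmap pages;
 * 2. if that writeout fails and the detach was caused by a READ error,
 *    fall back to requesting a full sync via the superblock flag;
 * 3. WRITE errors need no fallback, the activity log already covers them. */
static void go_diskless(struct device *dev)
{
	if (dev->bitmap_present && !try_write_bitmap(dev)) {
		if (dev->was_read_error && !try_set_full_sync_flag(dev))
			fprintf(stderr, "superblock writeout failed as well, giving up\n");
	}
	/* ... force the disk state to D_DISKLESS ... */
}

int main(void)
{
	struct device dev = { .bitmap_present = true, .was_read_error = true };
	go_diskless(&dev);
	return 0;
}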
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--  drivers/block/drbd/drbd_int.h    | 39
-rw-r--r--  drivers/block/drbd/drbd_main.c   | 20
-rw-r--r--  drivers/block/drbd/drbd_nl.c     |  2
-rw-r--r--  drivers/block/drbd/drbd_req.c    |  9
-rw-r--r--  drivers/block/drbd/drbd_worker.c |  4
5 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index eeab868f056b..32a9ab67a5f6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -682,7 +682,8 @@ enum {
 			once no more io in flight, start bitmap io */
 	BITMAP_IO_QUEUED,	/* Started bitmap IO */
 	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
-	WAS_IO_ERROR,		/* Local disk failed returned IO error */
+	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
+	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
 	RESYNC_AFTER_NEG,	/* Resync after online grow after the attach&negotiate finished. */
 	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
@@ -1142,6 +1143,9 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 extern int drbd_bitmap_io(struct drbd_conf *mdev,
 		int (*io_fn)(struct drbd_conf *),
 		char *why, enum bm_flag flags);
+extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags);
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
 extern void drbd_go_diskless(struct drbd_conf *mdev);
@@ -1661,14 +1665,15 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev)
 }
 
 enum drbd_force_detach_flags {
-	DRBD_IO_ERROR,
+	DRBD_READ_ERROR,
+	DRBD_WRITE_ERROR,
 	DRBD_META_IO_ERROR,
 	DRBD_FORCE_DETACH,
 };
 
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
-	enum drbd_force_detach_flags forcedetach,
+	enum drbd_force_detach_flags df,
 	const char *where)
 {
 	enum drbd_io_error_p ep;
@@ -1678,18 +1683,40 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
 	rcu_read_unlock();
 	switch (ep) {
 	case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
-		if (forcedetach == DRBD_IO_ERROR) {
+		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s.\n", where);
 			if (mdev->state.disk > D_INCONSISTENT)
 				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
 			break;
 		}
-		/* NOTE fall through to detach case if forcedetach set */
+		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
 	case EP_DETACH:
 	case EP_CALL_HELPER:
+		/* Remember whether we saw a READ or WRITE error.
+		 *
+		 * Recovery of the affected area for WRITE failure is covered
+		 * by the activity log.
+		 * READ errors may fall outside that area though. Certain READ
+		 * errors can be "healed" by writing good data to the affected
+		 * blocks, which triggers block re-allocation in lower layers.
+		 *
+		 * If we can not write the bitmap after a READ error,
+		 * we may need to trigger a full sync (see w_go_diskless()).
+		 *
+		 * Force-detach is not really an IO error, but rather a
+		 * desperate measure to try to deal with a completely
+		 * unresponsive lower level IO stack.
+		 * Still it should be treated as a WRITE error.
+		 *
+		 * Meta IO error is always WRITE error:
+		 * we read meta data only once during attach,
+		 * which will fail in case of errors.
+		 */
 		set_bit(WAS_IO_ERROR, &mdev->flags);
-		if (forcedetach == DRBD_FORCE_DETACH)
+		if (df == DRBD_READ_ERROR)
+			set_bit(WAS_READ_ERROR, &mdev->flags);
+		if (df == DRBD_FORCE_DETACH)
 			set_bit(FORCE_DETACH, &mdev->flags);
 		if (mdev->state.disk > D_FAILED) {
 			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5e5a6abb2819..0f73e157dee0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3226,6 +3226,26 @@ static int w_go_diskless(struct drbd_work *w, int unused)
 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
 	 * the protected members anymore, though, so once put_ldev reaches zero
 	 * again, it will be safe to free them. */
+
+	/* Try to write changed bitmap pages, read errors may have just
+	 * set some bits outside the area covered by the activity log.
+	 *
+	 * If we have an IO error during the bitmap writeout,
+	 * we will want a full sync next time, just in case.
+	 * (Do we want a specific meta data flag for this?)
+	 *
+	 * If that does not make it to stable storage either,
+	 * we cannot do anything about that anymore. */
+	if (mdev->bitmap) {
+		if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+					"detach", BM_LOCKED_MASK)) {
+			if (test_bit(WAS_READ_ERROR, &mdev->flags)) {
+				drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+				drbd_md_sync(mdev);
+			}
+		}
+	}
+
 	drbd_force_state(mdev, NS(disk, D_DISKLESS));
 	return 0;
 }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 476491ffdabc..52258867222a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1294,6 +1294,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	/* make sure there is no leftover from previous force-detach attempts */
 	clear_bit(FORCE_DETACH, &mdev->flags);
+	clear_bit(WAS_IO_ERROR, &mdev->flags);
+	clear_bit(WAS_READ_ERROR, &mdev->flags);
 
 	/* and no leftover from previously aborted resync or verify, either */
 	mdev->rs_total = 0;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index e307890e6afe..97a9e69dd239 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -492,11 +492,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
 		break;
 
+	case WRITE_COMPLETED_WITH_ERROR:
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
+		mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
+		break;
+
 	case READ_COMPLETED_WITH_ERROR:
 		drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);
-		/* fall through. */
-	case WRITE_COMPLETED_WITH_ERROR:
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 		/* fall through. */
 	case READ_AHEAD_COMPLETED_WITH_ERROR:
 		/* it is legal to fail READA, no __drbd_chk_io_error in that case. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d348260301f1..64a7305c678a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo
 	if (list_empty(&mdev->read_ee))
 		wake_up(&mdev->ee_wait);
 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 
 	drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
@@ -147,7 +147,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel
 	do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
 
 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 
 	if (block_id == ID_SYNCER)