diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2012-07-30 03:07:28 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-08 10:58:39 -0500 |
commit | 0c849666016cbf541c1030eec55f5f8dd1fba513 (patch) | |
tree | 35a49ef17d65375947404bd81759fc815bf09d5d /drivers | |
parent | bf709c8552bcbbbc66ecc11555a781e814a037d8 (diff) |
drbd: differentiate between normal and forced detach
Aborting local requests (not waiting for completion from the lower level
disk) is dangerous: if the master bio has already been completed to the upper
layers, its data pages may have been re-used for other things.
If local IO is still pending and later completes,
this may cause crashes or corrupt unrelated data.
Only abort local IO if explicitly requested.
The intended use case is a lower level device that has turned into a tarpit:
it no longer completes IO requests, not even with an error completion.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 17 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 18 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 4 |
8 files changed, 41 insertions, 14 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index f500dc5cdf52..209b2e063b92 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -445,7 +445,7 @@ _al_write_transaction(struct drbd_conf *mdev) | |||
445 | /* drbd_chk_io_error done already */ | 445 | /* drbd_chk_io_error done already */ |
446 | else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { | 446 | else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { |
447 | err = -EIO; | 447 | err = -EIO; |
448 | drbd_chk_io_error(mdev, 1, true); | 448 | drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); |
449 | } else { | 449 | } else { |
450 | /* advance ringbuffer position and transaction counter */ | 450 | /* advance ringbuffer position and transaction counter */ |
451 | mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); | 451 | mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); |
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b3d55d4b6937..33626e34c92a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c | |||
@@ -1135,7 +1135,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w | |||
1135 | 1135 | ||
1136 | if (ctx->error) { | 1136 | if (ctx->error) { |
1137 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); | 1137 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); |
1138 | drbd_chk_io_error(mdev, 1, true); | 1138 | drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); |
1139 | err = -EIO; /* ctx->error ? */ | 1139 | err = -EIO; /* ctx->error ? */ |
1140 | } | 1140 | } |
1141 | 1141 | ||
@@ -1260,7 +1260,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc | |||
1260 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); | 1260 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1261 | 1261 | ||
1262 | if (ctx->error) | 1262 | if (ctx->error) |
1263 | drbd_chk_io_error(mdev, 1, true); | 1263 | drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); |
1264 | /* that should force detach, so the in memory bitmap will be | 1264 | /* that should force detach, so the in memory bitmap will be |
1265 | * gone in a moment as well. */ | 1265 | * gone in a moment as well. */ |
1266 | 1266 | ||
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b343875c9dee..963766bafab4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -689,6 +689,7 @@ enum { | |||
689 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | 689 | BITMAP_IO_QUEUED, /* Started bitmap IO */ |
690 | GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ | 690 | GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ |
691 | WAS_IO_ERROR, /* Local disk failed returned IO error */ | 691 | WAS_IO_ERROR, /* Local disk failed returned IO error */ |
692 | FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ | ||
692 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ | 693 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ |
693 | RESIZE_PENDING, /* Size change detected locally, waiting for the response from | 694 | RESIZE_PENDING, /* Size change detected locally, waiting for the response from |
694 | * the peer, if it changed there as well. */ | 695 | * the peer, if it changed there as well. */ |
@@ -1653,8 +1654,16 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) | |||
1653 | return rv; | 1654 | return rv; |
1654 | } | 1655 | } |
1655 | 1656 | ||
1657 | enum drbd_force_detach_flags { | ||
1658 | DRBD_IO_ERROR, | ||
1659 | DRBD_META_IO_ERROR, | ||
1660 | DRBD_FORCE_DETACH, | ||
1661 | }; | ||
1662 | |||
1656 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) | 1663 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) |
1657 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) | 1664 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, |
1665 | enum drbd_force_detach_flags forcedetach, | ||
1666 | const char *where) | ||
1658 | { | 1667 | { |
1659 | enum drbd_io_error_p ep; | 1668 | enum drbd_io_error_p ep; |
1660 | 1669 | ||
@@ -1663,7 +1672,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, | |||
1663 | rcu_read_unlock(); | 1672 | rcu_read_unlock(); |
1664 | switch (ep) { | 1673 | switch (ep) { |
1665 | case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ | 1674 | case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ |
1666 | if (!forcedetach) { | 1675 | if (forcedetach == DRBD_IO_ERROR) { |
1667 | if (__ratelimit(&drbd_ratelimit_state)) | 1676 | if (__ratelimit(&drbd_ratelimit_state)) |
1668 | dev_err(DEV, "Local IO failed in %s.\n", where); | 1677 | dev_err(DEV, "Local IO failed in %s.\n", where); |
1669 | if (mdev->state.disk > D_INCONSISTENT) | 1678 | if (mdev->state.disk > D_INCONSISTENT) |
@@ -1674,6 +1683,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, | |||
1674 | case EP_DETACH: | 1683 | case EP_DETACH: |
1675 | case EP_CALL_HELPER: | 1684 | case EP_CALL_HELPER: |
1676 | set_bit(WAS_IO_ERROR, &mdev->flags); | 1685 | set_bit(WAS_IO_ERROR, &mdev->flags); |
1686 | if (forcedetach == DRBD_FORCE_DETACH) | ||
1687 | set_bit(FORCE_DETACH, &mdev->flags); | ||
1677 | if (mdev->state.disk > D_FAILED) { | 1688 | if (mdev->state.disk > D_FAILED) { |
1678 | _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); | 1689 | _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); |
1679 | dev_err(DEV, | 1690 | dev_err(DEV, |
@@ -1693,7 +1704,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, | |||
1693 | */ | 1704 | */ |
1694 | #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) | 1705 | #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) |
1695 | static inline void drbd_chk_io_error_(struct drbd_conf *mdev, | 1706 | static inline void drbd_chk_io_error_(struct drbd_conf *mdev, |
1696 | int error, int forcedetach, const char *where) | 1707 | int error, enum drbd_force_detach_flags forcedetach, const char *where) |
1697 | { | 1708 | { |
1698 | if (error) { | 1709 | if (error) { |
1699 | unsigned long flags; | 1710 | unsigned long flags; |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c0acd86c8415..849e5de9ea8f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -2866,7 +2866,7 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
2866 | if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { | 2866 | if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { |
2867 | /* this was a try anyways ... */ | 2867 | /* this was a try anyways ... */ |
2868 | dev_err(DEV, "meta data update failed!\n"); | 2868 | dev_err(DEV, "meta data update failed!\n"); |
2869 | drbd_chk_io_error(mdev, 1, true); | 2869 | drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); |
2870 | } | 2870 | } |
2871 | 2871 | ||
2872 | /* Update mdev->ldev->md.la_size_sect, | 2872 | /* Update mdev->ldev->md.la_size_sect, |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 323293e88878..d4c05e26a13a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -1299,6 +1299,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1299 | * to realize a "hot spare" feature (not that I'd recommend that) */ | 1299 | * to realize a "hot spare" feature (not that I'd recommend that) */ |
1300 | wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); | 1300 | wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); |
1301 | 1301 | ||
1302 | /* make sure there is no leftover from previous force-detach attempts */ | ||
1303 | clear_bit(FORCE_DETACH, &mdev->flags); | ||
1304 | |||
1302 | /* allocation not in the IO path, drbdsetup context */ | 1305 | /* allocation not in the IO path, drbdsetup context */ |
1303 | nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); | 1306 | nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); |
1304 | if (!nbc) { | 1307 | if (!nbc) { |
@@ -1683,6 +1686,7 @@ static int adm_detach(struct drbd_conf *mdev, int force) | |||
1683 | int ret; | 1686 | int ret; |
1684 | 1687 | ||
1685 | if (force) { | 1688 | if (force) { |
1689 | set_bit(FORCE_DETACH, &mdev->flags); | ||
1686 | drbd_force_state(mdev, NS(disk, D_FAILED)); | 1690 | drbd_force_state(mdev, NS(disk, D_FAILED)); |
1687 | retcode = SS_SUCCESS; | 1691 | retcode = SS_SUCCESS; |
1688 | goto out; | 1692 | goto out; |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 891c3d41a277..e215dce4c694 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -490,7 +490,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
490 | drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); | 490 | drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); |
491 | /* fall through. */ | 491 | /* fall through. */ |
492 | case WRITE_COMPLETED_WITH_ERROR: | 492 | case WRITE_COMPLETED_WITH_ERROR: |
493 | __drbd_chk_io_error(mdev, false); | 493 | __drbd_chk_io_error(mdev, DRBD_IO_ERROR); |
494 | /* fall through. */ | 494 | /* fall through. */ |
495 | case READ_AHEAD_COMPLETED_WITH_ERROR: | 495 | case READ_AHEAD_COMPLETED_WITH_ERROR: |
496 | /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ | 496 | /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ |
@@ -1210,7 +1210,7 @@ void request_timer_fn(unsigned long data) | |||
1210 | time_after(now, req->start_time + dt) && | 1210 | time_after(now, req->start_time + dt) && |
1211 | !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { | 1211 | !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { |
1212 | dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); | 1212 | dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); |
1213 | __drbd_chk_io_error(mdev, 1); | 1213 | __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH); |
1214 | } | 1214 | } |
1215 | nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; | 1215 | nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; |
1216 | spin_unlock_irq(&tconn->req_lock); | 1216 | spin_unlock_irq(&tconn->req_lock); |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 84a5072d7370..c9ec7d37632c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -1335,9 +1335,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1335 | 1335 | ||
1336 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); | 1336 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); |
1337 | 1337 | ||
1338 | /* Immediately allow completion of all application IO, that waits | 1338 | /* Immediately allow completion of all application IO, |
1339 | for completion from the local disk. */ | 1339 | * that waits for completion from the local disk, |
1340 | tl_abort_disk_io(mdev); | 1340 | * if this was a force-detach due to disk_timeout |
1341 | * or administrator request (drbdsetup detach --force). | ||
1342 | * Do NOT abort otherwise. | ||
1343 | * Aborting local requests may cause serious problems, | ||
1344 | * if requests are completed to upper layers already, | ||
1345 | * and then later the already submitted local bio completes. | ||
1346 | * This can cause DMA into former bio pages that meanwhile | ||
1347 | * have been re-used for other things. | ||
1348 | * So aborting local requests may cause crashes, | ||
1349 | * or even worse, silent data corruption. | ||
1350 | */ | ||
1351 | if (test_and_clear_bit(FORCE_DETACH, &mdev->flags)) | ||
1352 | tl_abort_disk_io(mdev); | ||
1341 | 1353 | ||
1342 | /* current state still has to be D_FAILED, | 1354 | /* current state still has to be D_FAILED, |
1343 | * there is only one way out: to D_DISKLESS, | 1355 | * there is only one way out: to D_DISKLESS, |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 66be3910e8d2..07a4046dd8c3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo | |||
106 | if (list_empty(&mdev->read_ee)) | 106 | if (list_empty(&mdev->read_ee)) |
107 | wake_up(&mdev->ee_wait); | 107 | wake_up(&mdev->ee_wait); |
108 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) | 108 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) |
109 | __drbd_chk_io_error(mdev, false); | 109 | __drbd_chk_io_error(mdev, DRBD_IO_ERROR); |
110 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | 110 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); |
111 | 111 | ||
112 | drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); | 112 | drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); |
@@ -148,7 +148,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel | |||
148 | do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); | 148 | do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); |
149 | 149 | ||
150 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) | 150 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) |
151 | __drbd_chk_io_error(mdev, false); | 151 | __drbd_chk_io_error(mdev, DRBD_IO_ERROR); |
152 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | 152 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); |
153 | 153 | ||
154 | if (block_id == ID_SYNCER) | 154 | if (block_id == ID_SYNCER) |