drbd: always write bitmap on detach

If we detach due to a local read error (which sets a bit in the bitmap), stay Primary, and then re-attach (which re-reads the bitmap from disk), we potentially lose the "out-of-sync" (or, "bad block") information in the bitmap. Always (try to) write out the changed bitmap pages before going diskless. That way, we don't lose the bit for the bad block, the next resync will fetch it from the peer, and rewrite it locally, which may result in block reallocation in some lower layer (or the hardware), and thereby "heal" the bad blocks. If the bitmap writeout errors out as well, we will (again: try to) mark the "we need a full sync" bit in our superblock, if it was a READ error; writes are covered by the activity log already. If that superblock does not make it to disk either, we are sorry. Maybe we just lost an entire disk or controller (or iSCSI connection), and there actually are no bad blocks at all, so we don't need to re-fetch from the peer, there is no "auto-healing" necessary. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2012-09-22 06:26:57 -0400
committer: Jens Axboe <axboe@kernel.dk> 2012-10-30 03:39:18 -0400
commit: a2a3c74f243d5d1793f89ccdceaa6918851f7fce (patch)
tree: 3d828d28daaabf6540e7a157454683a1a678524a /drivers/block/drbd/drbd_main.c
parent: 06f10adbdb027b225fd51584a218fa8344169514 (diff)
1 files changed, 32 insertions, 9 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index d8ba5c42670f..9b833e0fb440 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        /* first half of local IO error, failure to attach,
         * or administrative detach */
        if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-                enum drbd_io_error_p eh = EP_PASS_ON;
-                int was_io_error = 0;
                /* corresponding get_ldev was in __drbd_set_state, to serialize
                 * our cleanup here with the transition to D_DISKLESS.
-                 * But is is still not save to dreference ldev here, since
+                 * But it is still not safe to dreference ldev here, we may end
-                 * we might come from an failed Attach before ldev was set. */
+                 * up here from a failed attach, before ldev was even set.  */
                if (mdev->ldev) {
-                        eh = mdev->ldev->dc.on_io_error;
+                        enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
-                        was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);
+                        /* In some setups, this handler triggers a suicide,
-                        if (was_io_error && eh == EP_CALL_HELPER)
+                         * basically mapping IO error to node failure, to
+                         * reduce the number of different failure scenarios.
+                         *
+                         * This handler intentionally runs before we abort IO,
+                         * notify the peer, or try to update our meta data. */
+                        if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
                                drbd_khelper(mdev, "local-io-error");
                        /* Immediately allow completion of all application IO,
@@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                         * So aborting local requests may cause crashes,
                         * or even worse, silent data corruption.
                         */
-                        if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
+                        if (drbd_test_flag(mdev, FORCE_DETACH))
                                tl_abort_disk_io(mdev);
                        /* current state still has to be D_FAILED,
@@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
         * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
         * the protected members anymore, though, so once put_ldev reaches zero
         * again, it will be safe to free them. */
+        /* Try to write changed bitmap pages, read errors may have just
+         * set some bits outside the area covered by the activity log.
+         *
+         * If we have an IO error during the bitmap writeout,
+         * we will want a full sync next time, just in case.
+         * (Do we want a specific meta data flag for this?)
+         *
+         * If that does not make it to stable storage either,
+         * we cannot do anything about that anymore.  */
+        if (mdev->bitmap) {
+                if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+                                        "detach", BM_LOCKED_MASK)) {
+                        if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
+                                drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+                                drbd_md_sync(mdev);
+                        }
+                }
+        }
        drbd_force_state(mdev, NS(disk, D_DISKLESS));
        return 1;
 }
author	Lars Ellenberg <lars.ellenberg@linbit.com>	2012-09-22 06:26:57 -0400
committer	Jens Axboe <axboe@kernel.dk>	2012-10-30 03:39:18 -0400
commit	a2a3c74f243d5d1793f89ccdceaa6918851f7fce (patch)
tree	3d828d28daaabf6540e7a157454683a1a678524a /drivers/block/drbd/drbd_main.c
parent	06f10adbdb027b225fd51584a218fa8344169514 (diff)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d8ba5c42670f..9b833e0fb440 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c
@@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1617	/* first half of local IO error, failure to attach,	1617	/* first half of local IO error, failure to attach,
1618	* or administrative detach */	1618	* or administrative detach */
1619	if (os.disk != D_FAILED && ns.disk == D_FAILED) {	1619	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1620	enum drbd_io_error_p eh = EP_PASS_ON;
1621	int was_io_error = 0;
1622	/* corresponding get_ldev was in __drbd_set_state, to serialize	1620	/* corresponding get_ldev was in __drbd_set_state, to serialize
1623	* our cleanup here with the transition to D_DISKLESS.	1621	* our cleanup here with the transition to D_DISKLESS.
1624	* But is is still not save to dreference ldev here, since	1622	* But it is still not safe to dreference ldev here, we may end
1625	* we might come from an failed Attach before ldev was set. */	1623	* up here from a failed attach, before ldev was even set. */
1626	if (mdev->ldev) {	1624	if (mdev->ldev) {
1627	eh = mdev->ldev->dc.on_io_error;	1625	enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
1628	was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);	1626
1629		1627	/* In some setups, this handler triggers a suicide,
1630	if (was_io_error && eh == EP_CALL_HELPER)	1628	* basically mapping IO error to node failure, to
		1629	* reduce the number of different failure scenarios.
		1630	*
		1631	* This handler intentionally runs before we abort IO,
		1632	* notify the peer, or try to update our meta data. */
		1633	if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
1631	drbd_khelper(mdev, "local-io-error");	1634	drbd_khelper(mdev, "local-io-error");
1632		1635
1633	/* Immediately allow completion of all application IO,	1636	/* Immediately allow completion of all application IO,
@@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1643	* So aborting local requests may cause crashes,	1646	* So aborting local requests may cause crashes,
1644	* or even worse, silent data corruption.	1647	* or even worse, silent data corruption.
1645	*/	1648	*/
1646	if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))	1649	if (drbd_test_flag(mdev, FORCE_DETACH))
1647	tl_abort_disk_io(mdev);	1650	tl_abort_disk_io(mdev);
1648		1651
1649	/* current state still has to be D_FAILED,	1652	/* current state still has to be D_FAILED,
@@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
4220	* inc/dec it frequently. Once we are D_DISKLESS, no one will touch	4223	* inc/dec it frequently. Once we are D_DISKLESS, no one will touch
4221	* the protected members anymore, though, so once put_ldev reaches zero	4224	* the protected members anymore, though, so once put_ldev reaches zero
4222	* again, it will be safe to free them. */	4225	* again, it will be safe to free them. */
		4226
		4227	/* Try to write changed bitmap pages, read errors may have just
		4228	* set some bits outside the area covered by the activity log.
		4229	*
		4230	* If we have an IO error during the bitmap writeout,
		4231	* we will want a full sync next time, just in case.
		4232	* (Do we want a specific meta data flag for this?)
		4233	*
		4234	* If that does not make it to stable storage either,
		4235	* we cannot do anything about that anymore. */
		4236	if (mdev->bitmap) {
		4237	if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
		4238	"detach", BM_LOCKED_MASK)) {
		4239	if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
		4240	drbd_md_set_flag(mdev, MDF_FULL_SYNC);
		4241	drbd_md_sync(mdev);
		4242	}
		4243	}
		4244	}
		4245
4223	drbd_force_state(mdev, NS(disk, D_DISKLESS));	4246	drbd_force_state(mdev, NS(disk, D_DISKLESS));
4224	return 1;	4247	return 1;
4225	}	4248	}