aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2012-06-14 12:02:52 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-07-24 08:09:53 -0400
commit0029d62434d9045bc3e8b2eb48ae696e30336e92 (patch)
treed1e2ce7a3130e5b489320b12b3568c8a58828697 /drivers/block
parent88437879fbd0ac5cde3f683f8eee455bbf83aaac (diff)
drbd: do not reset rs_pending_cnt too early
Fix asserts like block drbd0: in got_BlockAck:4634: rs_pending_cnt = -35 < 0 ! We reset the resync lru cache and related information (rs_pending_cnt), once we successfully finished a resync or online verify, or if the replication connection is lost. We also need to reset it if a resync or online verify is aborted because a lower level disk failed. In that case the replication link is still established, and we may still have packets queued in the network buffers which want to touch rs_pending_cnt. We do not have any synchronization mechanism to know for sure when all such pending resync related packets have been drained. To avoid this counter to go negative (and violate the ASSERT that it will always be >= 0), just do not reset it when we lose a disk. It is good enough to make sure it is re-initialized before the next resync can start: reset it when we re-attach a disk. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/drbd/drbd_main.c11
-rw-r--r--drivers/block/drbd/drbd_nl.c5
-rw-r--r--drivers/block/drbd/drbd_worker.c8
3 files changed, 12 insertions, 12 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 41ccb580d5ac..91a4853d9eeb 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1514,6 +1514,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1514 1514
1515 /* Do not change the order of the if above and the two below... */ 1515 /* Do not change the order of the if above and the two below... */
1516 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ 1516 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
1517 /* we probably will start a resync soon.
1518 * make sure those things are properly reset. */
1519 mdev->rs_total = 0;
1520 mdev->rs_failed = 0;
1521 atomic_set(&mdev->rs_pending_cnt, 0);
1522 drbd_rs_cancel_all(mdev);
1523
1517 drbd_send_uuids(mdev); 1524 drbd_send_uuids(mdev);
1518 drbd_send_state(mdev, ns); 1525 drbd_send_state(mdev, ns);
1519 } 1526 }
@@ -1681,10 +1688,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1681 "ASSERT FAILED: disk is %s while going diskless\n", 1688 "ASSERT FAILED: disk is %s while going diskless\n",
1682 drbd_disk_str(mdev->state.disk)); 1689 drbd_disk_str(mdev->state.disk));
1683 1690
1684 mdev->rs_total = 0;
1685 mdev->rs_failed = 0;
1686 atomic_set(&mdev->rs_pending_cnt, 0);
1687
1688 if (ns.conn >= C_CONNECTED) 1691 if (ns.conn >= C_CONNECTED)
1689 drbd_send_state(mdev, ns); 1692 drbd_send_state(mdev, ns);
1690 1693
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 03fc853be2b9..a68d9bfb731c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -959,6 +959,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
959 /* make sure there is no leftover from previous force-detach attempts */ 959 /* make sure there is no leftover from previous force-detach attempts */
960 clear_bit(FORCE_DETACH, &mdev->flags); 960 clear_bit(FORCE_DETACH, &mdev->flags);
961 961
962 /* and no leftover from previously aborted resync or verify, either */
963 mdev->rs_total = 0;
964 mdev->rs_failed = 0;
965 atomic_set(&mdev->rs_pending_cnt, 0);
966
962 /* allocation not in the IO path, cqueue thread context */ 967 /* allocation not in the IO path, cqueue thread context */
963 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); 968 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
964 if (!nbc) { 969 if (!nbc) {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a35393f2fd1b..6bce2cc179d4 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1501,14 +1501,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1501 return; 1501 return;
1502 } 1502 }
1503 1503
1504 if (mdev->state.conn < C_AHEAD) {
1505 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1506 drbd_rs_cancel_all(mdev);
1507 /* This should be done when we abort the resync. We definitely do not
1508 want to have this for connections going back and forth between
1509 Ahead/Behind and SyncSource/SyncTarget */
1510 }
1511
1512 if (side == C_SYNC_TARGET) { 1504 if (side == C_SYNC_TARGET) {
1513 /* Since application IO was locked out during C_WF_BITMAP_T and 1505 /* Since application IO was locked out during C_WF_BITMAP_T and
1514 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET 1506 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET