aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2012-07-30 03:10:41 -0400
committer: Philipp Reisner <philipp.reisner@linbit.com> 2012-11-08 10:58:40 -0500
commit: a324896b173e569fb831c5caa04ccd02ec0bc9ca (patch)
tree: fedb4c82e66c304c6ced91a9e83538af735ddb45 /drivers/block/drbd
parent: 8a943170711b7a4d63528ea8eb6a41cc91e79309 (diff)
drbd: do not reset rs_pending_cnt too early
Fix asserts like
  block drbd0: in got_BlockAck:4634: rs_pending_cnt = -35 < 0 !

We reset the resync lru cache and related information (rs_pending_cnt) once we have successfully finished a resync or online verify, or if the replication connection is lost.

We also need to reset it if a resync or online verify is aborted because a lower level disk failed.

In that case the replication link is still established, and we may still have packets queued in the network buffers which want to touch rs_pending_cnt.

We do not have any synchronization mechanism to know for sure when all such pending resync-related packets have been drained.

To keep this counter from going negative (and violating the ASSERT that it will always be >= 0), just do not reset it when we lose a disk.

It is good enough to make sure it is re-initialized before the next resync can start: reset it when we re-attach a disk.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- drivers/block/drbd/drbd_nl.c 5
-rw-r--r-- drivers/block/drbd/drbd_state.c 11
-rw-r--r-- drivers/block/drbd/drbd_worker.c 8
3 files changed, 12 insertions, 12 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 05ed4804c72c..a2925dedc23f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1309,6 +1309,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1309 /* make sure there is no leftover from previous force-detach attempts */ 1309 /* make sure there is no leftover from previous force-detach attempts */
1310 clear_bit(FORCE_DETACH, &mdev->flags); 1310 clear_bit(FORCE_DETACH, &mdev->flags);
1311 1311
1312 /* and no leftover from previously aborted resync or verify, either */
1313 mdev->rs_total = 0;
1314 mdev->rs_failed = 0;
1315 atomic_set(&mdev->rs_pending_cnt, 0);
1316
1312 /* allocation not in the IO path, drbdsetup context */ 1317 /* allocation not in the IO path, drbdsetup context */
1313 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); 1318 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1314 if (!nbc) { 1319 if (!nbc) {
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index c9ec7d37632c..ad307fb8dc28 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1216,6 +1216,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1216 /* Do not change the order of the if above and the two below... */ 1216 /* Do not change the order of the if above and the two below... */
1217 if (os.pdsk == D_DISKLESS && 1217 if (os.pdsk == D_DISKLESS &&
1218 ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */ 1218 ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */
1219 /* we probably will start a resync soon.
1220 * make sure those things are properly reset. */
1221 mdev->rs_total = 0;
1222 mdev->rs_failed = 0;
1223 atomic_set(&mdev->rs_pending_cnt, 0);
1224 drbd_rs_cancel_all(mdev);
1225
1219 drbd_send_uuids(mdev); 1226 drbd_send_uuids(mdev);
1220 drbd_send_state(mdev, ns); 1227 drbd_send_state(mdev, ns);
1221 } 1228 }
@@ -1386,10 +1393,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1386 "ASSERT FAILED: disk is %s while going diskless\n", 1393 "ASSERT FAILED: disk is %s while going diskless\n",
1387 drbd_disk_str(mdev->state.disk)); 1394 drbd_disk_str(mdev->state.disk));
1388 1395
1389 mdev->rs_total = 0;
1390 mdev->rs_failed = 0;
1391 atomic_set(&mdev->rs_pending_cnt, 0);
1392
1393 if (ns.conn >= C_CONNECTED) 1396 if (ns.conn >= C_CONNECTED)
1394 drbd_send_state(mdev, ns); 1397 drbd_send_state(mdev, ns);
1395 /* corresponding get_ldev in __drbd_set_state 1398 /* corresponding get_ldev in __drbd_set_state
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 07a4046dd8c3..9d7e1fb0f431 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1549,14 +1549,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1549 return; 1549 return;
1550 } 1550 }
1551 1551
1552 if (mdev->state.conn < C_AHEAD) {
1553 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1554 drbd_rs_cancel_all(mdev);
1555 /* This should be done when we abort the resync. We definitely do not
1556 want to have this for connections going back and forth between
1557 Ahead/Behind and SyncSource/SyncTarget */
1558 }
1559
1560 if (!test_bit(B_RS_H_DONE, &mdev->flags)) { 1552 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1561 if (side == C_SYNC_TARGET) { 1553 if (side == C_SYNC_TARGET) {
1562 /* Since application IO was locked out during C_WF_BITMAP_T and 1554 /* Since application IO was locked out during C_WF_BITMAP_T and