aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_req.c
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2010-10-18 17:04:07 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2010-10-22 09:50:27 -0400
commit6719fb036cea56a5ee9d0ac912ed8c7cabb27f49 (patch)
treeeed4e4d3161cc6f7a4009c5c856e88c6707c88a3 /drivers/block/drbd/drbd_req.c
parent82f59cc6353889b426cf13b6596d5a3d100fa09e (diff)
drbd: fix potential data divergence after multiple failures
If we get an IO-error during an activity log transaction: if we failed to write the bitmap of the evicted extent, we must not write the transaction itself. If we failed to write the transaction, we must not even submit the corresponding bio, as its extent is not yet marked in the activity log. Otherwise, if this was a disconnected Primary (degraded cluster), which now lost its disk as well, and we later re-attach the same backend storage, we possibly "forget" to resync some parts of the disk that potentially have been changed. On the receiving side, when receiving from a peer with unhealthy disk, checking for pdsk == D_DISKLESS is not enough; we need to set out of sync and do AL transactions for everything pdsk < D_INCONSISTENT on the receiving side. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r--drivers/block/drbd/drbd_req.c19
1 files changed, 14 insertions, 5 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9e91a2545fc8..d26b213dbf15 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -942,12 +942,21 @@ allocate_barrier:
942 if (local) { 942 if (local) {
943 req->private_bio->bi_bdev = mdev->ldev->backing_bdev; 943 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
944 944
945 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR 945 /* State may have changed since we grabbed our reference on the
946 : rw == READ ? DRBD_FAULT_DT_RD 946 * mdev->ldev member. Double check, and short-circuit to endio.
947 : DRBD_FAULT_DT_RA)) 947 * In case the last activity log transaction failed to get on
948 * stable storage, and this is a WRITE, we may not even submit
949 * this bio. */
950 if (get_ldev(mdev)) {
951 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
952 : rw == READ ? DRBD_FAULT_DT_RD
953 : DRBD_FAULT_DT_RA))
954 bio_endio(req->private_bio, -EIO);
955 else
956 generic_make_request(req->private_bio);
957 put_ldev(mdev);
958 } else
948 bio_endio(req->private_bio, -EIO); 959 bio_endio(req->private_bio, -EIO);
949 else
950 generic_make_request(req->private_bio);
951 } 960 }
952 961
953 /* we need to plug ALWAYS since we possibly need to kick lo_dev. 962 /* we need to plug ALWAYS since we possibly need to kick lo_dev.