aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2010-10-18 17:04:07 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2010-10-22 09:50:27 -0400
commit6719fb036cea56a5ee9d0ac912ed8c7cabb27f49 (patch)
treeeed4e4d3161cc6f7a4009c5c856e88c6707c88a3 /drivers/block/drbd
parent82f59cc6353889b426cf13b6596d5a3d100fa09e (diff)
drbd: fix potential data divergence after multiple failures
If we get an IO-error during an activity log transaction: if we failed to write the bitmap of the evicted extent, we must not write the transaction itself. If we failed to write the transaction, we must not even submit the corresponding bio, as its extent is not yet marked in the activity log. Otherwise, if this was a disconnected Primary (degraded cluster), which now lost its disk as well, and we later re-attach the same backend storage, we possibly "forget" to resync some parts of the disk that potentially have been changed. On the receiving side, when receiving from a peer with unhealthy disk, checking for pdsk == D_DISKLESS is not enough, we need to set out of sync and do AL transactions for everything pdsk < D_INCONSISTENT on the receiving side. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_actlog.c26
-rw-r--r--drivers/block/drbd/drbd_receiver.c3
-rw-r--r--drivers/block/drbd/drbd_req.c19
3 files changed, 37 insertions, 11 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index ac04ef97eac2..bd925180a2b0 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -284,18 +284,32 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
284 u32 xor_sum = 0; 284 u32 xor_sum = 0;
285 285
286 if (!get_ldev(mdev)) { 286 if (!get_ldev(mdev)) {
287 dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); 287 dev_err(DEV,
288 "disk is %s, cannot start al transaction (-%d +%d)\n",
289 drbd_disk_str(mdev->state.disk), evicted, new_enr);
288 complete(&((struct update_al_work *)w)->event); 290 complete(&((struct update_al_work *)w)->event);
289 return 1; 291 return 1;
290 } 292 }
291 /* do we have to do a bitmap write, first? 293 /* do we have to do a bitmap write, first?
292 * TODO reduce maximum latency: 294 * TODO reduce maximum latency:
293 * submit both bios, then wait for both, 295 * submit both bios, then wait for both,
294 * instead of doing two synchronous sector writes. */ 296 * instead of doing two synchronous sector writes.
297 * For now, we must not write the transaction,
298 * if we cannot write out the bitmap of the evicted extent. */
295 if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) 299 if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
296 drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); 300 drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
297 301
298 mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */ 302 /* The bitmap write may have failed, causing a state change. */
303 if (mdev->state.disk < D_INCONSISTENT) {
304 dev_err(DEV,
305 "disk is %s, cannot write al transaction (-%d +%d)\n",
306 drbd_disk_str(mdev->state.disk), evicted, new_enr);
307 complete(&((struct update_al_work *)w)->event);
308 put_ldev(mdev);
309 return 1;
310 }
311
312 mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
299 buffer = (struct al_transaction *)page_address(mdev->md_io_page); 313 buffer = (struct al_transaction *)page_address(mdev->md_io_page);
300 314
301 buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); 315 buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
@@ -739,7 +753,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
739 unsigned int enr; 753 unsigned int enr;
740 unsigned long add = 0; 754 unsigned long add = 0;
741 char ppb[10]; 755 char ppb[10];
742 int i; 756 int i, tmp;
743 757
744 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); 758 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
745 759
@@ -747,7 +761,9 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
747 enr = lc_element_by_index(mdev->act_log, i)->lc_number; 761 enr = lc_element_by_index(mdev->act_log, i)->lc_number;
748 if (enr == LC_FREE) 762 if (enr == LC_FREE)
749 continue; 763 continue;
750 add += drbd_bm_ALe_set_all(mdev, enr); 764 tmp = drbd_bm_ALe_set_all(mdev, enr);
765 dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
766 add += tmp;
751 } 767 }
752 768
753 lc_unlock(mdev->act_log); 769 lc_unlock(mdev->act_log);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 04a823b01da5..1146faa7ae38 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1995,10 +1995,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1995 break; 1995 break;
1996 } 1996 }
1997 1997
1998 if (mdev->state.pdsk == D_DISKLESS) { 1998 if (mdev->state.pdsk < D_INCONSISTENT) {
1999 /* In case we have the only disk of the cluster, */ 1999 /* In case we have the only disk of the cluster, */
2000 drbd_set_out_of_sync(mdev, e->sector, e->size); 2000 drbd_set_out_of_sync(mdev, e->sector, e->size);
2001 e->flags |= EE_CALL_AL_COMPLETE_IO; 2001 e->flags |= EE_CALL_AL_COMPLETE_IO;
2002 e->flags &= ~EE_MAY_SET_IN_SYNC;
2002 drbd_al_begin_io(mdev, e->sector); 2003 drbd_al_begin_io(mdev, e->sector);
2003 } 2004 }
2004 2005
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9e91a2545fc8..d26b213dbf15 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -942,12 +942,21 @@ allocate_barrier:
942 if (local) { 942 if (local) {
943 req->private_bio->bi_bdev = mdev->ldev->backing_bdev; 943 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
944 944
945 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR 945 /* State may have changed since we grabbed our reference on the
946 : rw == READ ? DRBD_FAULT_DT_RD 946 * mdev->ldev member. Double check, and short-circuit to endio.
947 : DRBD_FAULT_DT_RA)) 947 * In case the last activity log transaction failed to get on
948 * stable storage, and this is a WRITE, we may not even submit
949 * this bio. */
950 if (get_ldev(mdev)) {
951 if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
952 : rw == READ ? DRBD_FAULT_DT_RD
953 : DRBD_FAULT_DT_RA))
954 bio_endio(req->private_bio, -EIO);
955 else
956 generic_make_request(req->private_bio);
957 put_ldev(mdev);
958 } else
948 bio_endio(req->private_bio, -EIO); 959 bio_endio(req->private_bio, -EIO);
949 else
950 generic_make_request(req->private_bio);
951 } 960 }
952 961
953 /* we need to plug ALWAYS since we possibly need to kick lo_dev. 962 /* we need to plug ALWAYS since we possibly need to kick lo_dev.