aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorAlex Lyakas <alex@zadarastorage.com>2013-06-04 13:42:21 -0400
committerNeilBrown <neilb@suse.de>2013-06-12 23:20:03 -0400
commit3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 (patch)
treee64dd2c43c612972143a381ba1f281701bd43288 /drivers/md
parent6b6204ee92adb53bfd6a77cb5679282ec3820c4b (diff)
md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.
Without that fix, the following scenario could happen: - RAID1 with drives A and B; drive B was freshly-added and is rebuilding - Drive A fails - WRITE request arrives to the array. It is failed by drive A, so r1_bio is marked as R1BIO_WriteError, but the rebuilding drive B succeeds in writing it, so the same r1_bio is marked as R1BIO_Uptodate. - r1_bio arrives to handle_write_finished, badblocks are disabled, md_error()->error() does nothing because we don't fail the last drive of raid1 - raid_end_bio_io() calls call_bio_endio() - As a result, in call_bio_endio(): if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) clear_bit(BIO_UPTODATE, &bio->bi_flags); this code doesn't clear the BIO_UPTODATE flag, and the whole master WRITE succeeds, back to the upper layer. So we returned success to the upper layer, even though we had written the data onto the rebuilding drive only. But when we want to read the data back, we would not read from the rebuilding drive, so this data is lost. [neilb - applied identical change to raid10 as well] This bug can result in lost data, so it is suitable for any -stable kernel. Cc: stable@vger.kernel.org Signed-off-by: Alex Lyakas <alex@zadarastorage.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid1.c12
-rw-r--r--drivers/md/raid10.c12
2 files changed, 22 insertions, 2 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 851023e2ba5d..f2db7a9d5964 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -427,7 +427,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
427 427
428 r1_bio->bios[mirror] = NULL; 428 r1_bio->bios[mirror] = NULL;
429 to_put = bio; 429 to_put = bio;
430 set_bit(R1BIO_Uptodate, &r1_bio->state); 430 /*
431 * Do not set R1BIO_Uptodate if the current device is
432 * rebuilding or Faulty. This is because we cannot use
433 * such device for properly reading the data back (we could
434 * potentially use it, if the current write would have felt
435 * before rdev->recovery_offset, but for simplicity we don't
436 * check this here.
437 */
438 if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
439 !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
440 set_bit(R1BIO_Uptodate, &r1_bio->state);
431 441
432 /* Maybe we can clear some bad blocks. */ 442 /* Maybe we can clear some bad blocks. */
433 if (is_badblock(conf->mirrors[mirror].rdev, 443 if (is_badblock(conf->mirrors[mirror].rdev,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 018741ba9310..8000ee25650d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
490 sector_t first_bad; 490 sector_t first_bad;
491 int bad_sectors; 491 int bad_sectors;
492 492
493 set_bit(R10BIO_Uptodate, &r10_bio->state); 493 /*
494 * Do not set R10BIO_Uptodate if the current device is
495 * rebuilding or Faulty. This is because we cannot use
496 * such device for properly reading the data back (we could
497 * potentially use it, if the current write would have felt
498 * before rdev->recovery_offset, but for simplicity we don't
499 * check this here.
500 */
501 if (test_bit(In_sync, &rdev->flags) &&
502 !test_bit(Faulty, &rdev->flags))
503 set_bit(R10BIO_Uptodate, &r10_bio->state);
494 504
495 /* Maybe we can clear some bad blocks. */ 505 /* Maybe we can clear some bad blocks. */
496 if (is_badblock(rdev, 506 if (is_badblock(rdev,