diff options
author | NeilBrown <neilb@suse.de> | 2011-07-27 21:33:42 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-07-27 21:33:42 -0400 |
commit | 3a9f28a5117e00a868dd8b4395f9a707ae56764b (patch) | |
tree | 36fe0fc7a7ccfc0da03dea546286b7bdef581246 | |
parent | d8f05d2995d467a91db1af01637e6ffd94660ca8 (diff) |
md/raid1: improve handling of read failure during recovery.
If we cannot read a block from anywhere during recovery, there is
now a better approach than just giving up.
We can record a bad block on each device and keep going - being
careful not to clear the bad block when a later write succeeds (as it
might), because that will be a write of incorrect data.
We have now reached the state where - for raid1 - we only call
md_error if md_set_badblocks has failed.
Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
-rw-r--r-- | drivers/md/raid1.c | 41 |
1 files changed, 34 insertions, 7 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e6957151233b..039e3af72929 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1392,7 +1392,12 @@ static void end_sync_write(struct bio *bio, int error) | |||
1392 | } else if (is_badblock(conf->mirrors[mirror].rdev, | 1392 | } else if (is_badblock(conf->mirrors[mirror].rdev, |
1393 | r1_bio->sector, | 1393 | r1_bio->sector, |
1394 | r1_bio->sectors, | 1394 | r1_bio->sectors, |
1395 | &first_bad, &bad_sectors)) | 1395 | &first_bad, &bad_sectors) && |
1396 | !is_badblock(conf->mirrors[r1_bio->read_disk].rdev, | ||
1397 | r1_bio->sector, | ||
1398 | r1_bio->sectors, | ||
1399 | &first_bad, &bad_sectors) | ||
1400 | ) | ||
1396 | set_bit(R1BIO_MadeGood, &r1_bio->state); | 1401 | set_bit(R1BIO_MadeGood, &r1_bio->state); |
1397 | 1402 | ||
1398 | update_head_pos(mirror, r1_bio); | 1403 | update_head_pos(mirror, r1_bio); |
@@ -1473,16 +1478,36 @@ static int fix_sync_read_error(r1bio_t *r1_bio) | |||
1473 | 1478 | ||
1474 | if (!success) { | 1479 | if (!success) { |
1475 | char b[BDEVNAME_SIZE]; | 1480 | char b[BDEVNAME_SIZE]; |
1476 | /* Cannot read from anywhere, array is toast */ | 1481 | int abort = 0; |
1477 | md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); | 1482 | /* Cannot read from anywhere, this block is lost. |
1483 | * Record a bad block on each device. If that doesn't | ||
1484 | * work just disable and interrupt the recovery. | ||
1485 | * Don't fail devices as that won't really help. | ||
1486 | */ | ||
1478 | printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" | 1487 | printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" |
1479 | " for block %llu\n", | 1488 | " for block %llu\n", |
1480 | mdname(mddev), | 1489 | mdname(mddev), |
1481 | bdevname(bio->bi_bdev, b), | 1490 | bdevname(bio->bi_bdev, b), |
1482 | (unsigned long long)r1_bio->sector); | 1491 | (unsigned long long)r1_bio->sector); |
1483 | md_done_sync(mddev, r1_bio->sectors, 0); | 1492 | for (d = 0; d < conf->raid_disks; d++) { |
1484 | put_buf(r1_bio); | 1493 | rdev = conf->mirrors[d].rdev; |
1485 | return 0; | 1494 | if (!rdev || test_bit(Faulty, &rdev->flags)) |
1495 | continue; | ||
1496 | if (!rdev_set_badblocks(rdev, sect, s, 0)) | ||
1497 | abort = 1; | ||
1498 | } | ||
1499 | if (abort) { | ||
1500 | mddev->recovery_disabled = 1; | ||
1501 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
1502 | md_done_sync(mddev, r1_bio->sectors, 0); | ||
1503 | put_buf(r1_bio); | ||
1504 | return 0; | ||
1505 | } | ||
1506 | /* Try next page */ | ||
1507 | sectors -= s; | ||
1508 | sect += s; | ||
1509 | idx++; | ||
1510 | continue; | ||
1486 | } | 1511 | } |
1487 | 1512 | ||
1488 | start = d; | 1513 | start = d; |
@@ -1879,7 +1904,9 @@ static void raid1d(mddev_t *mddev) | |||
1879 | if (bio->bi_end_io == NULL) | 1904 | if (bio->bi_end_io == NULL) |
1880 | continue; | 1905 | continue; |
1881 | if (test_bit(BIO_UPTODATE, | 1906 | if (test_bit(BIO_UPTODATE, |
1882 | &bio->bi_flags)) { | 1907 | &bio->bi_flags) && |
1908 | test_bit(R1BIO_MadeGood, | ||
1909 | &r1_bio->state)) { | ||
1883 | rdev_clear_badblocks( | 1910 | rdev_clear_badblocks( |
1884 | rdev, | 1911 | rdev, |
1885 | r1_bio->sector, | 1912 | r1_bio->sector, |