diff options
author | NeilBrown <neilb@suse.de> | 2011-07-27 21:33:00 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-07-27 21:33:00 -0400 |
commit | d8f05d2995d467a91db1af01637e6ffd94660ca8 (patch) | |
tree | e35959bc4f6c370c982df8222eb818c720b94ffd | |
parent | cd5ff9a16f0831f68c1024d5d776075b5123b034 (diff) |
md/raid1: record badblocks found during resync etc.
If we find a bad block while writing as part of resync/recovery, we
need to report that back to raid1d, which must record the bad block
or fail the device.
Similarly, when fixing a read error, a further error should just
record a bad block if possible, rather than failing the device.
Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
-rw-r--r-- | drivers/md/raid1.c | 81 |
1 files changed, 51 insertions, 30 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b45d641f4f33..e6957151233b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1386,7 +1386,9 @@ static void end_sync_write(struct bio *bio, int error) | |||
1386 | s += sync_blocks; | 1386 | s += sync_blocks; |
1387 | sectors_to_go -= sync_blocks; | 1387 | sectors_to_go -= sync_blocks; |
1388 | } while (sectors_to_go > 0); | 1388 | } while (sectors_to_go > 0); |
1389 | md_error(mddev, conf->mirrors[mirror].rdev); | 1389 | set_bit(WriteErrorSeen, |
1390 | &conf->mirrors[mirror].rdev->flags); | ||
1391 | set_bit(R1BIO_WriteError, &r1_bio->state); | ||
1390 | } else if (is_badblock(conf->mirrors[mirror].rdev, | 1392 | } else if (is_badblock(conf->mirrors[mirror].rdev, |
1391 | r1_bio->sector, | 1393 | r1_bio->sector, |
1392 | r1_bio->sectors, | 1394 | r1_bio->sectors, |
@@ -1397,7 +1399,8 @@ static void end_sync_write(struct bio *bio, int error) | |||
1397 | 1399 | ||
1398 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 1400 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
1399 | int s = r1_bio->sectors; | 1401 | int s = r1_bio->sectors; |
1400 | if (test_bit(R1BIO_MadeGood, &r1_bio->state)) | 1402 | if (test_bit(R1BIO_MadeGood, &r1_bio->state) || |
1403 | test_bit(R1BIO_WriteError, &r1_bio->state)) | ||
1401 | reschedule_retry(r1_bio); | 1404 | reschedule_retry(r1_bio); |
1402 | else { | 1405 | else { |
1403 | put_buf(r1_bio); | 1406 | put_buf(r1_bio); |
@@ -1406,6 +1409,20 @@ static void end_sync_write(struct bio *bio, int error) | |||
1406 | } | 1409 | } |
1407 | } | 1410 | } |
1408 | 1411 | ||
1412 | static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector, | ||
1413 | int sectors, struct page *page, int rw) | ||
1414 | { | ||
1415 | if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) | ||
1416 | /* success */ | ||
1417 | return 1; | ||
1418 | if (rw == WRITE) | ||
1419 | set_bit(WriteErrorSeen, &rdev->flags); | ||
1420 | /* need to record an error - either for the block or the device */ | ||
1421 | if (!rdev_set_badblocks(rdev, sector, sectors, 0)) | ||
1422 | md_error(rdev->mddev, rdev); | ||
1423 | return 0; | ||
1424 | } | ||
1425 | |||
1409 | static int fix_sync_read_error(r1bio_t *r1_bio) | 1426 | static int fix_sync_read_error(r1bio_t *r1_bio) |
1410 | { | 1427 | { |
1411 | /* Try some synchronous reads of other devices to get | 1428 | /* Try some synchronous reads of other devices to get |
@@ -1477,12 +1494,11 @@ static int fix_sync_read_error(r1bio_t *r1_bio) | |||
1477 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) | 1494 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) |
1478 | continue; | 1495 | continue; |
1479 | rdev = conf->mirrors[d].rdev; | 1496 | rdev = conf->mirrors[d].rdev; |
1480 | if (sync_page_io(rdev, sect, s<<9, | 1497 | if (r1_sync_page_io(rdev, sect, s, |
1481 | bio->bi_io_vec[idx].bv_page, | 1498 | bio->bi_io_vec[idx].bv_page, |
1482 | WRITE, false) == 0) { | 1499 | WRITE) == 0) { |
1483 | r1_bio->bios[d]->bi_end_io = NULL; | 1500 | r1_bio->bios[d]->bi_end_io = NULL; |
1484 | rdev_dec_pending(rdev, mddev); | 1501 | rdev_dec_pending(rdev, mddev); |
1485 | md_error(mddev, rdev); | ||
1486 | } | 1502 | } |
1487 | } | 1503 | } |
1488 | d = start; | 1504 | d = start; |
@@ -1493,11 +1509,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio) | |||
1493 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) | 1509 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) |
1494 | continue; | 1510 | continue; |
1495 | rdev = conf->mirrors[d].rdev; | 1511 | rdev = conf->mirrors[d].rdev; |
1496 | if (sync_page_io(rdev, sect, s<<9, | 1512 | if (r1_sync_page_io(rdev, sect, s, |
1497 | bio->bi_io_vec[idx].bv_page, | 1513 | bio->bi_io_vec[idx].bv_page, |
1498 | READ, false) == 0) | 1514 | READ) != 0) |
1499 | md_error(mddev, rdev); | ||
1500 | else | ||
1501 | atomic_add(s, &rdev->corrected_errors); | 1515 | atomic_add(s, &rdev->corrected_errors); |
1502 | } | 1516 | } |
1503 | sectors -= s; | 1517 | sectors -= s; |
@@ -1682,8 +1696,10 @@ static void fix_read_error(conf_t *conf, int read_disk, | |||
1682 | } while (!success && d != read_disk); | 1696 | } while (!success && d != read_disk); |
1683 | 1697 | ||
1684 | if (!success) { | 1698 | if (!success) { |
1685 | /* Cannot read from anywhere -- bye bye array */ | 1699 | /* Cannot read from anywhere - mark it bad */ |
1686 | md_error(mddev, conf->mirrors[read_disk].rdev); | 1700 | mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev; |
1701 | if (!rdev_set_badblocks(rdev, sect, s, 0)) | ||
1702 | md_error(mddev, rdev); | ||
1687 | break; | 1703 | break; |
1688 | } | 1704 | } |
1689 | /* write it back and re-read */ | 1705 | /* write it back and re-read */ |
@@ -1694,13 +1710,9 @@ static void fix_read_error(conf_t *conf, int read_disk, | |||
1694 | d--; | 1710 | d--; |
1695 | rdev = conf->mirrors[d].rdev; | 1711 | rdev = conf->mirrors[d].rdev; |
1696 | if (rdev && | 1712 | if (rdev && |
1697 | test_bit(In_sync, &rdev->flags)) { | 1713 | test_bit(In_sync, &rdev->flags)) |
1698 | if (sync_page_io(rdev, sect, s<<9, | 1714 | r1_sync_page_io(rdev, sect, s, |
1699 | conf->tmppage, WRITE, false) | 1715 | conf->tmppage, WRITE); |
1700 | == 0) | ||
1701 | /* Well, this device is dead */ | ||
1702 | md_error(mddev, rdev); | ||
1703 | } | ||
1704 | } | 1716 | } |
1705 | d = start; | 1717 | d = start; |
1706 | while (d != read_disk) { | 1718 | while (d != read_disk) { |
@@ -1711,12 +1723,8 @@ static void fix_read_error(conf_t *conf, int read_disk, | |||
1711 | rdev = conf->mirrors[d].rdev; | 1723 | rdev = conf->mirrors[d].rdev; |
1712 | if (rdev && | 1724 | if (rdev && |
1713 | test_bit(In_sync, &rdev->flags)) { | 1725 | test_bit(In_sync, &rdev->flags)) { |
1714 | if (sync_page_io(rdev, sect, s<<9, | 1726 | if (r1_sync_page_io(rdev, sect, s, |
1715 | conf->tmppage, READ, false) | 1727 | conf->tmppage, READ)) { |
1716 | == 0) | ||
1717 | /* Well, this device is dead */ | ||
1718 | md_error(mddev, rdev); | ||
1719 | else { | ||
1720 | atomic_add(s, &rdev->corrected_errors); | 1728 | atomic_add(s, &rdev->corrected_errors); |
1721 | printk(KERN_INFO | 1729 | printk(KERN_INFO |
1722 | "md/raid1:%s: read error corrected " | 1730 | "md/raid1:%s: read error corrected " |
@@ -1860,20 +1868,33 @@ static void raid1d(mddev_t *mddev) | |||
1860 | mddev = r1_bio->mddev; | 1868 | mddev = r1_bio->mddev; |
1861 | conf = mddev->private; | 1869 | conf = mddev->private; |
1862 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { | 1870 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { |
1863 | if (test_bit(R1BIO_MadeGood, &r1_bio->state)) { | 1871 | if (test_bit(R1BIO_MadeGood, &r1_bio->state) || |
1872 | test_bit(R1BIO_WriteError, &r1_bio->state)) { | ||
1864 | int m; | 1873 | int m; |
1865 | int s = r1_bio->sectors; | 1874 | int s = r1_bio->sectors; |
1866 | for (m = 0; m < conf->raid_disks ; m++) { | 1875 | for (m = 0; m < conf->raid_disks ; m++) { |
1876 | mdk_rdev_t *rdev | ||
1877 | = conf->mirrors[m].rdev; | ||
1867 | struct bio *bio = r1_bio->bios[m]; | 1878 | struct bio *bio = r1_bio->bios[m]; |
1868 | if (bio->bi_end_io != NULL && | 1879 | if (bio->bi_end_io == NULL) |
1869 | test_bit(BIO_UPTODATE, | 1880 | continue; |
1881 | if (test_bit(BIO_UPTODATE, | ||
1870 | &bio->bi_flags)) { | 1882 | &bio->bi_flags)) { |
1871 | rdev = conf->mirrors[m].rdev; | ||
1872 | rdev_clear_badblocks( | 1883 | rdev_clear_badblocks( |
1873 | rdev, | 1884 | rdev, |
1874 | r1_bio->sector, | 1885 | r1_bio->sector, |
1875 | r1_bio->sectors); | 1886 | r1_bio->sectors); |
1876 | } | 1887 | } |
1888 | if (!test_bit(BIO_UPTODATE, | ||
1889 | &bio->bi_flags) && | ||
1890 | test_bit(R1BIO_WriteError, | ||
1891 | &r1_bio->state)) { | ||
1892 | if (!rdev_set_badblocks( | ||
1893 | rdev, | ||
1894 | r1_bio->sector, | ||
1895 | r1_bio->sectors, 0)) | ||
1896 | md_error(mddev, rdev); | ||
1897 | } | ||
1877 | } | 1898 | } |
1878 | put_buf(r1_bio); | 1899 | put_buf(r1_bio); |
1879 | md_done_sync(mddev, s, 1); | 1900 | md_done_sync(mddev, s, 1); |