about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-07-27 21:33:00 -0400
committer NeilBrown <neilb@suse.de> 2011-07-27 21:33:00 -0400
commit d8f05d2995d467a91db1af01637e6ffd94660ca8 (patch)
tree e35959bc4f6c370c982df8222eb818c720b94ffd
parent cd5ff9a16f0831f68c1024d5d776075b5123b034 (diff)
md/raid1: record badblocks found during resync etc.
If we find a bad block while writing as part of resync/recovery we need to report that back to raid1d which must record the bad block, or fail the device.

Similarly when fixing a read error, a further error should just record a bad block if possible rather than failing the device.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
-rw-r--r-- drivers/md/raid1.c 81
1 files changed, 51 insertions, 30 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b45d641f4f33..e6957151233b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1386,7 +1386,9 @@ static void end_sync_write(struct bio *bio, int error)
1386 s += sync_blocks; 1386 s += sync_blocks;
1387 sectors_to_go -= sync_blocks; 1387 sectors_to_go -= sync_blocks;
1388 } while (sectors_to_go > 0); 1388 } while (sectors_to_go > 0);
1389 md_error(mddev, conf->mirrors[mirror].rdev); 1389 set_bit(WriteErrorSeen,
1390 &conf->mirrors[mirror].rdev->flags);
1391 set_bit(R1BIO_WriteError, &r1_bio->state);
1390 } else if (is_badblock(conf->mirrors[mirror].rdev, 1392 } else if (is_badblock(conf->mirrors[mirror].rdev,
1391 r1_bio->sector, 1393 r1_bio->sector,
1392 r1_bio->sectors, 1394 r1_bio->sectors,
@@ -1397,7 +1399,8 @@ static void end_sync_write(struct bio *bio, int error)
1397 1399
1398 if (atomic_dec_and_test(&r1_bio->remaining)) { 1400 if (atomic_dec_and_test(&r1_bio->remaining)) {
1399 int s = r1_bio->sectors; 1401 int s = r1_bio->sectors;
1400 if (test_bit(R1BIO_MadeGood, &r1_bio->state)) 1402 if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
1403 test_bit(R1BIO_WriteError, &r1_bio->state))
1401 reschedule_retry(r1_bio); 1404 reschedule_retry(r1_bio);
1402 else { 1405 else {
1403 put_buf(r1_bio); 1406 put_buf(r1_bio);
@@ -1406,6 +1409,20 @@ static void end_sync_write(struct bio *bio, int error)
1406 } 1409 }
1407} 1410}
1408 1411
1412static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
1413 int sectors, struct page *page, int rw)
1414{
1415 if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
1416 /* success */
1417 return 1;
1418 if (rw == WRITE)
1419 set_bit(WriteErrorSeen, &rdev->flags);
1420 /* need to record an error - either for the block or the device */
1421 if (!rdev_set_badblocks(rdev, sector, sectors, 0))
1422 md_error(rdev->mddev, rdev);
1423 return 0;
1424}
1425
1409static int fix_sync_read_error(r1bio_t *r1_bio) 1426static int fix_sync_read_error(r1bio_t *r1_bio)
1410{ 1427{
1411 /* Try some synchronous reads of other devices to get 1428 /* Try some synchronous reads of other devices to get
@@ -1477,12 +1494,11 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
1477 if (r1_bio->bios[d]->bi_end_io != end_sync_read) 1494 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1478 continue; 1495 continue;
1479 rdev = conf->mirrors[d].rdev; 1496 rdev = conf->mirrors[d].rdev;
1480 if (sync_page_io(rdev, sect, s<<9, 1497 if (r1_sync_page_io(rdev, sect, s,
1481 bio->bi_io_vec[idx].bv_page, 1498 bio->bi_io_vec[idx].bv_page,
1482 WRITE, false) == 0) { 1499 WRITE) == 0) {
1483 r1_bio->bios[d]->bi_end_io = NULL; 1500 r1_bio->bios[d]->bi_end_io = NULL;
1484 rdev_dec_pending(rdev, mddev); 1501 rdev_dec_pending(rdev, mddev);
1485 md_error(mddev, rdev);
1486 } 1502 }
1487 } 1503 }
1488 d = start; 1504 d = start;
@@ -1493,11 +1509,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
1493 if (r1_bio->bios[d]->bi_end_io != end_sync_read) 1509 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1494 continue; 1510 continue;
1495 rdev = conf->mirrors[d].rdev; 1511 rdev = conf->mirrors[d].rdev;
1496 if (sync_page_io(rdev, sect, s<<9, 1512 if (r1_sync_page_io(rdev, sect, s,
1497 bio->bi_io_vec[idx].bv_page, 1513 bio->bi_io_vec[idx].bv_page,
1498 READ, false) == 0) 1514 READ) != 0)
1499 md_error(mddev, rdev);
1500 else
1501 atomic_add(s, &rdev->corrected_errors); 1515 atomic_add(s, &rdev->corrected_errors);
1502 } 1516 }
1503 sectors -= s; 1517 sectors -= s;
@@ -1682,8 +1696,10 @@ static void fix_read_error(conf_t *conf, int read_disk,
1682 } while (!success && d != read_disk); 1696 } while (!success && d != read_disk);
1683 1697
1684 if (!success) { 1698 if (!success) {
1685 /* Cannot read from anywhere -- bye bye array */ 1699 /* Cannot read from anywhere - mark it bad */
1686 md_error(mddev, conf->mirrors[read_disk].rdev); 1700 mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
1701 if (!rdev_set_badblocks(rdev, sect, s, 0))
1702 md_error(mddev, rdev);
1687 break; 1703 break;
1688 } 1704 }
1689 /* write it back and re-read */ 1705 /* write it back and re-read */
@@ -1694,13 +1710,9 @@ static void fix_read_error(conf_t *conf, int read_disk,
1694 d--; 1710 d--;
1695 rdev = conf->mirrors[d].rdev; 1711 rdev = conf->mirrors[d].rdev;
1696 if (rdev && 1712 if (rdev &&
1697 test_bit(In_sync, &rdev->flags)) { 1713 test_bit(In_sync, &rdev->flags))
1698 if (sync_page_io(rdev, sect, s<<9, 1714 r1_sync_page_io(rdev, sect, s,
1699 conf->tmppage, WRITE, false) 1715 conf->tmppage, WRITE);
1700 == 0)
1701 /* Well, this device is dead */
1702 md_error(mddev, rdev);
1703 }
1704 } 1716 }
1705 d = start; 1717 d = start;
1706 while (d != read_disk) { 1718 while (d != read_disk) {
@@ -1711,12 +1723,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
1711 rdev = conf->mirrors[d].rdev; 1723 rdev = conf->mirrors[d].rdev;
1712 if (rdev && 1724 if (rdev &&
1713 test_bit(In_sync, &rdev->flags)) { 1725 test_bit(In_sync, &rdev->flags)) {
1714 if (sync_page_io(rdev, sect, s<<9, 1726 if (r1_sync_page_io(rdev, sect, s,
1715 conf->tmppage, READ, false) 1727 conf->tmppage, READ)) {
1716 == 0)
1717 /* Well, this device is dead */
1718 md_error(mddev, rdev);
1719 else {
1720 atomic_add(s, &rdev->corrected_errors); 1728 atomic_add(s, &rdev->corrected_errors);
1721 printk(KERN_INFO 1729 printk(KERN_INFO
1722 "md/raid1:%s: read error corrected " 1730 "md/raid1:%s: read error corrected "
@@ -1860,20 +1868,33 @@ static void raid1d(mddev_t *mddev)
1860 mddev = r1_bio->mddev; 1868 mddev = r1_bio->mddev;
1861 conf = mddev->private; 1869 conf = mddev->private;
1862 if (test_bit(R1BIO_IsSync, &r1_bio->state)) { 1870 if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
1863 if (test_bit(R1BIO_MadeGood, &r1_bio->state)) { 1871 if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
1872 test_bit(R1BIO_WriteError, &r1_bio->state)) {
1864 int m; 1873 int m;
1865 int s = r1_bio->sectors; 1874 int s = r1_bio->sectors;
1866 for (m = 0; m < conf->raid_disks ; m++) { 1875 for (m = 0; m < conf->raid_disks ; m++) {
1876 mdk_rdev_t *rdev
1877 = conf->mirrors[m].rdev;
1867 struct bio *bio = r1_bio->bios[m]; 1878 struct bio *bio = r1_bio->bios[m];
1868 if (bio->bi_end_io != NULL && 1879 if (bio->bi_end_io == NULL)
1869 test_bit(BIO_UPTODATE, 1880 continue;
1881 if (test_bit(BIO_UPTODATE,
1870 &bio->bi_flags)) { 1882 &bio->bi_flags)) {
1871 rdev = conf->mirrors[m].rdev;
1872 rdev_clear_badblocks( 1883 rdev_clear_badblocks(
1873 rdev, 1884 rdev,
1874 r1_bio->sector, 1885 r1_bio->sector,
1875 r1_bio->sectors); 1886 r1_bio->sectors);
1876 } 1887 }
1888 if (!test_bit(BIO_UPTODATE,
1889 &bio->bi_flags) &&
1890 test_bit(R1BIO_WriteError,
1891 &r1_bio->state)) {
1892 if (!rdev_set_badblocks(
1893 rdev,
1894 r1_bio->sector,
1895 r1_bio->sectors, 0))
1896 md_error(mddev, rdev);
1897 }
1877 } 1898 }
1878 put_buf(r1_bio); 1899 put_buf(r1_bio);
1879 md_done_sync(mddev, s, 1); 1900 md_done_sync(mddev, s, 1);