aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-07-27 21:39:25 -0400
committerNeilBrown <neilb@suse.de>2011-07-27 21:39:25 -0400
commit5e5702898e93eee7d69b6efde109609a89a61001 (patch)
tree87f47cc7e66da78befd16af6d479761b70f7adea /drivers/md/raid10.c
parente684e41db3bad44f1262341300b827c0d94ae220 (diff)
md/raid10: Handle read errors during recovery better.
Currently when we get a read error during recovery, we simply abort the recovery. Instead, repeat the read in page-sized blocks. On successful reads, write to the target. On read errors, record a bad block on the destination, and only if that fails do we abort the recovery. As we now retry reads we need to know where we read from. This was in bi_sector but that can be changed during a read attempt. So store the correct from_addr and to_addr in the r10_bio for later access. Signed-off-by: NeilBrown<neilb@suse.de>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c154
1 file changed, 121 insertions, 33 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6022e2aaf3aa..fc9ebbab3f62 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1141,7 +1141,7 @@ retry_write:
1141 wake_up(&conf->wait_barrier); 1141 wake_up(&conf->wait_barrier);
1142 1142
1143 if (sectors_handled < (bio->bi_size >> 9)) { 1143 if (sectors_handled < (bio->bi_size >> 9)) {
1144 /* We need another r1_bio. It has already been counted 1144 /* We need another r10_bio. It has already been counted
1145 * in bio->bi_phys_segments. 1145 * in bio->bi_phys_segments.
1146 */ 1146 */
1147 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); 1147 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
@@ -1438,29 +1438,10 @@ static void end_sync_read(struct bio *bio, int error)
1438 } 1438 }
1439} 1439}
1440 1440
1441static void end_sync_write(struct bio *bio, int error) 1441static void end_sync_request(r10bio_t *r10_bio)
1442{ 1442{
1443 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1444 r10bio_t *r10_bio = bio->bi_private;
1445 mddev_t *mddev = r10_bio->mddev; 1443 mddev_t *mddev = r10_bio->mddev;
1446 conf_t *conf = mddev->private;
1447 int d;
1448 sector_t first_bad;
1449 int bad_sectors;
1450 int slot;
1451
1452 d = find_bio_disk(conf, r10_bio, bio, &slot);
1453
1454 if (!uptodate) {
1455 set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
1456 set_bit(R10BIO_WriteError, &r10_bio->state);
1457 } else if (is_badblock(conf->mirrors[d].rdev,
1458 r10_bio->devs[slot].addr,
1459 r10_bio->sectors,
1460 &first_bad, &bad_sectors))
1461 set_bit(R10BIO_MadeGood, &r10_bio->state);
1462 1444
1463 rdev_dec_pending(conf->mirrors[d].rdev, mddev);
1464 while (atomic_dec_and_test(&r10_bio->remaining)) { 1445 while (atomic_dec_and_test(&r10_bio->remaining)) {
1465 if (r10_bio->master_bio == NULL) { 1446 if (r10_bio->master_bio == NULL) {
1466 /* the primary of several recovery bios */ 1447 /* the primary of several recovery bios */
@@ -1484,6 +1465,33 @@ static void end_sync_write(struct bio *bio, int error)
1484 } 1465 }
1485} 1466}
1486 1467
1468static void end_sync_write(struct bio *bio, int error)
1469{
1470 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1471 r10bio_t *r10_bio = bio->bi_private;
1472 mddev_t *mddev = r10_bio->mddev;
1473 conf_t *conf = mddev->private;
1474 int d;
1475 sector_t first_bad;
1476 int bad_sectors;
1477 int slot;
1478
1479 d = find_bio_disk(conf, r10_bio, bio, &slot);
1480
1481 if (!uptodate) {
1482 set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
1483 set_bit(R10BIO_WriteError, &r10_bio->state);
1484 } else if (is_badblock(conf->mirrors[d].rdev,
1485 r10_bio->devs[slot].addr,
1486 r10_bio->sectors,
1487 &first_bad, &bad_sectors))
1488 set_bit(R10BIO_MadeGood, &r10_bio->state);
1489
1490 rdev_dec_pending(conf->mirrors[d].rdev, mddev);
1491
1492 end_sync_request(r10_bio);
1493}
1494
1487/* 1495/*
1488 * Note: sync and recover and handled very differently for raid10 1496 * Note: sync and recover and handled very differently for raid10
1489 * This code is for resync. 1497 * This code is for resync.
@@ -1600,6 +1608,84 @@ done:
1600 * The second for writing. 1608 * The second for writing.
1601 * 1609 *
1602 */ 1610 */
1611static void fix_recovery_read_error(r10bio_t *r10_bio)
1612{
1613 /* We got a read error during recovery.
1614 * We repeat the read in smaller page-sized sections.
1615 * If a read succeeds, write it to the new device or record
1616 * a bad block if we cannot.
1617 * If a read fails, record a bad block on both old and
1618 * new devices.
1619 */
1620 mddev_t *mddev = r10_bio->mddev;
1621 conf_t *conf = mddev->private;
1622 struct bio *bio = r10_bio->devs[0].bio;
1623 sector_t sect = 0;
1624 int sectors = r10_bio->sectors;
1625 int idx = 0;
1626 int dr = r10_bio->devs[0].devnum;
1627 int dw = r10_bio->devs[1].devnum;
1628
1629 while (sectors) {
1630 int s = sectors;
1631 mdk_rdev_t *rdev;
1632 sector_t addr;
1633 int ok;
1634
1635 if (s > (PAGE_SIZE>>9))
1636 s = PAGE_SIZE >> 9;
1637
1638 rdev = conf->mirrors[dr].rdev;
1639 addr = r10_bio->devs[0].addr + sect,
1640 ok = sync_page_io(rdev,
1641 addr,
1642 s << 9,
1643 bio->bi_io_vec[idx].bv_page,
1644 READ, false);
1645 if (ok) {
1646 rdev = conf->mirrors[dw].rdev;
1647 addr = r10_bio->devs[1].addr + sect;
1648 ok = sync_page_io(rdev,
1649 addr,
1650 s << 9,
1651 bio->bi_io_vec[idx].bv_page,
1652 WRITE, false);
1653 if (!ok)
1654 set_bit(WriteErrorSeen, &rdev->flags);
1655 }
1656 if (!ok) {
1657 /* We don't worry if we cannot set a bad block -
1658 * it really is bad so there is no loss in not
1659 * recording it yet
1660 */
1661 rdev_set_badblocks(rdev, addr, s, 0);
1662
1663 if (rdev != conf->mirrors[dw].rdev) {
1664 /* need bad block on destination too */
1665 mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev;
1666 addr = r10_bio->devs[1].addr + sect;
1667 ok = rdev_set_badblocks(rdev2, addr, s, 0);
1668 if (!ok) {
1669 /* just abort the recovery */
1670 printk(KERN_NOTICE
1671 "md/raid10:%s: recovery aborted"
1672 " due to read error\n",
1673 mdname(mddev));
1674
1675 conf->mirrors[dw].recovery_disabled
1676 = mddev->recovery_disabled;
1677 set_bit(MD_RECOVERY_INTR,
1678 &mddev->recovery);
1679 break;
1680 }
1681 }
1682 }
1683
1684 sectors -= s;
1685 sect += s;
1686 idx++;
1687 }
1688}
1603 1689
1604static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) 1690static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1605{ 1691{
@@ -1607,6 +1693,12 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1607 int d; 1693 int d;
1608 struct bio *wbio; 1694 struct bio *wbio;
1609 1695
1696 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
1697 fix_recovery_read_error(r10_bio);
1698 end_sync_request(r10_bio);
1699 return;
1700 }
1701
1610 /* 1702 /*
1611 * share the pages with the first bio 1703 * share the pages with the first bio
1612 * and submit the write request 1704 * and submit the write request
@@ -1616,16 +1708,7 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1616 1708
1617 atomic_inc(&conf->mirrors[d].rdev->nr_pending); 1709 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
1618 md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); 1710 md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
1619 if (test_bit(R10BIO_Uptodate, &r10_bio->state)) 1711 generic_make_request(wbio);
1620 generic_make_request(wbio);
1621 else {
1622 printk(KERN_NOTICE
1623 "md/raid10:%s: recovery aborted due to read error\n",
1624 mdname(mddev));
1625 conf->mirrors[d].recovery_disabled = mddev->recovery_disabled;
1626 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1627 bio_endio(wbio, 0);
1628 }
1629} 1712}
1630 1713
1631 1714
@@ -2339,6 +2422,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2339 for (j=0; j<conf->copies;j++) { 2422 for (j=0; j<conf->copies;j++) {
2340 int k; 2423 int k;
2341 int d = r10_bio->devs[j].devnum; 2424 int d = r10_bio->devs[j].devnum;
2425 sector_t from_addr, to_addr;
2342 mdk_rdev_t *rdev; 2426 mdk_rdev_t *rdev;
2343 sector_t sector, first_bad; 2427 sector_t sector, first_bad;
2344 int bad_sectors; 2428 int bad_sectors;
@@ -2368,7 +2452,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2368 bio->bi_private = r10_bio; 2452 bio->bi_private = r10_bio;
2369 bio->bi_end_io = end_sync_read; 2453 bio->bi_end_io = end_sync_read;
2370 bio->bi_rw = READ; 2454 bio->bi_rw = READ;
2371 bio->bi_sector = r10_bio->devs[j].addr + 2455 from_addr = r10_bio->devs[j].addr;
2456 bio->bi_sector = from_addr +
2372 conf->mirrors[d].rdev->data_offset; 2457 conf->mirrors[d].rdev->data_offset;
2373 bio->bi_bdev = conf->mirrors[d].rdev->bdev; 2458 bio->bi_bdev = conf->mirrors[d].rdev->bdev;
2374 atomic_inc(&conf->mirrors[d].rdev->nr_pending); 2459 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@ -2385,12 +2470,15 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2385 bio->bi_private = r10_bio; 2470 bio->bi_private = r10_bio;
2386 bio->bi_end_io = end_sync_write; 2471 bio->bi_end_io = end_sync_write;
2387 bio->bi_rw = WRITE; 2472 bio->bi_rw = WRITE;
2388 bio->bi_sector = r10_bio->devs[k].addr + 2473 to_addr = r10_bio->devs[k].addr;
2474 bio->bi_sector = to_addr +
2389 conf->mirrors[i].rdev->data_offset; 2475 conf->mirrors[i].rdev->data_offset;
2390 bio->bi_bdev = conf->mirrors[i].rdev->bdev; 2476 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
2391 2477
2392 r10_bio->devs[0].devnum = d; 2478 r10_bio->devs[0].devnum = d;
2479 r10_bio->devs[0].addr = from_addr;
2393 r10_bio->devs[1].devnum = i; 2480 r10_bio->devs[1].devnum = i;
2481 r10_bio->devs[1].addr = to_addr;
2394 2482
2395 break; 2483 break;
2396 } 2484 }