diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 424 |
1 files changed, 203 insertions, 221 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8e9462626ec5..6e846688962f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error) | |||
271 | */ | 271 | */ |
272 | set_bit(R10BIO_Uptodate, &r10_bio->state); | 272 | set_bit(R10BIO_Uptodate, &r10_bio->state); |
273 | raid_end_bio_io(r10_bio); | 273 | raid_end_bio_io(r10_bio); |
274 | rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); | ||
274 | } else { | 275 | } else { |
275 | /* | 276 | /* |
276 | * oops, read error: | 277 | * oops, read error - keep the refcount on the rdev |
277 | */ | 278 | */ |
278 | char b[BDEVNAME_SIZE]; | 279 | char b[BDEVNAME_SIZE]; |
279 | if (printk_ratelimit()) | 280 | if (printk_ratelimit()) |
@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error) | |||
282 | bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); | 283 | bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); |
283 | reschedule_retry(r10_bio); | 284 | reschedule_retry(r10_bio); |
284 | } | 285 | } |
285 | |||
286 | rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); | ||
287 | } | 286 | } |
288 | 287 | ||
289 | static void raid10_end_write_request(struct bio *bio, int error) | 288 | static void raid10_end_write_request(struct bio *bio, int error) |
@@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q, | |||
488 | static int read_balance(conf_t *conf, r10bio_t *r10_bio) | 487 | static int read_balance(conf_t *conf, r10bio_t *r10_bio) |
489 | { | 488 | { |
490 | const sector_t this_sector = r10_bio->sector; | 489 | const sector_t this_sector = r10_bio->sector; |
491 | int disk, slot, nslot; | 490 | int disk, slot; |
492 | const int sectors = r10_bio->sectors; | 491 | const int sectors = r10_bio->sectors; |
493 | sector_t new_distance, current_distance; | 492 | sector_t new_distance, best_dist; |
494 | mdk_rdev_t *rdev; | 493 | mdk_rdev_t *rdev; |
494 | int do_balance; | ||
495 | int best_slot; | ||
495 | 496 | ||
496 | raid10_find_phys(conf, r10_bio); | 497 | raid10_find_phys(conf, r10_bio); |
497 | rcu_read_lock(); | 498 | rcu_read_lock(); |
499 | retry: | ||
500 | best_slot = -1; | ||
501 | best_dist = MaxSector; | ||
502 | do_balance = 1; | ||
498 | /* | 503 | /* |
499 | * Check if we can balance. We can balance on the whole | 504 | * Check if we can balance. We can balance on the whole |
500 | * device if no resync is going on (recovery is ok), or below | 505 | * device if no resync is going on (recovery is ok), or below |
@@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
502 | * above the resync window. | 507 | * above the resync window. |
503 | */ | 508 | */ |
504 | if (conf->mddev->recovery_cp < MaxSector | 509 | if (conf->mddev->recovery_cp < MaxSector |
505 | && (this_sector + sectors >= conf->next_resync)) { | 510 | && (this_sector + sectors >= conf->next_resync)) |
506 | /* make sure that disk is operational */ | 511 | do_balance = 0; |
507 | slot = 0; | ||
508 | disk = r10_bio->devs[slot].devnum; | ||
509 | |||
510 | while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || | ||
511 | r10_bio->devs[slot].bio == IO_BLOCKED || | ||
512 | !test_bit(In_sync, &rdev->flags)) { | ||
513 | slot++; | ||
514 | if (slot == conf->copies) { | ||
515 | slot = 0; | ||
516 | disk = -1; | ||
517 | break; | ||
518 | } | ||
519 | disk = r10_bio->devs[slot].devnum; | ||
520 | } | ||
521 | goto rb_out; | ||
522 | } | ||
523 | |||
524 | 512 | ||
525 | /* make sure the disk is operational */ | 513 | for (slot = 0; slot < conf->copies ; slot++) { |
526 | slot = 0; | 514 | if (r10_bio->devs[slot].bio == IO_BLOCKED) |
527 | disk = r10_bio->devs[slot].devnum; | 515 | continue; |
528 | while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || | ||
529 | r10_bio->devs[slot].bio == IO_BLOCKED || | ||
530 | !test_bit(In_sync, &rdev->flags)) { | ||
531 | slot ++; | ||
532 | if (slot == conf->copies) { | ||
533 | disk = -1; | ||
534 | goto rb_out; | ||
535 | } | ||
536 | disk = r10_bio->devs[slot].devnum; | 516 | disk = r10_bio->devs[slot].devnum; |
537 | } | 517 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
538 | 518 | if (rdev == NULL) | |
539 | |||
540 | current_distance = abs(r10_bio->devs[slot].addr - | ||
541 | conf->mirrors[disk].head_position); | ||
542 | |||
543 | /* Find the disk whose head is closest, | ||
544 | * or - for far > 1 - find the closest to partition beginning */ | ||
545 | |||
546 | for (nslot = slot; nslot < conf->copies; nslot++) { | ||
547 | int ndisk = r10_bio->devs[nslot].devnum; | ||
548 | |||
549 | |||
550 | if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || | ||
551 | r10_bio->devs[nslot].bio == IO_BLOCKED || | ||
552 | !test_bit(In_sync, &rdev->flags)) | ||
553 | continue; | 519 | continue; |
520 | if (!test_bit(In_sync, &rdev->flags)) | ||
521 | continue; | ||
522 | |||
523 | if (!do_balance) | ||
524 | break; | ||
554 | 525 | ||
555 | /* This optimisation is debatable, and completely destroys | 526 | /* This optimisation is debatable, and completely destroys |
556 | * sequential read speed for 'far copies' arrays. So only | 527 | * sequential read speed for 'far copies' arrays. So only |
557 | * keep it for 'near' arrays, and review those later. | 528 | * keep it for 'near' arrays, and review those later. |
558 | */ | 529 | */ |
559 | if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) { | 530 | if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) |
560 | disk = ndisk; | ||
561 | slot = nslot; | ||
562 | break; | 531 | break; |
563 | } | ||
564 | 532 | ||
565 | /* for far > 1 always use the lowest address */ | 533 | /* for far > 1 always use the lowest address */ |
566 | if (conf->far_copies > 1) | 534 | if (conf->far_copies > 1) |
567 | new_distance = r10_bio->devs[nslot].addr; | 535 | new_distance = r10_bio->devs[slot].addr; |
568 | else | 536 | else |
569 | new_distance = abs(r10_bio->devs[nslot].addr - | 537 | new_distance = abs(r10_bio->devs[slot].addr - |
570 | conf->mirrors[ndisk].head_position); | 538 | conf->mirrors[disk].head_position); |
571 | if (new_distance < current_distance) { | 539 | if (new_distance < best_dist) { |
572 | current_distance = new_distance; | 540 | best_dist = new_distance; |
573 | disk = ndisk; | 541 | best_slot = slot; |
574 | slot = nslot; | ||
575 | } | 542 | } |
576 | } | 543 | } |
544 | if (slot == conf->copies) | ||
545 | slot = best_slot; | ||
577 | 546 | ||
578 | rb_out: | 547 | if (slot >= 0) { |
579 | r10_bio->read_slot = slot; | 548 | disk = r10_bio->devs[slot].devnum; |
580 | /* conf->next_seq_sect = this_sector + sectors;*/ | 549 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
581 | 550 | if (!rdev) | |
582 | if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL) | 551 | goto retry; |
583 | atomic_inc(&conf->mirrors[disk].rdev->nr_pending); | 552 | atomic_inc(&rdev->nr_pending); |
584 | else | 553 | if (test_bit(Faulty, &rdev->flags)) { |
554 | /* Cannot risk returning a device that failed | ||
555 | * before we inc'ed nr_pending | ||
556 | */ | ||
557 | rdev_dec_pending(rdev, conf->mddev); | ||
558 | goto retry; | ||
559 | } | ||
560 | r10_bio->read_slot = slot; | ||
561 | } else | ||
585 | disk = -1; | 562 | disk = -1; |
586 | rcu_read_unlock(); | 563 | rcu_read_unlock(); |
587 | 564 | ||
@@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1460 | int max_read_errors = atomic_read(&mddev->max_corr_read_errors); | 1437 | int max_read_errors = atomic_read(&mddev->max_corr_read_errors); |
1461 | int d = r10_bio->devs[r10_bio->read_slot].devnum; | 1438 | int d = r10_bio->devs[r10_bio->read_slot].devnum; |
1462 | 1439 | ||
1463 | rcu_read_lock(); | 1440 | /* still own a reference to this rdev, so it cannot |
1464 | rdev = rcu_dereference(conf->mirrors[d].rdev); | 1441 | * have been cleared recently. |
1465 | if (rdev) { /* If rdev is not NULL */ | 1442 | */ |
1466 | char b[BDEVNAME_SIZE]; | 1443 | rdev = conf->mirrors[d].rdev; |
1467 | int cur_read_error_count = 0; | ||
1468 | 1444 | ||
1469 | bdevname(rdev->bdev, b); | 1445 | if (test_bit(Faulty, &rdev->flags)) |
1446 | /* drive has already been failed, just ignore any | ||
1447 | more fix_read_error() attempts */ | ||
1448 | return; | ||
1470 | 1449 | ||
1471 | if (test_bit(Faulty, &rdev->flags)) { | 1450 | check_decay_read_errors(mddev, rdev); |
1472 | rcu_read_unlock(); | 1451 | atomic_inc(&rdev->read_errors); |
1473 | /* drive has already been failed, just ignore any | 1452 | if (atomic_read(&rdev->read_errors) > max_read_errors) { |
1474 | more fix_read_error() attempts */ | 1453 | char b[BDEVNAME_SIZE]; |
1475 | return; | 1454 | bdevname(rdev->bdev, b); |
1476 | } | ||
1477 | 1455 | ||
1478 | check_decay_read_errors(mddev, rdev); | 1456 | printk(KERN_NOTICE |
1479 | atomic_inc(&rdev->read_errors); | 1457 | "md/raid10:%s: %s: Raid device exceeded " |
1480 | cur_read_error_count = atomic_read(&rdev->read_errors); | 1458 | "read_error threshold [cur %d:max %d]\n", |
1481 | if (cur_read_error_count > max_read_errors) { | 1459 | mdname(mddev), b, |
1482 | rcu_read_unlock(); | 1460 | atomic_read(&rdev->read_errors), max_read_errors); |
1483 | printk(KERN_NOTICE | 1461 | printk(KERN_NOTICE |
1484 | "md/raid10:%s: %s: Raid device exceeded " | 1462 | "md/raid10:%s: %s: Failing raid device\n", |
1485 | "read_error threshold " | 1463 | mdname(mddev), b); |
1486 | "[cur %d:max %d]\n", | 1464 | md_error(mddev, conf->mirrors[d].rdev); |
1487 | mdname(mddev), | 1465 | return; |
1488 | b, cur_read_error_count, max_read_errors); | ||
1489 | printk(KERN_NOTICE | ||
1490 | "md/raid10:%s: %s: Failing raid " | ||
1491 | "device\n", mdname(mddev), b); | ||
1492 | md_error(mddev, conf->mirrors[d].rdev); | ||
1493 | return; | ||
1494 | } | ||
1495 | } | 1466 | } |
1496 | rcu_read_unlock(); | ||
1497 | 1467 | ||
1498 | while(sectors) { | 1468 | while(sectors) { |
1499 | int s = sectors; | 1469 | int s = sectors; |
@@ -1562,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1562 | "write failed" | 1532 | "write failed" |
1563 | " (%d sectors at %llu on %s)\n", | 1533 | " (%d sectors at %llu on %s)\n", |
1564 | mdname(mddev), s, | 1534 | mdname(mddev), s, |
1565 | (unsigned long long)(sect+ | 1535 | (unsigned long long)( |
1566 | rdev->data_offset), | 1536 | sect + rdev->data_offset), |
1567 | bdevname(rdev->bdev, b)); | 1537 | bdevname(rdev->bdev, b)); |
1568 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " | 1538 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " |
1569 | "drive\n", | 1539 | "drive\n", |
@@ -1599,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1599 | "corrected sectors" | 1569 | "corrected sectors" |
1600 | " (%d sectors at %llu on %s)\n", | 1570 | " (%d sectors at %llu on %s)\n", |
1601 | mdname(mddev), s, | 1571 | mdname(mddev), s, |
1602 | (unsigned long long)(sect+ | 1572 | (unsigned long long)( |
1603 | rdev->data_offset), | 1573 | sect + rdev->data_offset), |
1604 | bdevname(rdev->bdev, b)); | 1574 | bdevname(rdev->bdev, b)); |
1605 | printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", | 1575 | printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", |
1606 | mdname(mddev), | 1576 | mdname(mddev), |
@@ -1612,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1612 | "md/raid10:%s: read error corrected" | 1582 | "md/raid10:%s: read error corrected" |
1613 | " (%d sectors at %llu on %s)\n", | 1583 | " (%d sectors at %llu on %s)\n", |
1614 | mdname(mddev), s, | 1584 | mdname(mddev), s, |
1615 | (unsigned long long)(sect+ | 1585 | (unsigned long long)( |
1616 | rdev->data_offset), | 1586 | sect + rdev->data_offset), |
1617 | bdevname(rdev->bdev, b)); | 1587 | bdevname(rdev->bdev, b)); |
1618 | } | 1588 | } |
1619 | 1589 | ||
@@ -1663,7 +1633,8 @@ static void raid10d(mddev_t *mddev) | |||
1663 | else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) | 1633 | else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) |
1664 | recovery_request_write(mddev, r10_bio); | 1634 | recovery_request_write(mddev, r10_bio); |
1665 | else { | 1635 | else { |
1666 | int mirror; | 1636 | int slot = r10_bio->read_slot; |
1637 | int mirror = r10_bio->devs[slot].devnum; | ||
1667 | /* we got a read error. Maybe the drive is bad. Maybe just | 1638 | /* we got a read error. Maybe the drive is bad. Maybe just |
1668 | * the block and we can fix it. | 1639 | * the block and we can fix it. |
1669 | * We freeze all other IO, and try reading the block from | 1640 | * We freeze all other IO, and try reading the block from |
@@ -1677,9 +1648,10 @@ static void raid10d(mddev_t *mddev) | |||
1677 | fix_read_error(conf, mddev, r10_bio); | 1648 | fix_read_error(conf, mddev, r10_bio); |
1678 | unfreeze_array(conf); | 1649 | unfreeze_array(conf); |
1679 | } | 1650 | } |
1651 | rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); | ||
1680 | 1652 | ||
1681 | bio = r10_bio->devs[r10_bio->read_slot].bio; | 1653 | bio = r10_bio->devs[slot].bio; |
1682 | r10_bio->devs[r10_bio->read_slot].bio = | 1654 | r10_bio->devs[slot].bio = |
1683 | mddev->ro ? IO_BLOCKED : NULL; | 1655 | mddev->ro ? IO_BLOCKED : NULL; |
1684 | mirror = read_balance(conf, r10_bio); | 1656 | mirror = read_balance(conf, r10_bio); |
1685 | if (mirror == -1) { | 1657 | if (mirror == -1) { |
@@ -1693,6 +1665,7 @@ static void raid10d(mddev_t *mddev) | |||
1693 | } else { | 1665 | } else { |
1694 | const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); | 1666 | const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); |
1695 | bio_put(bio); | 1667 | bio_put(bio); |
1668 | slot = r10_bio->read_slot; | ||
1696 | rdev = conf->mirrors[mirror].rdev; | 1669 | rdev = conf->mirrors[mirror].rdev; |
1697 | if (printk_ratelimit()) | 1670 | if (printk_ratelimit()) |
1698 | printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" | 1671 | printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" |
@@ -1702,8 +1675,8 @@ static void raid10d(mddev_t *mddev) | |||
1702 | (unsigned long long)r10_bio->sector); | 1675 | (unsigned long long)r10_bio->sector); |
1703 | bio = bio_clone_mddev(r10_bio->master_bio, | 1676 | bio = bio_clone_mddev(r10_bio->master_bio, |
1704 | GFP_NOIO, mddev); | 1677 | GFP_NOIO, mddev); |
1705 | r10_bio->devs[r10_bio->read_slot].bio = bio; | 1678 | r10_bio->devs[slot].bio = bio; |
1706 | bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr | 1679 | bio->bi_sector = r10_bio->devs[slot].addr |
1707 | + rdev->data_offset; | 1680 | + rdev->data_offset; |
1708 | bio->bi_bdev = rdev->bdev; | 1681 | bio->bi_bdev = rdev->bdev; |
1709 | bio->bi_rw = READ | do_sync; | 1682 | bio->bi_rw = READ | do_sync; |
@@ -1763,13 +1736,13 @@ static int init_resync(conf_t *conf) | |||
1763 | * | 1736 | * |
1764 | */ | 1737 | */ |
1765 | 1738 | ||
1766 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) | 1739 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, |
1740 | int *skipped, int go_faster) | ||
1767 | { | 1741 | { |
1768 | conf_t *conf = mddev->private; | 1742 | conf_t *conf = mddev->private; |
1769 | r10bio_t *r10_bio; | 1743 | r10bio_t *r10_bio; |
1770 | struct bio *biolist = NULL, *bio; | 1744 | struct bio *biolist = NULL, *bio; |
1771 | sector_t max_sector, nr_sectors; | 1745 | sector_t max_sector, nr_sectors; |
1772 | int disk; | ||
1773 | int i; | 1746 | int i; |
1774 | int max_sync; | 1747 | int max_sync; |
1775 | sector_t sync_blocks; | 1748 | sector_t sync_blocks; |
@@ -1858,108 +1831,114 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1858 | int j, k; | 1831 | int j, k; |
1859 | r10_bio = NULL; | 1832 | r10_bio = NULL; |
1860 | 1833 | ||
1861 | for (i=0 ; i<conf->raid_disks; i++) | 1834 | for (i=0 ; i<conf->raid_disks; i++) { |
1862 | if (conf->mirrors[i].rdev && | 1835 | int still_degraded; |
1863 | !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { | 1836 | r10bio_t *rb2; |
1864 | int still_degraded = 0; | 1837 | sector_t sect; |
1865 | /* want to reconstruct this device */ | 1838 | int must_sync; |
1866 | r10bio_t *rb2 = r10_bio; | ||
1867 | sector_t sect = raid10_find_virt(conf, sector_nr, i); | ||
1868 | int must_sync; | ||
1869 | /* Unless we are doing a full sync, we only need | ||
1870 | * to recover the block if it is set in the bitmap | ||
1871 | */ | ||
1872 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | ||
1873 | &sync_blocks, 1); | ||
1874 | if (sync_blocks < max_sync) | ||
1875 | max_sync = sync_blocks; | ||
1876 | if (!must_sync && | ||
1877 | !conf->fullsync) { | ||
1878 | /* yep, skip the sync_blocks here, but don't assume | ||
1879 | * that there will never be anything to do here | ||
1880 | */ | ||
1881 | chunks_skipped = -1; | ||
1882 | continue; | ||
1883 | } | ||
1884 | 1839 | ||
1885 | r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); | 1840 | if (conf->mirrors[i].rdev == NULL || |
1886 | raise_barrier(conf, rb2 != NULL); | 1841 | test_bit(In_sync, &conf->mirrors[i].rdev->flags)) |
1887 | atomic_set(&r10_bio->remaining, 0); | 1842 | continue; |
1888 | 1843 | ||
1889 | r10_bio->master_bio = (struct bio*)rb2; | 1844 | still_degraded = 0; |
1890 | if (rb2) | 1845 | /* want to reconstruct this device */ |
1891 | atomic_inc(&rb2->remaining); | 1846 | rb2 = r10_bio; |
1892 | r10_bio->mddev = mddev; | 1847 | sect = raid10_find_virt(conf, sector_nr, i); |
1893 | set_bit(R10BIO_IsRecover, &r10_bio->state); | 1848 | /* Unless we are doing a full sync, we only need |
1894 | r10_bio->sector = sect; | 1849 | * to recover the block if it is set in the bitmap |
1850 | */ | ||
1851 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | ||
1852 | &sync_blocks, 1); | ||
1853 | if (sync_blocks < max_sync) | ||
1854 | max_sync = sync_blocks; | ||
1855 | if (!must_sync && | ||
1856 | !conf->fullsync) { | ||
1857 | /* yep, skip the sync_blocks here, but don't assume | ||
1858 | * that there will never be anything to do here | ||
1859 | */ | ||
1860 | chunks_skipped = -1; | ||
1861 | continue; | ||
1862 | } | ||
1895 | 1863 | ||
1896 | raid10_find_phys(conf, r10_bio); | 1864 | r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); |
1865 | raise_barrier(conf, rb2 != NULL); | ||
1866 | atomic_set(&r10_bio->remaining, 0); | ||
1897 | 1867 | ||
1898 | /* Need to check if the array will still be | 1868 | r10_bio->master_bio = (struct bio*)rb2; |
1899 | * degraded | 1869 | if (rb2) |
1900 | */ | 1870 | atomic_inc(&rb2->remaining); |
1901 | for (j=0; j<conf->raid_disks; j++) | 1871 | r10_bio->mddev = mddev; |
1902 | if (conf->mirrors[j].rdev == NULL || | 1872 | set_bit(R10BIO_IsRecover, &r10_bio->state); |
1903 | test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { | 1873 | r10_bio->sector = sect; |
1904 | still_degraded = 1; | ||
1905 | break; | ||
1906 | } | ||
1907 | |||
1908 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | ||
1909 | &sync_blocks, still_degraded); | ||
1910 | |||
1911 | for (j=0; j<conf->copies;j++) { | ||
1912 | int d = r10_bio->devs[j].devnum; | ||
1913 | if (conf->mirrors[d].rdev && | ||
1914 | test_bit(In_sync, &conf->mirrors[d].rdev->flags)) { | ||
1915 | /* This is where we read from */ | ||
1916 | bio = r10_bio->devs[0].bio; | ||
1917 | bio->bi_next = biolist; | ||
1918 | biolist = bio; | ||
1919 | bio->bi_private = r10_bio; | ||
1920 | bio->bi_end_io = end_sync_read; | ||
1921 | bio->bi_rw = READ; | ||
1922 | bio->bi_sector = r10_bio->devs[j].addr + | ||
1923 | conf->mirrors[d].rdev->data_offset; | ||
1924 | bio->bi_bdev = conf->mirrors[d].rdev->bdev; | ||
1925 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); | ||
1926 | atomic_inc(&r10_bio->remaining); | ||
1927 | /* and we write to 'i' */ | ||
1928 | |||
1929 | for (k=0; k<conf->copies; k++) | ||
1930 | if (r10_bio->devs[k].devnum == i) | ||
1931 | break; | ||
1932 | BUG_ON(k == conf->copies); | ||
1933 | bio = r10_bio->devs[1].bio; | ||
1934 | bio->bi_next = biolist; | ||
1935 | biolist = bio; | ||
1936 | bio->bi_private = r10_bio; | ||
1937 | bio->bi_end_io = end_sync_write; | ||
1938 | bio->bi_rw = WRITE; | ||
1939 | bio->bi_sector = r10_bio->devs[k].addr + | ||
1940 | conf->mirrors[i].rdev->data_offset; | ||
1941 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1942 | |||
1943 | r10_bio->devs[0].devnum = d; | ||
1944 | r10_bio->devs[1].devnum = i; | ||
1945 | 1874 | ||
1946 | break; | 1875 | raid10_find_phys(conf, r10_bio); |
1947 | } | 1876 | |
1948 | } | 1877 | /* Need to check if the array will still be |
1949 | if (j == conf->copies) { | 1878 | * degraded |
1950 | /* Cannot recover, so abort the recovery */ | 1879 | */ |
1951 | put_buf(r10_bio); | 1880 | for (j=0; j<conf->raid_disks; j++) |
1952 | if (rb2) | 1881 | if (conf->mirrors[j].rdev == NULL || |
1953 | atomic_dec(&rb2->remaining); | 1882 | test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { |
1954 | r10_bio = rb2; | 1883 | still_degraded = 1; |
1955 | if (!test_and_set_bit(MD_RECOVERY_INTR, | ||
1956 | &mddev->recovery)) | ||
1957 | printk(KERN_INFO "md/raid10:%s: insufficient " | ||
1958 | "working devices for recovery.\n", | ||
1959 | mdname(mddev)); | ||
1960 | break; | 1884 | break; |
1961 | } | 1885 | } |
1886 | |||
1887 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | ||
1888 | &sync_blocks, still_degraded); | ||
1889 | |||
1890 | for (j=0; j<conf->copies;j++) { | ||
1891 | int d = r10_bio->devs[j].devnum; | ||
1892 | if (!conf->mirrors[d].rdev || | ||
1893 | !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) | ||
1894 | continue; | ||
1895 | /* This is where we read from */ | ||
1896 | bio = r10_bio->devs[0].bio; | ||
1897 | bio->bi_next = biolist; | ||
1898 | biolist = bio; | ||
1899 | bio->bi_private = r10_bio; | ||
1900 | bio->bi_end_io = end_sync_read; | ||
1901 | bio->bi_rw = READ; | ||
1902 | bio->bi_sector = r10_bio->devs[j].addr + | ||
1903 | conf->mirrors[d].rdev->data_offset; | ||
1904 | bio->bi_bdev = conf->mirrors[d].rdev->bdev; | ||
1905 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); | ||
1906 | atomic_inc(&r10_bio->remaining); | ||
1907 | /* and we write to 'i' */ | ||
1908 | |||
1909 | for (k=0; k<conf->copies; k++) | ||
1910 | if (r10_bio->devs[k].devnum == i) | ||
1911 | break; | ||
1912 | BUG_ON(k == conf->copies); | ||
1913 | bio = r10_bio->devs[1].bio; | ||
1914 | bio->bi_next = biolist; | ||
1915 | biolist = bio; | ||
1916 | bio->bi_private = r10_bio; | ||
1917 | bio->bi_end_io = end_sync_write; | ||
1918 | bio->bi_rw = WRITE; | ||
1919 | bio->bi_sector = r10_bio->devs[k].addr + | ||
1920 | conf->mirrors[i].rdev->data_offset; | ||
1921 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1922 | |||
1923 | r10_bio->devs[0].devnum = d; | ||
1924 | r10_bio->devs[1].devnum = i; | ||
1925 | |||
1926 | break; | ||
1927 | } | ||
1928 | if (j == conf->copies) { | ||
1929 | /* Cannot recover, so abort the recovery */ | ||
1930 | put_buf(r10_bio); | ||
1931 | if (rb2) | ||
1932 | atomic_dec(&rb2->remaining); | ||
1933 | r10_bio = rb2; | ||
1934 | if (!test_and_set_bit(MD_RECOVERY_INTR, | ||
1935 | &mddev->recovery)) | ||
1936 | printk(KERN_INFO "md/raid10:%s: insufficient " | ||
1937 | "working devices for recovery.\n", | ||
1938 | mdname(mddev)); | ||
1939 | break; | ||
1962 | } | 1940 | } |
1941 | } | ||
1963 | if (biolist == NULL) { | 1942 | if (biolist == NULL) { |
1964 | while (r10_bio) { | 1943 | while (r10_bio) { |
1965 | r10bio_t *rb2 = r10_bio; | 1944 | r10bio_t *rb2 = r10_bio; |
@@ -1977,7 +1956,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1977 | 1956 | ||
1978 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, | 1957 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, |
1979 | &sync_blocks, mddev->degraded) && | 1958 | &sync_blocks, mddev->degraded) && |
1980 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | 1959 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, |
1960 | &mddev->recovery)) { | ||
1981 | /* We can skip this block */ | 1961 | /* We can skip this block */ |
1982 | *skipped = 1; | 1962 | *skipped = 1; |
1983 | return sync_blocks + sectors_skipped; | 1963 | return sync_blocks + sectors_skipped; |
@@ -2022,7 +2002,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
2022 | for (i=0; i<conf->copies; i++) { | 2002 | for (i=0; i<conf->copies; i++) { |
2023 | int d = r10_bio->devs[i].devnum; | 2003 | int d = r10_bio->devs[i].devnum; |
2024 | if (r10_bio->devs[i].bio->bi_end_io) | 2004 | if (r10_bio->devs[i].bio->bi_end_io) |
2025 | rdev_dec_pending(conf->mirrors[d].rdev, mddev); | 2005 | rdev_dec_pending(conf->mirrors[d].rdev, |
2006 | mddev); | ||
2026 | } | 2007 | } |
2027 | put_buf(r10_bio); | 2008 | put_buf(r10_bio); |
2028 | biolist = NULL; | 2009 | biolist = NULL; |
@@ -2047,26 +2028,27 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
2047 | do { | 2028 | do { |
2048 | struct page *page; | 2029 | struct page *page; |
2049 | int len = PAGE_SIZE; | 2030 | int len = PAGE_SIZE; |
2050 | disk = 0; | ||
2051 | if (sector_nr + (len>>9) > max_sector) | 2031 | if (sector_nr + (len>>9) > max_sector) |
2052 | len = (max_sector - sector_nr) << 9; | 2032 | len = (max_sector - sector_nr) << 9; |
2053 | if (len == 0) | 2033 | if (len == 0) |
2054 | break; | 2034 | break; |
2055 | for (bio= biolist ; bio ; bio=bio->bi_next) { | 2035 | for (bio= biolist ; bio ; bio=bio->bi_next) { |
2036 | struct bio *bio2; | ||
2056 | page = bio->bi_io_vec[bio->bi_vcnt].bv_page; | 2037 | page = bio->bi_io_vec[bio->bi_vcnt].bv_page; |
2057 | if (bio_add_page(bio, page, len, 0) == 0) { | 2038 | if (bio_add_page(bio, page, len, 0)) |
2058 | /* stop here */ | 2039 | continue; |
2059 | struct bio *bio2; | 2040 | |
2060 | bio->bi_io_vec[bio->bi_vcnt].bv_page = page; | 2041 | /* stop here */ |
2061 | for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) { | 2042 | bio->bi_io_vec[bio->bi_vcnt].bv_page = page; |
2062 | /* remove last page from this bio */ | 2043 | for (bio2 = biolist; |
2063 | bio2->bi_vcnt--; | 2044 | bio2 && bio2 != bio; |
2064 | bio2->bi_size -= len; | 2045 | bio2 = bio2->bi_next) { |
2065 | bio2->bi_flags &= ~(1<< BIO_SEG_VALID); | 2046 | /* remove last page from this bio */ |
2066 | } | 2047 | bio2->bi_vcnt--; |
2067 | goto bio_full; | 2048 | bio2->bi_size -= len; |
2049 | bio2->bi_flags &= ~(1<< BIO_SEG_VALID); | ||
2068 | } | 2050 | } |
2069 | disk = i; | 2051 | goto bio_full; |
2070 | } | 2052 | } |
2071 | nr_sectors += len>>9; | 2053 | nr_sectors += len>>9; |
2072 | sector_nr += len>>9; | 2054 | sector_nr += len>>9; |