Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--	drivers/md/raid10.c	424
1 file changed, 203 insertions, 221 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8e9462626ec5..6e846688962f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 		raid_end_bio_io(r10_bio);
+		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 	} else {
 		/*
-		 * oops, read error:
+		 * oops, read error - keep the refcount on the rdev
 		 */
 		char b[BDEVNAME_SIZE];
 		if (printk_ratelimit())
@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
 			       bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
 		reschedule_retry(r10_bio);
 	}
-
-	rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
 
 static void raid10_end_write_request(struct bio *bio, int error)
@@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q,
 static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 {
 	const sector_t this_sector = r10_bio->sector;
-	int disk, slot, nslot;
+	int disk, slot;
 	const int sectors = r10_bio->sectors;
-	sector_t new_distance, current_distance;
+	sector_t new_distance, best_dist;
 	mdk_rdev_t *rdev;
+	int do_balance;
+	int best_slot;
 
 	raid10_find_phys(conf, r10_bio);
 	rcu_read_lock();
+retry:
+	best_slot = -1;
+	best_dist = MaxSector;
+	do_balance = 1;
 	/*
 	 * Check if we can balance. We can balance on the whole
 	 * device if no resync is going on (recovery is ok), or below
@@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	 * above the resync window.
 	 */
 	if (conf->mddev->recovery_cp < MaxSector
-	    && (this_sector + sectors >= conf->next_resync)) {
-		/* make sure that disk is operational */
-		slot = 0;
-		disk = r10_bio->devs[slot].devnum;
-
-		while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
-		       r10_bio->devs[slot].bio == IO_BLOCKED ||
-		       !test_bit(In_sync, &rdev->flags)) {
-			slot++;
-			if (slot == conf->copies) {
-				slot = 0;
-				disk = -1;
-				break;
-			}
-			disk = r10_bio->devs[slot].devnum;
-		}
-		goto rb_out;
-	}
-
+	    && (this_sector + sectors >= conf->next_resync))
+		do_balance = 0;
 
-	/* make sure the disk is operational */
-	slot = 0;
-	disk = r10_bio->devs[slot].devnum;
-	while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
-	       r10_bio->devs[slot].bio == IO_BLOCKED ||
-	       !test_bit(In_sync, &rdev->flags)) {
-		slot ++;
-		if (slot == conf->copies) {
-			disk = -1;
-			goto rb_out;
-		}
+	for (slot = 0; slot < conf->copies ; slot++) {
+		if (r10_bio->devs[slot].bio == IO_BLOCKED)
+			continue;
 		disk = r10_bio->devs[slot].devnum;
-	}
-
-
-	current_distance = abs(r10_bio->devs[slot].addr -
-			       conf->mirrors[disk].head_position);
-
-	/* Find the disk whose head is closest,
-	 * or - for far > 1 - find the closest to partition beginning */
-
-	for (nslot = slot; nslot < conf->copies; nslot++) {
-		int ndisk = r10_bio->devs[nslot].devnum;
-
-
-		if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
-		    r10_bio->devs[nslot].bio == IO_BLOCKED ||
-		    !test_bit(In_sync, &rdev->flags))
+		rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		if (rdev == NULL)
 			continue;
+		if (!test_bit(In_sync, &rdev->flags))
+			continue;
+
+		if (!do_balance)
+			break;
 
 		/* This optimisation is debatable, and completely destroys
 		 * sequential read speed for 'far copies' arrays. So only
 		 * keep it for 'near' arrays, and review those later.
 		 */
-		if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) {
-			disk = ndisk;
-			slot = nslot;
+		if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending))
 			break;
-		}
 
 		/* for far > 1 always use the lowest address */
 		if (conf->far_copies > 1)
-			new_distance = r10_bio->devs[nslot].addr;
+			new_distance = r10_bio->devs[slot].addr;
 		else
-			new_distance = abs(r10_bio->devs[nslot].addr -
-					   conf->mirrors[ndisk].head_position);
-		if (new_distance < current_distance) {
-			current_distance = new_distance;
-			disk = ndisk;
-			slot = nslot;
+			new_distance = abs(r10_bio->devs[slot].addr -
+					   conf->mirrors[disk].head_position);
+		if (new_distance < best_dist) {
+			best_dist = new_distance;
+			best_slot = slot;
 		}
 	}
+	if (slot == conf->copies)
+		slot = best_slot;
 
-rb_out:
-	r10_bio->read_slot = slot;
-/*	conf->next_seq_sect = this_sector + sectors;*/
-
-	if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
-		atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
-	else
+	if (slot >= 0) {
+		disk = r10_bio->devs[slot].devnum;
+		rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		if (!rdev)
+			goto retry;
+		atomic_inc(&rdev->nr_pending);
+		if (test_bit(Faulty, &rdev->flags)) {
+			/* Cannot risk returning a device that failed
+			 * before we inc'ed nr_pending
+			 */
+			rdev_dec_pending(rdev, conf->mddev);
+			goto retry;
+		}
+		r10_bio->read_slot = slot;
+	} else
 		disk = -1;
 	rcu_read_unlock();
 
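Note: the rewritten read_balance() folds the old duplicated device scans into a single loop over all copies, with do_balance merely disabling the distance heuristics while a resync covers the request. The retry label exists because a device can turn Faulty between being chosen under rcu_read_lock() and the nr_pending increment that pins it, so the code takes the reference first and re-checks Faulty afterwards. Below is a minimal standalone sketch of that increment-then-recheck pattern; fake_rdev and try_get_rdev() are simplified stand-ins, not code from this patch.

/* Sketch of "take the reference, then re-check Faulty". */
#include <stdatomic.h>
#include <stdbool.h>

struct fake_rdev {
	atomic_int nr_pending;
	atomic_bool faulty;
};

/* Returns true if a reference was taken on a still-working device.
 * Returns false (with the reference dropped again) if the device
 * failed in the window before the increment; the caller must then
 * redo its whole selection, as read_balance() does via "goto retry".
 */
static bool try_get_rdev(struct fake_rdev *rdev)
{
	atomic_fetch_add(&rdev->nr_pending, 1);
	if (atomic_load(&rdev->faulty)) {
		/* cannot risk returning a device that failed before
		 * we inc'ed nr_pending */
		atomic_fetch_sub(&rdev->nr_pending, 1);
		return false;
	}
	return true;
}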
@@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[r10_bio->read_slot].devnum;
 
-	rcu_read_lock();
-	rdev = rcu_dereference(conf->mirrors[d].rdev);
-	if (rdev) { /* If rdev is not NULL */
-		char b[BDEVNAME_SIZE];
-		int cur_read_error_count = 0;
+	/* still own a reference to this rdev, so it cannot
+	 * have been cleared recently.
+	 */
+	rdev = conf->mirrors[d].rdev;
 
-		bdevname(rdev->bdev, b);
+	if (test_bit(Faulty, &rdev->flags))
+		/* drive has already been failed, just ignore any
+		   more fix_read_error() attempts */
+		return;
 
-		if (test_bit(Faulty, &rdev->flags)) {
-			rcu_read_unlock();
-			/* drive has already been failed, just ignore any
-			   more fix_read_error() attempts */
-			return;
-		}
+	check_decay_read_errors(mddev, rdev);
+	atomic_inc(&rdev->read_errors);
+	if (atomic_read(&rdev->read_errors) > max_read_errors) {
+		char b[BDEVNAME_SIZE];
+		bdevname(rdev->bdev, b);
 
-		check_decay_read_errors(mddev, rdev);
-		atomic_inc(&rdev->read_errors);
-		cur_read_error_count = atomic_read(&rdev->read_errors);
-		if (cur_read_error_count > max_read_errors) {
-			rcu_read_unlock();
-			printk(KERN_NOTICE
-			       "md/raid10:%s: %s: Raid device exceeded "
-			       "read_error threshold "
-			       "[cur %d:max %d]\n",
-			       mdname(mddev),
-			       b, cur_read_error_count, max_read_errors);
-			printk(KERN_NOTICE
-			       "md/raid10:%s: %s: Failing raid "
-			       "device\n", mdname(mddev), b);
-			md_error(mddev, conf->mirrors[d].rdev);
-			return;
-		}
+		printk(KERN_NOTICE
+		       "md/raid10:%s: %s: Raid device exceeded "
+		       "read_error threshold [cur %d:max %d]\n",
+		       mdname(mddev), b,
+		       atomic_read(&rdev->read_errors), max_read_errors);
+		printk(KERN_NOTICE
+		       "md/raid10:%s: %s: Failing raid device\n",
+		       mdname(mddev), b);
+		md_error(mddev, conf->mirrors[d].rdev);
+		return;
 	}
-	rcu_read_unlock();
 
 	while(sectors) {
 		int s = sectors;
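Note: fix_read_error() can drop the rcu_read_lock()/rcu_dereference() dance here because raid10d now holds a reference on the rdev (via nr_pending) across the whole retry path, so conf->mirrors[d].rdev cannot be cleared underneath it. The threshold test relies on check_decay_read_errors() aging the per-device read_errors count. A rough standalone sketch of such a decaying error budget follows, under the assumption that the count is halved for each idle hour; this is a simplified stand-in, not the kernel's implementation.

/* Sketch of a decaying read-error budget. */
#include <stdbool.h>
#include <time.h>

struct error_budget {
	unsigned int errors;	/* decayed running count */
	time_t last_error;	/* when the count was last touched */
};

/* Record one more read error; returns false once the decayed count
 * exceeds the limit, at which point the caller fails the device
 * (md_error() in the hunk above).
 */
static bool record_read_error(struct error_budget *b, unsigned int max)
{
	time_t now = time(NULL);
	long hours = (long)((now - b->last_error) / 3600);

	while (hours-- > 0 && b->errors)
		b->errors /= 2;	/* decay: halve per idle hour */

	b->last_error = now;
	b->errors++;
	return b->errors <= max;
}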
@@ -1562,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1562 "write failed" 1532 "write failed"
1563 " (%d sectors at %llu on %s)\n", 1533 " (%d sectors at %llu on %s)\n",
1564 mdname(mddev), s, 1534 mdname(mddev), s,
1565 (unsigned long long)(sect+ 1535 (unsigned long long)(
1566 rdev->data_offset), 1536 sect + rdev->data_offset),
1567 bdevname(rdev->bdev, b)); 1537 bdevname(rdev->bdev, b));
1568 printk(KERN_NOTICE "md/raid10:%s: %s: failing " 1538 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
1569 "drive\n", 1539 "drive\n",
@@ -1599,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1599 "corrected sectors" 1569 "corrected sectors"
1600 " (%d sectors at %llu on %s)\n", 1570 " (%d sectors at %llu on %s)\n",
1601 mdname(mddev), s, 1571 mdname(mddev), s,
1602 (unsigned long long)(sect+ 1572 (unsigned long long)(
1603 rdev->data_offset), 1573 sect + rdev->data_offset),
1604 bdevname(rdev->bdev, b)); 1574 bdevname(rdev->bdev, b));
1605 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", 1575 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
1606 mdname(mddev), 1576 mdname(mddev),
@@ -1612,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1612 "md/raid10:%s: read error corrected" 1582 "md/raid10:%s: read error corrected"
1613 " (%d sectors at %llu on %s)\n", 1583 " (%d sectors at %llu on %s)\n",
1614 mdname(mddev), s, 1584 mdname(mddev), s,
1615 (unsigned long long)(sect+ 1585 (unsigned long long)(
1616 rdev->data_offset), 1586 sect + rdev->data_offset),
1617 bdevname(rdev->bdev, b)); 1587 bdevname(rdev->bdev, b));
1618 } 1588 }
1619 1589
@@ -1663,7 +1633,8 @@ static void raid10d(mddev_t *mddev)
 		else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
 			recovery_request_write(mddev, r10_bio);
 		else {
-			int mirror;
+			int slot = r10_bio->read_slot;
+			int mirror = r10_bio->devs[slot].devnum;
 			/* we got a read error. Maybe the drive is bad. Maybe just
 			 * the block and we can fix it.
 			 * We freeze all other IO, and try reading the block from
@@ -1677,9 +1648,10 @@ static void raid10d(mddev_t *mddev)
 				fix_read_error(conf, mddev, r10_bio);
 				unfreeze_array(conf);
 			}
+			rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
 
-			bio = r10_bio->devs[r10_bio->read_slot].bio;
-			r10_bio->devs[r10_bio->read_slot].bio =
+			bio = r10_bio->devs[slot].bio;
+			r10_bio->devs[slot].bio =
 				mddev->ro ? IO_BLOCKED : NULL;
 			mirror = read_balance(conf, r10_bio);
 			if (mirror == -1) {
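Note: together with the first two hunks, this implements a reference handoff. raid10_end_read_request() now drops its nr_pending reference only on success; on a read error the reference is kept, and raid10d drops it here after fix_read_error() has finished touching the device. A standalone sketch of that handoff is below; fake_rdev, end_read() and retry_worker() are simplified stand-ins, not kernel types.

/* Sketch of the reference handoff between completion and worker. */
#include <stdatomic.h>
#include <stdbool.h>

struct fake_rdev {
	atomic_int nr_pending;
};

static void rdev_put(struct fake_rdev *rdev)
{
	atomic_fetch_sub(&rdev->nr_pending, 1);
}

/* completion path: on success the reference is dropped immediately;
 * on failure it is kept so the retry worker may use the device */
static bool end_read(struct fake_rdev *rdev, bool uptodate)
{
	if (uptodate) {
		rdev_put(rdev);
		return true;
	}
	return false;	/* the worker now owns the reference */
}

/* retry worker: safe to dereference rdev because end_read() left the
 * reference in place; drop it once the error handling is done */
static void retry_worker(struct fake_rdev *rdev)
{
	/* ... fix_read_error()-style work under the reference ... */
	rdev_put(rdev);
}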
@@ -1693,6 +1665,7 @@ static void raid10d(mddev_t *mddev)
 			} else {
 				const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
 				bio_put(bio);
+				slot = r10_bio->read_slot;
 				rdev = conf->mirrors[mirror].rdev;
 				if (printk_ratelimit())
 					printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
@@ -1702,8 +1675,8 @@ static void raid10d(mddev_t *mddev)
 					       (unsigned long long)r10_bio->sector);
 				bio = bio_clone_mddev(r10_bio->master_bio,
 						      GFP_NOIO, mddev);
-				r10_bio->devs[r10_bio->read_slot].bio = bio;
-				bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+				r10_bio->devs[slot].bio = bio;
+				bio->bi_sector = r10_bio->devs[slot].addr
 					+ rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				bio->bi_rw = READ | do_sync;
@@ -1763,13 +1736,13 @@ static int init_resync(conf_t *conf)
  *
  */
 
-static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
+static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
+			     int *skipped, int go_faster)
 {
 	conf_t *conf = mddev->private;
 	r10bio_t *r10_bio;
 	struct bio *biolist = NULL, *bio;
 	sector_t max_sector, nr_sectors;
-	int disk;
 	int i;
 	int max_sync;
 	sector_t sync_blocks;
@@ -1858,108 +1831,114 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		int j, k;
 		r10_bio = NULL;
 
-		for (i=0 ; i<conf->raid_disks; i++)
-			if (conf->mirrors[i].rdev &&
-			    !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
-				int still_degraded = 0;
-				/* want to reconstruct this device */
-				r10bio_t *rb2 = r10_bio;
-				sector_t sect = raid10_find_virt(conf, sector_nr, i);
-				int must_sync;
-				/* Unless we are doing a full sync, we only need
-				 * to recover the block if it is set in the bitmap
-				 */
-				must_sync = bitmap_start_sync(mddev->bitmap, sect,
-							      &sync_blocks, 1);
-				if (sync_blocks < max_sync)
-					max_sync = sync_blocks;
-				if (!must_sync &&
-				    !conf->fullsync) {
-					/* yep, skip the sync_blocks here, but don't assume
-					 * that there will never be anything to do here
-					 */
-					chunks_skipped = -1;
-					continue;
-				}
+		for (i=0 ; i<conf->raid_disks; i++) {
+			int still_degraded;
+			r10bio_t *rb2;
+			sector_t sect;
+			int must_sync;
 
-				r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
-				raise_barrier(conf, rb2 != NULL);
-				atomic_set(&r10_bio->remaining, 0);
+			if (conf->mirrors[i].rdev == NULL ||
+			    test_bit(In_sync, &conf->mirrors[i].rdev->flags))
+				continue;
 
-				r10_bio->master_bio = (struct bio*)rb2;
-				if (rb2)
-					atomic_inc(&rb2->remaining);
-				r10_bio->mddev = mddev;
-				set_bit(R10BIO_IsRecover, &r10_bio->state);
-				r10_bio->sector = sect;
+			still_degraded = 0;
+			/* want to reconstruct this device */
+			rb2 = r10_bio;
+			sect = raid10_find_virt(conf, sector_nr, i);
+			/* Unless we are doing a full sync, we only need
+			 * to recover the block if it is set in the bitmap
+			 */
+			must_sync = bitmap_start_sync(mddev->bitmap, sect,
+						      &sync_blocks, 1);
+			if (sync_blocks < max_sync)
+				max_sync = sync_blocks;
+			if (!must_sync &&
+			    !conf->fullsync) {
+				/* yep, skip the sync_blocks here, but don't assume
+				 * that there will never be anything to do here
+				 */
+				chunks_skipped = -1;
+				continue;
+			}
 
-				raid10_find_phys(conf, r10_bio);
+			r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+			raise_barrier(conf, rb2 != NULL);
+			atomic_set(&r10_bio->remaining, 0);
 
-				/* Need to check if the array will still be
-				 * degraded
-				 */
-				for (j=0; j<conf->raid_disks; j++)
-					if (conf->mirrors[j].rdev == NULL ||
-					    test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
-						still_degraded = 1;
-						break;
-					}
-
-				must_sync = bitmap_start_sync(mddev->bitmap, sect,
-							      &sync_blocks, still_degraded);
-
-				for (j=0; j<conf->copies;j++) {
-					int d = r10_bio->devs[j].devnum;
-					if (conf->mirrors[d].rdev &&
-					    test_bit(In_sync, &conf->mirrors[d].rdev->flags)) {
-						/* This is where we read from */
-						bio = r10_bio->devs[0].bio;
-						bio->bi_next = biolist;
-						biolist = bio;
-						bio->bi_private = r10_bio;
-						bio->bi_end_io = end_sync_read;
-						bio->bi_rw = READ;
-						bio->bi_sector = r10_bio->devs[j].addr +
-							conf->mirrors[d].rdev->data_offset;
-						bio->bi_bdev = conf->mirrors[d].rdev->bdev;
-						atomic_inc(&conf->mirrors[d].rdev->nr_pending);
-						atomic_inc(&r10_bio->remaining);
-						/* and we write to 'i' */
-
-						for (k=0; k<conf->copies; k++)
-							if (r10_bio->devs[k].devnum == i)
-								break;
-						BUG_ON(k == conf->copies);
-						bio = r10_bio->devs[1].bio;
-						bio->bi_next = biolist;
-						biolist = bio;
-						bio->bi_private = r10_bio;
-						bio->bi_end_io = end_sync_write;
-						bio->bi_rw = WRITE;
-						bio->bi_sector = r10_bio->devs[k].addr +
-							conf->mirrors[i].rdev->data_offset;
-						bio->bi_bdev = conf->mirrors[i].rdev->bdev;
-
-						r10_bio->devs[0].devnum = d;
-						r10_bio->devs[1].devnum = i;
+			r10_bio->master_bio = (struct bio*)rb2;
+			if (rb2)
+				atomic_inc(&rb2->remaining);
+			r10_bio->mddev = mddev;
+			set_bit(R10BIO_IsRecover, &r10_bio->state);
+			r10_bio->sector = sect;
 
-						break;
-					}
-				}
-				if (j == conf->copies) {
-					/* Cannot recover, so abort the recovery */
-					put_buf(r10_bio);
-					if (rb2)
-						atomic_dec(&rb2->remaining);
-					r10_bio = rb2;
-					if (!test_and_set_bit(MD_RECOVERY_INTR,
-							      &mddev->recovery))
-						printk(KERN_INFO "md/raid10:%s: insufficient "
-						       "working devices for recovery.\n",
-						       mdname(mddev));
+			raid10_find_phys(conf, r10_bio);
+
+			/* Need to check if the array will still be
+			 * degraded
+			 */
+			for (j=0; j<conf->raid_disks; j++)
+				if (conf->mirrors[j].rdev == NULL ||
+				    test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
+					still_degraded = 1;
 					break;
 				}
+
+			must_sync = bitmap_start_sync(mddev->bitmap, sect,
+						      &sync_blocks, still_degraded);
+
+			for (j=0; j<conf->copies;j++) {
+				int d = r10_bio->devs[j].devnum;
+				if (!conf->mirrors[d].rdev ||
+				    !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
+					continue;
+				/* This is where we read from */
+				bio = r10_bio->devs[0].bio;
+				bio->bi_next = biolist;
+				biolist = bio;
+				bio->bi_private = r10_bio;
+				bio->bi_end_io = end_sync_read;
+				bio->bi_rw = READ;
+				bio->bi_sector = r10_bio->devs[j].addr +
+					conf->mirrors[d].rdev->data_offset;
+				bio->bi_bdev = conf->mirrors[d].rdev->bdev;
+				atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+				atomic_inc(&r10_bio->remaining);
+				/* and we write to 'i' */
+
+				for (k=0; k<conf->copies; k++)
+					if (r10_bio->devs[k].devnum == i)
+						break;
+				BUG_ON(k == conf->copies);
+				bio = r10_bio->devs[1].bio;
+				bio->bi_next = biolist;
+				biolist = bio;
+				bio->bi_private = r10_bio;
+				bio->bi_end_io = end_sync_write;
+				bio->bi_rw = WRITE;
+				bio->bi_sector = r10_bio->devs[k].addr +
+					conf->mirrors[i].rdev->data_offset;
+				bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+
+				r10_bio->devs[0].devnum = d;
+				r10_bio->devs[1].devnum = i;
+
+				break;
+			}
+			if (j == conf->copies) {
+				/* Cannot recover, so abort the recovery */
+				put_buf(r10_bio);
+				if (rb2)
+					atomic_dec(&rb2->remaining);
+				r10_bio = rb2;
+				if (!test_and_set_bit(MD_RECOVERY_INTR,
+						      &mddev->recovery))
+					printk(KERN_INFO "md/raid10:%s: insufficient "
+					       "working devices for recovery.\n",
+					       mdname(mddev));
+				break;
 			}
+		}
 		if (biolist == NULL) {
 			while (r10_bio) {
 				r10bio_t *rb2 = r10_bio;
@@ -1977,7 +1956,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	if (!bitmap_start_sync(mddev->bitmap, sector_nr,
 			       &sync_blocks, mddev->degraded) &&
-	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
+					 &mddev->recovery)) {
 		/* We can skip this block */
 		*skipped = 1;
 		return sync_blocks + sectors_skipped;
@@ -2022,7 +2002,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		for (i=0; i<conf->copies; i++) {
 			int d = r10_bio->devs[i].devnum;
 			if (r10_bio->devs[i].bio->bi_end_io)
-				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+				rdev_dec_pending(conf->mirrors[d].rdev,
+						 mddev);
 		}
 		put_buf(r10_bio);
 		biolist = NULL;
@@ -2047,26 +2028,27 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		do {
 			struct page *page;
 			int len = PAGE_SIZE;
-			disk = 0;
 			if (sector_nr + (len>>9) > max_sector)
 				len = (max_sector - sector_nr) << 9;
 			if (len == 0)
 				break;
 			for (bio= biolist ; bio ; bio=bio->bi_next) {
+				struct bio *bio2;
 				page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
-				if (bio_add_page(bio, page, len, 0) == 0) {
-					/* stop here */
-					struct bio *bio2;
-					bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
-					for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) {
-						/* remove last page from this bio */
-						bio2->bi_vcnt--;
-						bio2->bi_size -= len;
-						bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
-					}
-					goto bio_full;
+				if (bio_add_page(bio, page, len, 0))
+					continue;
+
+				/* stop here */
+				bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
+				for (bio2 = biolist;
+				     bio2 && bio2 != bio;
+				     bio2 = bio2->bi_next) {
+					/* remove last page from this bio */
+					bio2->bi_vcnt--;
+					bio2->bi_size -= len;
+					bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
 				}
-				disk = i;
+				goto bio_full;
 			}
 			nr_sectors += len>>9;
 			sector_nr += len>>9;
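Note: the reworked bio_add_page() loop inverts the old nesting: success means continue to the next bio in the batch, while the first failure strips the page just added from every earlier bio (so all bios in the batch stay the same length) and jumps to bio_full to submit at the shorter size. A standalone sketch of this grow-in-lockstep-or-unwind pattern follows; fake_bio and batch_add() are invented stand-ins, not the kernel's struct bio.

/* Sketch of lock-step growth with unwind on first failure. */
#include <stddef.h>
#include <stdbool.h>

struct fake_bio {
	struct fake_bio *next;
	size_t size;		/* bytes queued so far */
	size_t capacity;	/* hard limit for this bio */
};

/* Try to grow every bio in the list by 'len' bytes.  Returns false if
 * any bio is full, after shrinking the ones already grown this round
 * so the whole batch stays the same length; the caller then submits
 * at the current size (the "goto bio_full" case above).
 */
static bool batch_add(struct fake_bio *head, size_t len)
{
	struct fake_bio *bio, *bio2;

	for (bio = head; bio; bio = bio->next) {
		if (bio->size + len <= bio->capacity) {
			bio->size += len;
			continue;
		}
		/* unwind: strip this round's bytes from earlier bios */
		for (bio2 = head; bio2 && bio2 != bio; bio2 = bio2->next)
			bio2->size -= len;
		return false;
	}
	return true;
}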