about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
author	NeilBrown <neilb@suse.de>	2011-12-22 18:17:54 -0500
committer	NeilBrown <neilb@suse.de>	2011-12-22 18:17:54 -0500
commit	abbf098e6e1e23d5d247b9eaaf325e67f67b0328 (patch)
tree	e2d8dc88c99ecc14d6d87596d95b3de9b9dc87c9 /drivers
parent	96c3fd1f3802371610c620cff03f9d825707e80e (diff)
md/raid10: preferentially read from replacement device if possible.
When reading (for array reads, not for recovery etc) we read from the
replacement device if it has recovered far enough.

This requires storing the chosen rdev in the 'r10_bio' so we can make
sure to drop the ref on the right device when the read finishes.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/md/raid10.c	36
1 file changed, 23 insertions(+), 13 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 70356c130273..5b886218110e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -324,11 +324,13 @@ static void raid10_end_read_request(struct bio *bio, int error)
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct r10bio *r10_bio = bio->bi_private;
 	int slot, dev;
+	struct md_rdev *rdev;
 	struct r10conf *conf = r10_bio->mddev->private;
 
 
 	slot = r10_bio->read_slot;
 	dev = r10_bio->devs[slot].devnum;
+	rdev = r10_bio->devs[slot].rdev;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
@@ -346,7 +348,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 		raid_end_bio_io(r10_bio);
-		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);
 	} else {
 		/*
 		 * oops, read error - keep the refcount on the rdev
@@ -355,7 +357,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
 		printk_ratelimited(KERN_ERR
 				   "md/raid10:%s: %s: rescheduling sector %llu\n",
 				   mdname(conf->mddev),
-				   bdevname(conf->mirrors[dev].rdev->bdev, b),
+				   bdevname(rdev->bdev, b),
 				   (unsigned long long)r10_bio->sector);
 		set_bit(R10BIO_ReadError, &r10_bio->state);
 		reschedule_retry(r10_bio);
@@ -599,7 +601,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 	int sectors = r10_bio->sectors;
 	int best_good_sectors;
 	sector_t new_distance, best_dist;
-	struct md_rdev *rdev;
+	struct md_rdev *rdev, *best_rdev;
 	int do_balance;
 	int best_slot;
 
@@ -608,6 +610,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 retry:
 	sectors = r10_bio->sectors;
 	best_slot = -1;
+	best_rdev = NULL;
 	best_dist = MaxSector;
 	best_good_sectors = 0;
 	do_balance = 1;
@@ -629,10 +632,16 @@ retry:
 		if (r10_bio->devs[slot].bio == IO_BLOCKED)
 			continue;
 		disk = r10_bio->devs[slot].devnum;
-		rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		rdev = rcu_dereference(conf->mirrors[disk].replacement);
+		if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
+		    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
+			rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (rdev == NULL)
 			continue;
-		if (!test_bit(In_sync, &rdev->flags))
+		if (test_bit(Faulty, &rdev->flags))
+			continue;
+		if (!test_bit(In_sync, &rdev->flags) &&
+		    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
 			continue;
 
 		dev_sector = r10_bio->devs[slot].addr;
@@ -657,6 +666,7 @@ retry:
 			if (good_sectors > best_good_sectors) {
 				best_good_sectors = good_sectors;
 				best_slot = slot;
+				best_rdev = rdev;
 			}
 			if (!do_balance)
 				/* Must read from here */
@@ -685,16 +695,15 @@ retry:
 		if (new_distance < best_dist) {
 			best_dist = new_distance;
 			best_slot = slot;
+			best_rdev = rdev;
 		}
 	}
-	if (slot == conf->copies)
+	if (slot >= conf->copies) {
 		slot = best_slot;
+		rdev = best_rdev;
+	}
 
 	if (slot >= 0) {
-		disk = r10_bio->devs[slot].devnum;
-		rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		if (!rdev)
-			goto retry;
 		atomic_inc(&rdev->nr_pending);
 		if (test_bit(Faulty, &rdev->flags)) {
 			/* Cannot risk returning a device that failed
@@ -990,6 +999,7 @@ read_again:
 					      max_sectors);
 
 		r10_bio->devs[slot].bio = read_bio;
+		r10_bio->devs[slot].rdev = rdev;
 
 		read_bio->bi_sector = r10_bio->devs[slot].addr +
 			rdev->data_offset;
@@ -2088,10 +2098,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 {
 	int slot = r10_bio->read_slot;
-	int mirror = r10_bio->devs[slot].devnum;
 	struct bio *bio;
 	struct r10conf *conf = mddev->private;
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = r10_bio->devs[slot].rdev;
 	char b[BDEVNAME_SIZE];
 	unsigned long do_sync;
 	int max_sectors;
@@ -2109,7 +2118,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 		fix_read_error(conf, mddev, r10_bio);
 		unfreeze_array(conf);
 	}
-	rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
+	rdev_dec_pending(rdev, mddev);
 
 	bio = r10_bio->devs[slot].bio;
 	bdevname(bio->bi_bdev, b);
@@ -2144,6 +2153,7 @@ read_more:
 			 r10_bio->sector - bio->bi_sector,
 			 max_sectors);
 	r10_bio->devs[slot].bio = bio;
+	r10_bio->devs[slot].rdev = rdev;
 	bio->bi_sector = r10_bio->devs[slot].addr
 		+ rdev->data_offset;
 	bio->bi_bdev = rdev->bdev;