about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--drivers/md/raid1.c56
-rw-r--r--drivers/md/raid1.h1
2 files changed, 50 insertions, 7 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d9869f25aa7..7aa958ed284 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -504,6 +504,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	unsigned int min_pending;
 	struct md_rdev *rdev;
 	int choose_first;
+	int choose_next_idle;
 
 	rcu_read_lock();
 	/*
@@ -520,6 +521,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	min_pending = UINT_MAX;
 	best_good_sectors = 0;
 	has_nonrot_disk = 0;
+	choose_next_idle = 0;
 
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync))
@@ -532,6 +534,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		sector_t first_bad;
 		int bad_sectors;
 		unsigned int pending;
+		bool nonrot;
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
@@ -590,18 +593,52 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		} else
 			best_good_sectors = sectors;
 
-		has_nonrot_disk |= blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+		nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+		has_nonrot_disk |= nonrot;
 		pending = atomic_read(&rdev->nr_pending);
 		dist = abs(this_sector - conf->mirrors[disk].head_position);
-		if (choose_first
-		    /* Don't change to another disk for sequential reads */
-		    || conf->mirrors[disk].next_seq_sect == this_sector
-		    || dist == 0
-		    /* If device is idle, use it */
-		    || pending == 0) {
-			best_disk = disk;
-			break;
-		}
+		if (choose_first) {
+			best_disk = disk;
+			break;
+		}
+		/* Don't change to another disk for sequential reads */
+		if (conf->mirrors[disk].next_seq_sect == this_sector
+		    || dist == 0) {
+			int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
+			struct raid1_info *mirror = &conf->mirrors[disk];
+
+			best_disk = disk;
+			/*
+			 * If buffered sequential IO size exceeds optimal
+			 * iosize, check if there is idle disk. If yes, choose
+			 * the idle disk. read_balance could already choose an
+			 * idle disk before noticing it's a sequential IO in
+			 * this disk. This doesn't matter because this disk
+			 * will idle, next time it will be utilized after the
+			 * first disk has IO size exceeds optimal iosize. In
+			 * this way, iosize of the first disk will be optimal
+			 * iosize at least. iosize of the second disk might be
+			 * small, but not a big deal since when the second disk
+			 * starts IO, the first disk is likely still busy.
+			 */
+			if (nonrot && opt_iosize > 0 &&
+			    mirror->seq_start != MaxSector &&
+			    mirror->next_seq_sect > opt_iosize &&
+			    mirror->next_seq_sect - opt_iosize >=
+			    mirror->seq_start) {
+				choose_next_idle = 1;
+				continue;
+			}
+			break;
+		}
+		/* If device is idle, use it */
+		if (pending == 0) {
+			best_disk = disk;
+			break;
+		}
+
+		if (choose_next_idle)
+			continue;
 
 		if (min_pending > pending) {
 			min_pending = pending;
@@ -640,6 +677,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 			goto retry;
 		}
 		sectors = best_good_sectors;
+
+		if (conf->mirrors[best_disk].next_seq_sect != this_sector)
+			conf->mirrors[best_disk].seq_start = this_sector;
+
 		conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
 	}
 	rcu_read_unlock();
@@ -2605,6 +2646,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 			mddev->merge_check_needed = 1;
 
 		disk->head_position = 0;
+		disk->seq_start = MaxSector;
 	}
 	conf->raid_disks = mddev->raid_disks;
 	conf->mddev = mddev;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 3770b4a2766..0ff3715fb7e 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -9,6 +9,7 @@ struct raid1_info {
 	 * we try to keep sequential reads one the same device
 	 */
 	sector_t	next_seq_sect;
+	sector_t	seq_start;
 };
 
 /*