diff options
-rw-r--r-- | drivers/md/raid1.c | 56 | ||||
-rw-r--r-- | drivers/md/raid1.h | 1 |
2 files changed, 50 insertions, 7 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d9869f25aa7..7aa958ed284 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -504,6 +504,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
504 | unsigned int min_pending; | 504 | unsigned int min_pending; |
505 | struct md_rdev *rdev; | 505 | struct md_rdev *rdev; |
506 | int choose_first; | 506 | int choose_first; |
507 | int choose_next_idle; | ||
507 | 508 | ||
508 | rcu_read_lock(); | 509 | rcu_read_lock(); |
509 | /* | 510 | /* |
@@ -520,6 +521,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
520 | min_pending = UINT_MAX; | 521 | min_pending = UINT_MAX; |
521 | best_good_sectors = 0; | 522 | best_good_sectors = 0; |
522 | has_nonrot_disk = 0; | 523 | has_nonrot_disk = 0; |
524 | choose_next_idle = 0; | ||
523 | 525 | ||
524 | if (conf->mddev->recovery_cp < MaxSector && | 526 | if (conf->mddev->recovery_cp < MaxSector && |
525 | (this_sector + sectors >= conf->next_resync)) | 527 | (this_sector + sectors >= conf->next_resync)) |
@@ -532,6 +534,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
532 | sector_t first_bad; | 534 | sector_t first_bad; |
533 | int bad_sectors; | 535 | int bad_sectors; |
534 | unsigned int pending; | 536 | unsigned int pending; |
537 | bool nonrot; | ||
535 | 538 | ||
536 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 539 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
537 | if (r1_bio->bios[disk] == IO_BLOCKED | 540 | if (r1_bio->bios[disk] == IO_BLOCKED |
@@ -590,18 +593,52 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
590 | } else | 593 | } else |
591 | best_good_sectors = sectors; | 594 | best_good_sectors = sectors; |
592 | 595 | ||
593 | has_nonrot_disk |= blk_queue_nonrot(bdev_get_queue(rdev->bdev)); | 596 | nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); |
597 | has_nonrot_disk |= nonrot; | ||
594 | pending = atomic_read(&rdev->nr_pending); | 598 | pending = atomic_read(&rdev->nr_pending); |
595 | dist = abs(this_sector - conf->mirrors[disk].head_position); | 599 | dist = abs(this_sector - conf->mirrors[disk].head_position); |
596 | if (choose_first | 600 | if (choose_first) { |
597 | /* Don't change to another disk for sequential reads */ | ||
598 | || conf->mirrors[disk].next_seq_sect == this_sector | ||
599 | || dist == 0 | ||
600 | /* If device is idle, use it */ | ||
601 | || pending == 0) { | ||
602 | best_disk = disk; | 601 | best_disk = disk; |
603 | break; | 602 | break; |
604 | } | 603 | } |
604 | /* Don't change to another disk for sequential reads */ | ||
605 | if (conf->mirrors[disk].next_seq_sect == this_sector | ||
606 | || dist == 0) { | ||
607 | int opt_iosize = bdev_io_opt(rdev->bdev) >> 9; | ||
608 | struct raid1_info *mirror = &conf->mirrors[disk]; | ||
609 | |||
610 | best_disk = disk; | ||
611 | /* | ||
612 | * If the buffered sequential IO size exceeds the optimal | ||
613 | * iosize, check whether there is an idle disk. If so, | ||
614 | * choose the idle disk. read_balance may already have | ||
615 | * chosen an idle disk before noticing that the IO is | ||
616 | * sequential on this disk. That doesn't matter: this | ||
617 | * disk will go idle, and it will be used again once the | ||
618 | * first disk's IO size exceeds the optimal iosize. This | ||
619 | * way, the first disk's iosize will be at least the | ||
620 | * optimal iosize. The second disk's iosize might be | ||
621 | * small, but that is not a big deal since the first disk | ||
622 | * is likely still busy when the second disk starts IO. | ||
623 | */ | ||
624 | if (nonrot && opt_iosize > 0 && | ||
625 | mirror->seq_start != MaxSector && | ||
626 | mirror->next_seq_sect > opt_iosize && | ||
627 | mirror->next_seq_sect - opt_iosize >= | ||
628 | mirror->seq_start) { | ||
629 | choose_next_idle = 1; | ||
630 | continue; | ||
631 | } | ||
632 | break; | ||
633 | } | ||
634 | /* If device is idle, use it */ | ||
635 | if (pending == 0) { | ||
636 | best_disk = disk; | ||
637 | break; | ||
638 | } | ||
639 | |||
640 | if (choose_next_idle) | ||
641 | continue; | ||
605 | 642 | ||
606 | if (min_pending > pending) { | 643 | if (min_pending > pending) { |
607 | min_pending = pending; | 644 | min_pending = pending; |
@@ -640,6 +677,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
640 | goto retry; | 677 | goto retry; |
641 | } | 678 | } |
642 | sectors = best_good_sectors; | 679 | sectors = best_good_sectors; |
680 | |||
681 | if (conf->mirrors[best_disk].next_seq_sect != this_sector) | ||
682 | conf->mirrors[best_disk].seq_start = this_sector; | ||
683 | |||
643 | conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; | 684 | conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; |
644 | } | 685 | } |
645 | rcu_read_unlock(); | 686 | rcu_read_unlock(); |
@@ -2605,6 +2646,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
2605 | mddev->merge_check_needed = 1; | 2646 | mddev->merge_check_needed = 1; |
2606 | 2647 | ||
2607 | disk->head_position = 0; | 2648 | disk->head_position = 0; |
2649 | disk->seq_start = MaxSector; | ||
2608 | } | 2650 | } |
2609 | conf->raid_disks = mddev->raid_disks; | 2651 | conf->raid_disks = mddev->raid_disks; |
2610 | conf->mddev = mddev; | 2652 | conf->mddev = mddev; |
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 3770b4a2766..0ff3715fb7e 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -9,6 +9,7 @@ struct raid1_info { | |||
9 | * we try to keep sequential reads one the same device | 9 | * we try to keep sequential reads one the same device |
10 | */ | 10 | */ |
11 | sector_t next_seq_sect; | 11 | sector_t next_seq_sect; |
12 | sector_t seq_start; | ||
12 | }; | 13 | }; |
13 | 14 | ||
14 | /* | 15 | /* |