diff options
| -rw-r--r-- | drivers/md/raid1.c | 56 | ||||
| -rw-r--r-- | drivers/md/raid1.h | 1 |
2 files changed, 50 insertions, 7 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d9869f25aa75..7aa958ed2847 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -504,6 +504,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 504 | unsigned int min_pending; | 504 | unsigned int min_pending; |
| 505 | struct md_rdev *rdev; | 505 | struct md_rdev *rdev; |
| 506 | int choose_first; | 506 | int choose_first; |
| 507 | int choose_next_idle; | ||
| 507 | 508 | ||
| 508 | rcu_read_lock(); | 509 | rcu_read_lock(); |
| 509 | /* | 510 | /* |
| @@ -520,6 +521,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 520 | min_pending = UINT_MAX; | 521 | min_pending = UINT_MAX; |
| 521 | best_good_sectors = 0; | 522 | best_good_sectors = 0; |
| 522 | has_nonrot_disk = 0; | 523 | has_nonrot_disk = 0; |
| 524 | choose_next_idle = 0; | ||
| 523 | 525 | ||
| 524 | if (conf->mddev->recovery_cp < MaxSector && | 526 | if (conf->mddev->recovery_cp < MaxSector && |
| 525 | (this_sector + sectors >= conf->next_resync)) | 527 | (this_sector + sectors >= conf->next_resync)) |
| @@ -532,6 +534,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 532 | sector_t first_bad; | 534 | sector_t first_bad; |
| 533 | int bad_sectors; | 535 | int bad_sectors; |
| 534 | unsigned int pending; | 536 | unsigned int pending; |
| 537 | bool nonrot; | ||
| 535 | 538 | ||
| 536 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 539 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
| 537 | if (r1_bio->bios[disk] == IO_BLOCKED | 540 | if (r1_bio->bios[disk] == IO_BLOCKED |
| @@ -590,18 +593,52 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 590 | } else | 593 | } else |
| 591 | best_good_sectors = sectors; | 594 | best_good_sectors = sectors; |
| 592 | 595 | ||
| 593 | has_nonrot_disk |= blk_queue_nonrot(bdev_get_queue(rdev->bdev)); | 596 | nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); |
| 597 | has_nonrot_disk |= nonrot; | ||
| 594 | pending = atomic_read(&rdev->nr_pending); | 598 | pending = atomic_read(&rdev->nr_pending); |
| 595 | dist = abs(this_sector - conf->mirrors[disk].head_position); | 599 | dist = abs(this_sector - conf->mirrors[disk].head_position); |
| 596 | if (choose_first | 600 | if (choose_first) { |
| 597 | /* Don't change to another disk for sequential reads */ | ||
| 598 | || conf->mirrors[disk].next_seq_sect == this_sector | ||
| 599 | || dist == 0 | ||
| 600 | /* If device is idle, use it */ | ||
| 601 | || pending == 0) { | ||
| 602 | best_disk = disk; | 601 | best_disk = disk; |
| 603 | break; | 602 | break; |
| 604 | } | 603 | } |
| 604 | /* Don't change to another disk for sequential reads */ | ||
| 605 | if (conf->mirrors[disk].next_seq_sect == this_sector | ||
| 606 | || dist == 0) { | ||
| 607 | int opt_iosize = bdev_io_opt(rdev->bdev) >> 9; | ||
| 608 | struct raid1_info *mirror = &conf->mirrors[disk]; | ||
| 609 | |||
| 610 | best_disk = disk; | ||
| 611 | /* | ||
| 612 | * If the buffered sequential IO size exceeds the optimal | ||
| 613 | * iosize, check whether there is an idle disk. If so, choose | ||
| 614 | * the idle disk. read_balance could already have chosen an | ||
| 615 | * idle disk before noticing that the IO is sequential on | ||
| 616 | * this disk. This doesn't matter: this disk will then go | ||
| 617 | * idle, and it will be used again once the first disk's IO | ||
| 618 | * size exceeds the optimal iosize. In this way, the iosize | ||
| 619 | * of the first disk will be at least the optimal iosize. | ||
| 620 | * The iosize of the second disk might be small, but that is | ||
| 621 | * not a big deal, since when the second disk starts IO, the | ||
| 622 | * first disk is likely still busy. | ||
| 623 | */ | ||
| 624 | if (nonrot && opt_iosize > 0 && | ||
| 625 | mirror->seq_start != MaxSector && | ||
| 626 | mirror->next_seq_sect > opt_iosize && | ||
| 627 | mirror->next_seq_sect - opt_iosize >= | ||
| 628 | mirror->seq_start) { | ||
| 629 | choose_next_idle = 1; | ||
| 630 | continue; | ||
| 631 | } | ||
| 632 | break; | ||
| 633 | } | ||
| 634 | /* If device is idle, use it */ | ||
| 635 | if (pending == 0) { | ||
| 636 | best_disk = disk; | ||
| 637 | break; | ||
| 638 | } | ||
| 639 | |||
| 640 | if (choose_next_idle) | ||
| 641 | continue; | ||
| 605 | 642 | ||
| 606 | if (min_pending > pending) { | 643 | if (min_pending > pending) { |
| 607 | min_pending = pending; | 644 | min_pending = pending; |
| @@ -640,6 +677,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 640 | goto retry; | 677 | goto retry; |
| 641 | } | 678 | } |
| 642 | sectors = best_good_sectors; | 679 | sectors = best_good_sectors; |
| 680 | |||
| 681 | if (conf->mirrors[best_disk].next_seq_sect != this_sector) | ||
| 682 | conf->mirrors[best_disk].seq_start = this_sector; | ||
| 683 | |||
| 643 | conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; | 684 | conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; |
| 644 | } | 685 | } |
| 645 | rcu_read_unlock(); | 686 | rcu_read_unlock(); |
| @@ -2605,6 +2646,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
| 2605 | mddev->merge_check_needed = 1; | 2646 | mddev->merge_check_needed = 1; |
| 2606 | 2647 | ||
| 2607 | disk->head_position = 0; | 2648 | disk->head_position = 0; |
| 2649 | disk->seq_start = MaxSector; | ||
| 2608 | } | 2650 | } |
| 2609 | conf->raid_disks = mddev->raid_disks; | 2651 | conf->raid_disks = mddev->raid_disks; |
| 2610 | conf->mddev = mddev; | 2652 | conf->mddev = mddev; |
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 3770b4a27662..0ff3715fb7eb 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
| @@ -9,6 +9,7 @@ struct raid1_info { | |||
| 9 | * we try to keep sequential reads on the same device | 9 | * we try to keep sequential reads on the same device |
| 10 | */ | 10 | */ |
| 11 | sector_t next_seq_sect; | 11 | sector_t next_seq_sect; |
| 12 | sector_t seq_start; | ||
| 12 | }; | 13 | }; |
| 13 | 14 | ||
| 14 | /* | 15 | /* |
