about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/raid1.c 34
1 files changed, 31 insertions, 3 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fb96c0c2db40..d9869f25aa75 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -497,9 +497,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
497 const sector_t this_sector = r1_bio->sector; 497 const sector_t this_sector = r1_bio->sector;
498 int sectors; 498 int sectors;
499 int best_good_sectors; 499 int best_good_sectors;
500 int best_disk; 500 int best_disk, best_dist_disk, best_pending_disk;
501 int has_nonrot_disk;
501 int disk; 502 int disk;
502 sector_t best_dist; 503 sector_t best_dist;
504 unsigned int min_pending;
503 struct md_rdev *rdev; 505 struct md_rdev *rdev;
504 int choose_first; 506 int choose_first;
505 507
@@ -512,8 +514,12 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
512 retry: 514 retry:
513 sectors = r1_bio->sectors; 515 sectors = r1_bio->sectors;
514 best_disk = -1; 516 best_disk = -1;
517 best_dist_disk = -1;
515 best_dist = MaxSector; 518 best_dist = MaxSector;
519 best_pending_disk = -1;
520 min_pending = UINT_MAX;
516 best_good_sectors = 0; 521 best_good_sectors = 0;
522 has_nonrot_disk = 0;
517 523
518 if (conf->mddev->recovery_cp < MaxSector && 524 if (conf->mddev->recovery_cp < MaxSector &&
519 (this_sector + sectors >= conf->next_resync)) 525 (this_sector + sectors >= conf->next_resync))
@@ -525,6 +531,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
525 sector_t dist; 531 sector_t dist;
526 sector_t first_bad; 532 sector_t first_bad;
527 int bad_sectors; 533 int bad_sectors;
534 unsigned int pending;
528 535
529 rdev = rcu_dereference(conf->mirrors[disk].rdev); 536 rdev = rcu_dereference(conf->mirrors[disk].rdev);
530 if (r1_bio->bios[disk] == IO_BLOCKED 537 if (r1_bio->bios[disk] == IO_BLOCKED
@@ -583,22 +590,43 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
583 } else 590 } else
584 best_good_sectors = sectors; 591 best_good_sectors = sectors;
585 592
593 has_nonrot_disk |= blk_queue_nonrot(bdev_get_queue(rdev->bdev));
594 pending = atomic_read(&rdev->nr_pending);
586 dist = abs(this_sector - conf->mirrors[disk].head_position); 595 dist = abs(this_sector - conf->mirrors[disk].head_position);
587 if (choose_first 596 if (choose_first
588 /* Don't change to another disk for sequential reads */ 597 /* Don't change to another disk for sequential reads */
589 || conf->mirrors[disk].next_seq_sect == this_sector 598 || conf->mirrors[disk].next_seq_sect == this_sector
590 || dist == 0 599 || dist == 0
591 /* If device is idle, use it */ 600 /* If device is idle, use it */
592 || atomic_read(&rdev->nr_pending) == 0) { 601 || pending == 0) {
593 best_disk = disk; 602 best_disk = disk;
594 break; 603 break;
595 } 604 }
605
606 if (min_pending > pending) {
607 min_pending = pending;
608 best_pending_disk = disk;
609 }
610
596 if (dist < best_dist) { 611 if (dist < best_dist) {
597 best_dist = dist; 612 best_dist = dist;
598 best_disk = disk; 613 best_dist_disk = disk;
599 } 614 }
600 } 615 }
601 616
617 /*
618 * If all disks are rotational, choose the closest disk. If any disk is
619 * non-rotational, choose the disk with the fewest pending requests even
620 * if that disk is rotational, which might or might not be optimal for
621 * arrays mixing rotational and non-rotational disks, depending on workload.
622 */
623 if (best_disk == -1) {
624 if (has_nonrot_disk)
625 best_disk = best_pending_disk;
626 else
627 best_disk = best_dist_disk;
628 }
629
602 if (best_disk >= 0) { 630 if (best_disk >= 0) {
603 rdev = rcu_dereference(conf->mirrors[best_disk].rdev); 631 rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
604 if (!rdev) 632 if (!rdev)