diff options
| -rw-r--r-- | drivers/md/raid1.c | 34 |
1 files changed, 31 insertions, 3 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fb96c0c2db40..d9869f25aa75 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -497,9 +497,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 497 | const sector_t this_sector = r1_bio->sector; | 497 | const sector_t this_sector = r1_bio->sector; |
| 498 | int sectors; | 498 | int sectors; |
| 499 | int best_good_sectors; | 499 | int best_good_sectors; |
| 500 | int best_disk; | 500 | int best_disk, best_dist_disk, best_pending_disk; |
| 501 | int has_nonrot_disk; | ||
| 501 | int disk; | 502 | int disk; |
| 502 | sector_t best_dist; | 503 | sector_t best_dist; |
| 504 | unsigned int min_pending; | ||
| 503 | struct md_rdev *rdev; | 505 | struct md_rdev *rdev; |
| 504 | int choose_first; | 506 | int choose_first; |
| 505 | 507 | ||
| @@ -512,8 +514,12 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 512 | retry: | 514 | retry: |
| 513 | sectors = r1_bio->sectors; | 515 | sectors = r1_bio->sectors; |
| 514 | best_disk = -1; | 516 | best_disk = -1; |
| 517 | best_dist_disk = -1; | ||
| 515 | best_dist = MaxSector; | 518 | best_dist = MaxSector; |
| 519 | best_pending_disk = -1; | ||
| 520 | min_pending = UINT_MAX; | ||
| 516 | best_good_sectors = 0; | 521 | best_good_sectors = 0; |
| 522 | has_nonrot_disk = 0; | ||
| 517 | 523 | ||
| 518 | if (conf->mddev->recovery_cp < MaxSector && | 524 | if (conf->mddev->recovery_cp < MaxSector && |
| 519 | (this_sector + sectors >= conf->next_resync)) | 525 | (this_sector + sectors >= conf->next_resync)) |
| @@ -525,6 +531,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 525 | sector_t dist; | 531 | sector_t dist; |
| 526 | sector_t first_bad; | 532 | sector_t first_bad; |
| 527 | int bad_sectors; | 533 | int bad_sectors; |
| 534 | unsigned int pending; | ||
| 528 | 535 | ||
| 529 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 536 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
| 530 | if (r1_bio->bios[disk] == IO_BLOCKED | 537 | if (r1_bio->bios[disk] == IO_BLOCKED |
| @@ -583,22 +590,43 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect | |||
| 583 | } else | 590 | } else |
| 584 | best_good_sectors = sectors; | 591 | best_good_sectors = sectors; |
| 585 | 592 | ||
| 593 | has_nonrot_disk |= blk_queue_nonrot(bdev_get_queue(rdev->bdev)); | ||
| 594 | pending = atomic_read(&rdev->nr_pending); | ||
| 586 | dist = abs(this_sector - conf->mirrors[disk].head_position); | 595 | dist = abs(this_sector - conf->mirrors[disk].head_position); |
| 587 | if (choose_first | 596 | if (choose_first |
| 588 | /* Don't change to another disk for sequential reads */ | 597 | /* Don't change to another disk for sequential reads */ |
| 589 | || conf->mirrors[disk].next_seq_sect == this_sector | 598 | || conf->mirrors[disk].next_seq_sect == this_sector |
| 590 | || dist == 0 | 599 | || dist == 0 |
| 591 | /* If device is idle, use it */ | 600 | /* If device is idle, use it */ |
| 592 | || atomic_read(&rdev->nr_pending) == 0) { | 601 | || pending == 0) { |
| 593 | best_disk = disk; | 602 | best_disk = disk; |
| 594 | break; | 603 | break; |
| 595 | } | 604 | } |
| 605 | |||
| 606 | if (min_pending > pending) { | ||
| 607 | min_pending = pending; | ||
| 608 | best_pending_disk = disk; | ||
| 609 | } | ||
| 610 | |||
| 596 | if (dist < best_dist) { | 611 | if (dist < best_dist) { |
| 597 | best_dist = dist; | 612 | best_dist = dist; |
| 598 | best_disk = disk; | 613 | best_dist_disk = disk; |
| 599 | } | 614 | } |
| 600 | } | 615 | } |
| 601 | 616 | ||
| 617 | /* | ||
| 618 | * If all disks are rotational, choose the closest disk. If any disk is | ||
| 619 | * non-rotational, choose the disk with the fewest pending requests even if | ||
| 620 | * that disk is rotational, which might/might not be optimal for raids with | ||
| 621 | * mixed rotational/non-rotational disks depending on workload. | ||
| 622 | */ | ||
| 623 | if (best_disk == -1) { | ||
| 624 | if (has_nonrot_disk) | ||
| 625 | best_disk = best_pending_disk; | ||
| 626 | else | ||
| 627 | best_disk = best_dist_disk; | ||
| 628 | } | ||
| 629 | |||
| 602 | if (best_disk >= 0) { | 630 | if (best_disk >= 0) { |
| 603 | rdev = rcu_dereference(conf->mirrors[best_disk].rdev); | 631 | rdev = rcu_dereference(conf->mirrors[best_disk].rdev); |
| 604 | if (!rdev) | 632 | if (!rdev) |
