about summary refs log tree commit diff stats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid10.c 110
1 files changed, 44 insertions, 66 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8e9462626ec..8ea0acad606 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -488,13 +488,19 @@ static int raid10_mergeable_bvec(struct request_queue *q,
488static int read_balance(conf_t *conf, r10bio_t *r10_bio) 488static int read_balance(conf_t *conf, r10bio_t *r10_bio)
489{ 489{
490 const sector_t this_sector = r10_bio->sector; 490 const sector_t this_sector = r10_bio->sector;
491 int disk, slot, nslot; 491 int disk, slot;
492 const int sectors = r10_bio->sectors; 492 const int sectors = r10_bio->sectors;
493 sector_t new_distance, current_distance; 493 sector_t new_distance, best_dist;
494 mdk_rdev_t *rdev; 494 mdk_rdev_t *rdev;
495 int do_balance;
496 int best_slot;
495 497
496 raid10_find_phys(conf, r10_bio); 498 raid10_find_phys(conf, r10_bio);
497 rcu_read_lock(); 499 rcu_read_lock();
500retry:
501 best_slot = -1;
502 best_dist = MaxSector;
503 do_balance = 1;
498 /* 504 /*
499 * Check if we can balance. We can balance on the whole 505 * Check if we can balance. We can balance on the whole
500 * device if no resync is going on (recovery is ok), or below 506 * device if no resync is going on (recovery is ok), or below
@@ -502,86 +508,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
502 * above the resync window. 508 * above the resync window.
503 */ 509 */
504 if (conf->mddev->recovery_cp < MaxSector 510 if (conf->mddev->recovery_cp < MaxSector
505 && (this_sector + sectors >= conf->next_resync)) { 511 && (this_sector + sectors >= conf->next_resync))
506 /* make sure that disk is operational */ 512 do_balance = 0;
507 slot = 0;
508 disk = r10_bio->devs[slot].devnum;
509
510 while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
511 r10_bio->devs[slot].bio == IO_BLOCKED ||
512 !test_bit(In_sync, &rdev->flags)) {
513 slot++;
514 if (slot == conf->copies) {
515 slot = 0;
516 disk = -1;
517 break;
518 }
519 disk = r10_bio->devs[slot].devnum;
520 }
521 goto rb_out;
522 }
523
524 513
525 /* make sure the disk is operational */ 514 for (slot = 0; slot < conf->copies ; slot++) {
526 slot = 0; 515 if (r10_bio->devs[slot].bio == IO_BLOCKED)
527 disk = r10_bio->devs[slot].devnum; 516 continue;
528 while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
529 r10_bio->devs[slot].bio == IO_BLOCKED ||
530 !test_bit(In_sync, &rdev->flags)) {
531 slot ++;
532 if (slot == conf->copies) {
533 disk = -1;
534 goto rb_out;
535 }
536 disk = r10_bio->devs[slot].devnum; 517 disk = r10_bio->devs[slot].devnum;
537 } 518 rdev = rcu_dereference(conf->mirrors[disk].rdev);
538 519 if (rdev == NULL)
539 520 continue;
540 current_distance = abs(r10_bio->devs[slot].addr - 521 if (!test_bit(In_sync, &rdev->flags))
541 conf->mirrors[disk].head_position);
542
543 /* Find the disk whose head is closest,
544 * or - for far > 1 - find the closest to partition beginning */
545
546 for (nslot = slot; nslot < conf->copies; nslot++) {
547 int ndisk = r10_bio->devs[nslot].devnum;
548
549
550 if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
551 r10_bio->devs[nslot].bio == IO_BLOCKED ||
552 !test_bit(In_sync, &rdev->flags))
553 continue; 522 continue;
554 523
524 if (!do_balance)
525 break;
526
555 /* This optimisation is debatable, and completely destroys 527 /* This optimisation is debatable, and completely destroys
556 * sequential read speed for 'far copies' arrays. So only 528 * sequential read speed for 'far copies' arrays. So only
557 * keep it for 'near' arrays, and review those later. 529 * keep it for 'near' arrays, and review those later.
558 */ 530 */
559 if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) { 531 if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending))
560 disk = ndisk;
561 slot = nslot;
562 break; 532 break;
563 }
564 533
565 /* for far > 1 always use the lowest address */ 534 /* for far > 1 always use the lowest address */
566 if (conf->far_copies > 1) 535 if (conf->far_copies > 1)
567 new_distance = r10_bio->devs[nslot].addr; 536 new_distance = r10_bio->devs[slot].addr;
568 else 537 else
569 new_distance = abs(r10_bio->devs[nslot].addr - 538 new_distance = abs(r10_bio->devs[slot].addr -
570 conf->mirrors[ndisk].head_position); 539 conf->mirrors[disk].head_position);
571 if (new_distance < current_distance) { 540 if (new_distance < best_dist) {
572 current_distance = new_distance; 541 best_dist = new_distance;
573 disk = ndisk; 542 best_slot = slot;
574 slot = nslot;
575 } 543 }
576 } 544 }
545 if (slot == conf->copies)
546 slot = best_slot;
577 547
578rb_out: 548 if (slot >= 0) {
579 r10_bio->read_slot = slot; 549 disk = r10_bio->devs[slot].devnum;
580/* conf->next_seq_sect = this_sector + sectors;*/ 550 rdev = rcu_dereference(conf->mirrors[disk].rdev);
581 551 if (!rdev)
582 if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL) 552 goto retry;
583 atomic_inc(&conf->mirrors[disk].rdev->nr_pending); 553 atomic_inc(&rdev->nr_pending);
584 else 554 if (test_bit(Faulty, &rdev->flags)) {
555 /* Cannot risk returning a device that failed
556 * before we inc'ed nr_pending
557 */
558 rdev_dec_pending(rdev, conf->mddev);
559 goto retry;
560 }
561 r10_bio->read_slot = slot;
562 } else
585 disk = -1; 563 disk = -1;
586 rcu_read_unlock(); 564 rcu_read_unlock();
587 565