about summary refs log tree commit diff stats
path: root/drivers/md
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-05-11 00:34:56 -0400
committer NeilBrown <neilb@suse.de> 2011-05-11 00:34:56 -0400
commit 76073054c95b12af6bd0cc9b9462a265b45ba38f (patch)
tree 78f830289dd8bb5337a7d3efa442ae44abd4dbab /drivers/md
parent 56d9912106b0974ffb6dd264c80c7e816677e998 (diff)
md/raid1: clean up read_balance.
read_balance has two loops which both look for a 'best' device based on slightly different criteria. This is clumsy and makes it hard to add extra criteria. So replace it all with a single loop that combines everything. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid1.c 83
1 files changed, 34 insertions, 49 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2b7a7ff401d..f0b0c79b389 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
411{ 411{
412 const sector_t this_sector = r1_bio->sector; 412 const sector_t this_sector = r1_bio->sector;
413 const int sectors = r1_bio->sectors; 413 const int sectors = r1_bio->sectors;
414 int new_disk = -1;
415 int start_disk; 414 int start_disk;
415 int best_disk;
416 int i; 416 int i;
417 sector_t new_distance, current_distance; 417 sector_t best_dist;
418 mdk_rdev_t *rdev; 418 mdk_rdev_t *rdev;
419 int choose_first; 419 int choose_first;
420 420
@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
425 * We take the first readable disk when above the resync window. 425 * We take the first readable disk when above the resync window.
426 */ 426 */
427 retry: 427 retry:
428 best_disk = -1;
429 best_dist = MaxSector;
428 if (conf->mddev->recovery_cp < MaxSector && 430 if (conf->mddev->recovery_cp < MaxSector &&
429 (this_sector + sectors >= conf->next_resync)) { 431 (this_sector + sectors >= conf->next_resync)) {
430 choose_first = 1; 432 choose_first = 1;
@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
434 start_disk = conf->last_used; 436 start_disk = conf->last_used;
435 } 437 }
436 438
437 /* make sure the disk is operational */
438 for (i = 0 ; i < conf->raid_disks ; i++) { 439 for (i = 0 ; i < conf->raid_disks ; i++) {
440 sector_t dist;
439 int disk = start_disk + i; 441 int disk = start_disk + i;
440 if (disk >= conf->raid_disks) 442 if (disk >= conf->raid_disks)
441 disk -= conf->raid_disks; 443 disk -= conf->raid_disks;
@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
443 rdev = rcu_dereference(conf->mirrors[disk].rdev); 445 rdev = rcu_dereference(conf->mirrors[disk].rdev);
444 if (r1_bio->bios[disk] == IO_BLOCKED 446 if (r1_bio->bios[disk] == IO_BLOCKED
445 || rdev == NULL 447 || rdev == NULL
446 || !test_bit(In_sync, &rdev->flags)) 448 || test_bit(Faulty, &rdev->flags))
447 continue; 449 continue;
448 450 if (!test_bit(In_sync, &rdev->flags) &&
449 new_disk = disk; 451 rdev->recovery_offset < this_sector + sectors)
450 if (!test_bit(WriteMostly, &rdev->flags))
451 break;
452 }
453
454 if (new_disk < 0 || choose_first)
455 goto rb_out;
456
457 /*
458 * Don't change to another disk for sequential reads:
459 */
460 if (conf->next_seq_sect == this_sector)
461 goto rb_out;
462 if (this_sector == conf->mirrors[new_disk].head_position)
463 goto rb_out;
464
465 current_distance = abs(this_sector
466 - conf->mirrors[new_disk].head_position);
467
468 /* look for a better disk - i.e. head is closer */
469 start_disk = new_disk;
470 for (i = 1; i < conf->raid_disks; i++) {
471 int disk = start_disk + 1;
472 if (disk >= conf->raid_disks)
473 disk -= conf->raid_disks;
474
475 rdev = rcu_dereference(conf->mirrors[disk].rdev);
476 if (r1_bio->bios[disk] == IO_BLOCKED
477 || rdev == NULL
478 || !test_bit(In_sync, &rdev->flags)
479 || test_bit(WriteMostly, &rdev->flags))
480 continue; 452 continue;
481 453 if (test_bit(WriteMostly, &rdev->flags)) {
482 if (!atomic_read(&rdev->nr_pending)) { 454 /* Don't balance among write-mostly, just
483 new_disk = disk; 455 * use the first as a last resort */
456 if (best_disk < 0)
457 best_disk = disk;
458 continue;
459 }
460 /* This is a reasonable device to use. It might
461 * even be best.
462 */
463 dist = abs(this_sector - conf->mirrors[disk].head_position);
464 if (choose_first
465 /* Don't change to another disk for sequential reads */
466 || conf->next_seq_sect == this_sector
467 || dist == 0
468 /* If device is idle, use it */
469 || atomic_read(&rdev->nr_pending) == 0) {
470 best_disk = disk;
484 break; 471 break;
485 } 472 }
486 new_distance = abs(this_sector - conf->mirrors[disk].head_position); 473 if (dist < best_dist) {
487 if (new_distance < current_distance) { 474 best_dist = dist;
488 current_distance = new_distance; 475 best_disk = disk;
489 new_disk = disk;
490 } 476 }
491 } 477 }
492 478
493 rb_out: 479 if (best_disk >= 0) {
494 if (new_disk >= 0) { 480 rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
495 rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
496 if (!rdev) 481 if (!rdev)
497 goto retry; 482 goto retry;
498 atomic_inc(&rdev->nr_pending); 483 atomic_inc(&rdev->nr_pending);
499 if (!test_bit(In_sync, &rdev->flags)) { 484 if (test_bit(Faulty, &rdev->flags)) {
500 /* cannot risk returning a device that failed 485 /* cannot risk returning a device that failed
501 * before we inc'ed nr_pending 486 * before we inc'ed nr_pending
502 */ 487 */
@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
504 goto retry; 489 goto retry;
505 } 490 }
506 conf->next_seq_sect = this_sector + sectors; 491 conf->next_seq_sect = this_sector + sectors;
507 conf->last_used = new_disk; 492 conf->last_used = best_disk;
508 } 493 }
509 rcu_read_unlock(); 494 rcu_read_unlock();
510 495
511 return new_disk; 496 return best_disk;
512} 497}
513 498
514static int raid1_congested(void *data, int bits) 499static int raid1_congested(void *data, int bits)