diff options
author | NeilBrown <neilb@suse.de> | 2011-05-11 00:34:56 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-05-11 00:34:56 -0400 |
commit | 76073054c95b12af6bd0cc9b9462a265b45ba38f (patch) | |
tree | 78f830289dd8bb5337a7d3efa442ae44abd4dbab /drivers/md | |
parent | 56d9912106b0974ffb6dd264c80c7e816677e998 (diff) |
md/raid1: clean up read_balance.
read_balance has two loops which both look for a 'best'
device based on slightly different criteria.
This is clumsy and makes it hard to add extra criteria.
So replace it all with a single loop that combines everything.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid1.c | 83 |
1 files changed, 34 insertions, 49 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 2b7a7ff401dc..f0b0c79b3899 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
411 | { | 411 | { |
412 | const sector_t this_sector = r1_bio->sector; | 412 | const sector_t this_sector = r1_bio->sector; |
413 | const int sectors = r1_bio->sectors; | 413 | const int sectors = r1_bio->sectors; |
414 | int new_disk = -1; | ||
415 | int start_disk; | 414 | int start_disk; |
415 | int best_disk; | ||
416 | int i; | 416 | int i; |
417 | sector_t new_distance, current_distance; | 417 | sector_t best_dist; |
418 | mdk_rdev_t *rdev; | 418 | mdk_rdev_t *rdev; |
419 | int choose_first; | 419 | int choose_first; |
420 | 420 | ||
@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
425 | * We take the first readable disk when above the resync window. | 425 | * We take the first readable disk when above the resync window. |
426 | */ | 426 | */ |
427 | retry: | 427 | retry: |
428 | best_disk = -1; | ||
429 | best_dist = MaxSector; | ||
428 | if (conf->mddev->recovery_cp < MaxSector && | 430 | if (conf->mddev->recovery_cp < MaxSector && |
429 | (this_sector + sectors >= conf->next_resync)) { | 431 | (this_sector + sectors >= conf->next_resync)) { |
430 | choose_first = 1; | 432 | choose_first = 1; |
@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
434 | start_disk = conf->last_used; | 436 | start_disk = conf->last_used; |
435 | } | 437 | } |
436 | 438 | ||
437 | /* make sure the disk is operational */ | ||
438 | for (i = 0 ; i < conf->raid_disks ; i++) { | 439 | for (i = 0 ; i < conf->raid_disks ; i++) { |
440 | sector_t dist; | ||
439 | int disk = start_disk + i; | 441 | int disk = start_disk + i; |
440 | if (disk >= conf->raid_disks) | 442 | if (disk >= conf->raid_disks) |
441 | disk -= conf->raid_disks; | 443 | disk -= conf->raid_disks; |
@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
443 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | 445 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
444 | if (r1_bio->bios[disk] == IO_BLOCKED | 446 | if (r1_bio->bios[disk] == IO_BLOCKED |
445 | || rdev == NULL | 447 | || rdev == NULL |
446 | || !test_bit(In_sync, &rdev->flags)) | 448 | || test_bit(Faulty, &rdev->flags)) |
447 | continue; | 449 | continue; |
448 | 450 | if (!test_bit(In_sync, &rdev->flags) && | |
449 | new_disk = disk; | 451 | rdev->recovery_offset < this_sector + sectors) |
450 | if (!test_bit(WriteMostly, &rdev->flags)) | ||
451 | break; | ||
452 | } | ||
453 | |||
454 | if (new_disk < 0 || choose_first) | ||
455 | goto rb_out; | ||
456 | |||
457 | /* | ||
458 | * Don't change to another disk for sequential reads: | ||
459 | */ | ||
460 | if (conf->next_seq_sect == this_sector) | ||
461 | goto rb_out; | ||
462 | if (this_sector == conf->mirrors[new_disk].head_position) | ||
463 | goto rb_out; | ||
464 | |||
465 | current_distance = abs(this_sector | ||
466 | - conf->mirrors[new_disk].head_position); | ||
467 | |||
468 | /* look for a better disk - i.e. head is closer */ | ||
469 | start_disk = new_disk; | ||
470 | for (i = 1; i < conf->raid_disks; i++) { | ||
471 | int disk = start_disk + 1; | ||
472 | if (disk >= conf->raid_disks) | ||
473 | disk -= conf->raid_disks; | ||
474 | |||
475 | rdev = rcu_dereference(conf->mirrors[disk].rdev); | ||
476 | if (r1_bio->bios[disk] == IO_BLOCKED | ||
477 | || rdev == NULL | ||
478 | || !test_bit(In_sync, &rdev->flags) | ||
479 | || test_bit(WriteMostly, &rdev->flags)) | ||
480 | continue; | 452 | continue; |
481 | 453 | if (test_bit(WriteMostly, &rdev->flags)) { | |
482 | if (!atomic_read(&rdev->nr_pending)) { | 454 | /* Don't balance among write-mostly, just |
483 | new_disk = disk; | 455 | * use the first as a last resort */ |
456 | if (best_disk < 0) | ||
457 | best_disk = disk; | ||
458 | continue; | ||
459 | } | ||
460 | /* This is a reasonable device to use. It might | ||
461 | * even be best. | ||
462 | */ | ||
463 | dist = abs(this_sector - conf->mirrors[disk].head_position); | ||
464 | if (choose_first | ||
465 | /* Don't change to another disk for sequential reads */ | ||
466 | || conf->next_seq_sect == this_sector | ||
467 | || dist == 0 | ||
468 | /* If device is idle, use it */ | ||
469 | || atomic_read(&rdev->nr_pending) == 0) { | ||
470 | best_disk = disk; | ||
484 | break; | 471 | break; |
485 | } | 472 | } |
486 | new_distance = abs(this_sector - conf->mirrors[disk].head_position); | 473 | if (dist < best_dist) { |
487 | if (new_distance < current_distance) { | 474 | best_dist = dist; |
488 | current_distance = new_distance; | 475 | best_disk = disk; |
489 | new_disk = disk; | ||
490 | } | 476 | } |
491 | } | 477 | } |
492 | 478 | ||
493 | rb_out: | 479 | if (best_disk >= 0) { |
494 | if (new_disk >= 0) { | 480 | rdev = rcu_dereference(conf->mirrors[best_disk].rdev); |
495 | rdev = rcu_dereference(conf->mirrors[new_disk].rdev); | ||
496 | if (!rdev) | 481 | if (!rdev) |
497 | goto retry; | 482 | goto retry; |
498 | atomic_inc(&rdev->nr_pending); | 483 | atomic_inc(&rdev->nr_pending); |
499 | if (!test_bit(In_sync, &rdev->flags)) { | 484 | if (test_bit(Faulty, &rdev->flags)) { |
500 | /* cannot risk returning a device that failed | 485 | /* cannot risk returning a device that failed |
501 | * before we inc'ed nr_pending | 486 | * before we inc'ed nr_pending |
502 | */ | 487 | */ |
@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
504 | goto retry; | 489 | goto retry; |
505 | } | 490 | } |
506 | conf->next_seq_sect = this_sector + sectors; | 491 | conf->next_seq_sect = this_sector + sectors; |
507 | conf->last_used = new_disk; | 492 | conf->last_used = best_disk; |
508 | } | 493 | } |
509 | rcu_read_unlock(); | 494 | rcu_read_unlock(); |
510 | 495 | ||
511 | return new_disk; | 496 | return best_disk; |
512 | } | 497 | } |
513 | 498 | ||
514 | static int raid1_congested(void *data, int bits) | 499 | static int raid1_congested(void *data, int bits) |