aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2012-07-30 20:03:53 -0400
committerNeilBrown <neilb@suse.de>2012-07-30 20:03:53 -0400
commitbe4d3280b17bc51f23ec6ebb345728f302f80a0c (patch)
tree1fc4e6d04fd57667b6b496ae6165d08f59405743 /drivers/md
parentcc4d1efdd017083bbcbaf23feb4cdc717fa7dab8 (diff)
md/raid1: make sequential read detection per disk based
Currently, sequential read detection is global to the whole array. It is natural to make it per-disk, which improves detection when multiple sequential reads run concurrently. The next patch will make SSD read balancing stop using the distance-based algorithm, and this change helps detect truly sequential reads on SSDs. Signed-off-by: Shaohua Li <shli@fusionio.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid1.c35
-rw-r--r--drivers/md/raid1.h11
2 files changed, 12 insertions, 34 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d3d3568b4fb1..fb96c0c2db40 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -497,9 +497,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
497 const sector_t this_sector = r1_bio->sector; 497 const sector_t this_sector = r1_bio->sector;
498 int sectors; 498 int sectors;
499 int best_good_sectors; 499 int best_good_sectors;
500 int start_disk;
501 int best_disk; 500 int best_disk;
502 int i; 501 int disk;
503 sector_t best_dist; 502 sector_t best_dist;
504 struct md_rdev *rdev; 503 struct md_rdev *rdev;
505 int choose_first; 504 int choose_first;
@@ -517,23 +516,16 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
517 best_good_sectors = 0; 516 best_good_sectors = 0;
518 517
519 if (conf->mddev->recovery_cp < MaxSector && 518 if (conf->mddev->recovery_cp < MaxSector &&
520 (this_sector + sectors >= conf->next_resync)) { 519 (this_sector + sectors >= conf->next_resync))
521 choose_first = 1; 520 choose_first = 1;
522 start_disk = 0; 521 else
523 } else {
524 choose_first = 0; 522 choose_first = 0;
525 start_disk = conf->last_used;
526 }
527 523
528 for (i = 0 ; i < conf->raid_disks * 2 ; i++) { 524 for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
529 sector_t dist; 525 sector_t dist;
530 sector_t first_bad; 526 sector_t first_bad;
531 int bad_sectors; 527 int bad_sectors;
532 528
533 int disk = start_disk + i;
534 if (disk >= conf->raid_disks * 2)
535 disk -= conf->raid_disks * 2;
536
537 rdev = rcu_dereference(conf->mirrors[disk].rdev); 529 rdev = rcu_dereference(conf->mirrors[disk].rdev);
538 if (r1_bio->bios[disk] == IO_BLOCKED 530 if (r1_bio->bios[disk] == IO_BLOCKED
539 || rdev == NULL 531 || rdev == NULL
@@ -594,7 +586,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
594 dist = abs(this_sector - conf->mirrors[disk].head_position); 586 dist = abs(this_sector - conf->mirrors[disk].head_position);
595 if (choose_first 587 if (choose_first
596 /* Don't change to another disk for sequential reads */ 588 /* Don't change to another disk for sequential reads */
597 || conf->next_seq_sect == this_sector 589 || conf->mirrors[disk].next_seq_sect == this_sector
598 || dist == 0 590 || dist == 0
599 /* If device is idle, use it */ 591 /* If device is idle, use it */
600 || atomic_read(&rdev->nr_pending) == 0) { 592 || atomic_read(&rdev->nr_pending) == 0) {
@@ -620,8 +612,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
620 goto retry; 612 goto retry;
621 } 613 }
622 sectors = best_good_sectors; 614 sectors = best_good_sectors;
623 conf->next_seq_sect = this_sector + sectors; 615 conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
624 conf->last_used = best_disk;
625 } 616 }
626 rcu_read_unlock(); 617 rcu_read_unlock();
627 *max_sectors = sectors; 618 *max_sectors = sectors;
@@ -2599,7 +2590,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2599 conf->recovery_disabled = mddev->recovery_disabled - 1; 2590 conf->recovery_disabled = mddev->recovery_disabled - 1;
2600 2591
2601 err = -EIO; 2592 err = -EIO;
2602 conf->last_used = -1;
2603 for (i = 0; i < conf->raid_disks * 2; i++) { 2593 for (i = 0; i < conf->raid_disks * 2; i++) {
2604 2594
2605 disk = conf->mirrors + i; 2595 disk = conf->mirrors + i;
@@ -2625,19 +2615,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2625 if (disk->rdev && 2615 if (disk->rdev &&
2626 (disk->rdev->saved_raid_disk < 0)) 2616 (disk->rdev->saved_raid_disk < 0))
2627 conf->fullsync = 1; 2617 conf->fullsync = 1;
2628 } else if (conf->last_used < 0) 2618 }
2629 /*
2630 * The first working device is used as a
2631 * starting point to read balancing.
2632 */
2633 conf->last_used = i;
2634 } 2619 }
2635 2620
2636 if (conf->last_used < 0) {
2637 printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
2638 mdname(mddev));
2639 goto abort;
2640 }
2641 err = -ENOMEM; 2621 err = -ENOMEM;
2642 conf->thread = md_register_thread(raid1d, mddev, "raid1"); 2622 conf->thread = md_register_thread(raid1d, mddev, "raid1");
2643 if (!conf->thread) { 2623 if (!conf->thread) {
@@ -2894,7 +2874,6 @@ static int raid1_reshape(struct mddev *mddev)
2894 conf->raid_disks = mddev->raid_disks = raid_disks; 2874 conf->raid_disks = mddev->raid_disks = raid_disks;
2895 mddev->delta_disks = 0; 2875 mddev->delta_disks = 0;
2896 2876
2897 conf->last_used = 0; /* just make sure it is in-range */
2898 lower_barrier(conf); 2877 lower_barrier(conf);
2899 2878
2900 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 2879 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 4e3613daaea2..3770b4a27662 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -4,6 +4,11 @@
4struct raid1_info { 4struct raid1_info {
5 struct md_rdev *rdev; 5 struct md_rdev *rdev;
6 sector_t head_position; 6 sector_t head_position;
7
8 /* When choosing the best device for a read (read_balance())
9 * we try to keep sequential reads on the same device
10 */
11 sector_t next_seq_sect;
7}; 12};
8 13
9/* 14/*
@@ -29,12 +34,6 @@ struct r1conf {
29 */ 34 */
30 int raid_disks; 35 int raid_disks;
31 36
32 /* When choose the best device for a read (read_balance())
33 * we try to keep sequential reads one the same device
34 * using 'last_used' and 'next_seq_sect'
35 */
36 int last_used;
37 sector_t next_seq_sect;
38 /* During resync, read_balancing is only allowed on the part 37 /* During resync, read_balancing is only allowed on the part
39 * of the array that has been resynced. 'next_resync' tells us 38 * of the array that has been resynced. 'next_resync' tells us
40 * where that is. 39 * where that is.