author     NeilBrown <neilb@suse.de>   2011-10-11 01:50:01 -0400
committer  NeilBrown <neilb@suse.de>   2011-10-11 01:50:01 -0400
commit     34db0cd60f8a1f4ab73d118a8be3797c20388223
tree       45d1d629d6b9195d47d30980595686c9a3ee92f9  /drivers/md/raid1.c
parent     84fc4b56db85cb9e05326424049973a2036c9940
md: add proper write-congestion reporting to RAID1 and RAID10.
RAID1 and RAID10 handle write requests by queuing them for a separate
thread to process. This is because when a write-intent bitmap is
active we might need to update the bitmap first, so it is good to
queue a lot of writes and then do one big bitmap update for them all.
However, writeback expects devices to appear congested after a while
so that it can make some guesstimate of throughput. The infinite
queue defeats that (note that RAID5 already has a finite queue, so it
doesn't suffer from this problem).
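
The fix follows the classic bounded-queue back-pressure pattern: the
producer blocks once the queue reaches its cap, and the draining
thread wakes it after a flush. Below is a minimal userspace sketch of
that handshake (pthreads; every name here is illustrative rather than
taken from the patch -- the kernel code in the diff uses
wait_event()/wake_up() and a spinlock instead):

#include <pthread.h>
#include <stdio.h>

#define MAX_QUEUED 1024

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t not_full = PTHREAD_COND_INITIALIZER;
static int pending_count;

/* Producer side: mirrors the wait_event() the patch adds to the
 * write path -- block while the queue is at its cap. */
static void queue_write(void)
{
	pthread_mutex_lock(&lock);
	while (pending_count >= MAX_QUEUED)
		pthread_cond_wait(&not_full, &lock);
	pending_count++;
	pthread_mutex_unlock(&lock);
}

/* Consumer side: mirrors flush_pending_writes() zeroing the count
 * for the whole batch and waking every waiter. */
static void flush_writes(void)
{
	pthread_mutex_lock(&lock);
	pending_count = 0;
	pthread_cond_broadcast(&not_full);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	queue_write();
	flush_writes();
	printf("pending_count = %d\n", pending_count);
	return 0;
}

The diff below wires the same two halves into make_request() and
flush_pending_writes(), with md_raid1_congested() reporting the
congestion state back to writeback.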
So impose a limit on the number of pending write requests. By default
it is 1024, which seems to be generally suitable. Make it
configurable via a module option just in case someone finds a
regression.
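
For context (not part of the commit message): registering the knob
with S_IRUGO|S_IWUSR makes it world-readable and root-writable via
sysfs, so it can be retuned at runtime through
/sys/module/raid1/parameters/max_queued_requests without touching the
array. A stripped-down sketch of that declaration pattern, written as
a toy out-of-tree module rather than raid1 itself:

#include <linux/module.h>

/* Toy stand-in for the declaration the patch adds to raid1.c.
 * module_param() exposes the variable under
 * /sys/module/<module>/parameters/; reads and writes go straight to
 * the int, so the next congestion check sees the new value. */
static int max_queued_requests = 1024;
module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(max_queued_requests,
		 "cap on writes queued for the worker thread");

MODULE_LICENSE("GPL");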
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--  drivers/md/raid1.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+), 0 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e023a25acf54..d8957d74fd25 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -45,6 +45,11 @@
  */
 #define NR_RAID1_BIOS 256
 
+/* When there are this many requests queue to be written by
+ * the raid1 thread, we become 'congested' to provide back-pressure
+ * for writeback.
+ */
+static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r1conf *conf);
 static void lower_barrier(struct r1conf *conf);
@@ -598,6 +603,10 @@ int md_raid1_congested(struct mddev *mddev, int bits)
 	struct r1conf *conf = mddev->private;
 	int i, ret = 0;
 
+	if ((bits & (1 << BDI_async_congested)) &&
+	    conf->pending_count >= max_queued_requests)
+		return 1;
+
 	rcu_read_lock();
 	for (i = 0; i < mddev->raid_disks; i++) {
 		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -638,10 +647,12 @@ static void flush_pending_writes(struct r1conf *conf)
 	if (conf->pending_bio_list.head) {
 		struct bio *bio;
 		bio = bio_list_get(&conf->pending_bio_list);
+		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
 		/* flush any pending bitmap writes to
 		 * disk before proceeding w/ I/O */
 		bitmap_unplug(conf->mddev->bitmap);
+		wake_up(&conf->wait_barrier);
 
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
@@ -945,6 +956,11 @@ read_again:
 	/*
 	 * WRITE:
 	 */
+	if (conf->pending_count >= max_queued_requests) {
+		md_wakeup_thread(mddev->thread);
+		wait_event(conf->wait_barrier,
+			   conf->pending_count < max_queued_requests);
+	}
 	/* first select target devices under rcu_lock and
 	 * inc refcount on their rdev. Record them by setting
 	 * bios[x] to bio
@@ -1108,6 +1124,7 @@ read_again:
 		atomic_inc(&r1_bio->remaining);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
+		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
@@ -2418,6 +2435,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	init_waitqueue_head(&conf->wait_barrier);
 
 	bio_list_init(&conf->pending_bio_list);
+	conf->pending_count = 0;
 
 	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks; i++) {
@@ -2776,3 +2794,5 @@ MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
 MODULE_ALIAS("md-personality-3"); /* RAID1 */
 MODULE_ALIAS("md-raid1");
 MODULE_ALIAS("md-level-1");
+
+module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);