aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2005-11-18 04:11:01 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-18 10:49:46 -0500
commitc0e485216de80046dd0d448b7cd530dbfc31402f (patch)
tree069326485a1919936231a5a4d64d6adb8105d3a2
parent34ef75f09f09611b62ae11ae36c3c7b0a6a8a9cb (diff)
[PATCH] md: fix is_mddev_idle calculation now that disk/sector accounting happens when request completes
md needs to monitor the rate of requests to its devices when doing resync/recovery so that it can back-off when there is non-resync IO. It does this by comparing resync IO, which it counts, with total IO which is taken from disk_stats. disk_stats were recently changed to account sectors when a request completes instead of when it is queued. This upsets md's calculations. We could do the sync_io accounting at the end of requests too, but that has problems. If an underlying device is an md array, the accounting will still be done when the request is submitted. This could be changed for some raid levels, but it cannot be changed for raid0 or linear without substantial code changes. So instead, we increase the error that is_mddev_idle allows, up to the maximum amount of resync IO that can be in flight at any time. The calculation is current fragile as each personality as different limits for in-flight resync. This should be fixed up. For now, this simple patch fixes the problem. Increasing the error margin decreases the sensitivity to non-resync IO. To partially compensate for this, the time to wait when non-resync IO is detected is increased so that less steady IO is required to keep the resync at bay. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c17
1 files changed, 13 insertions, 4 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f3fed662f32e..78c7418478d6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3846,11 +3846,20 @@ static int is_mddev_idle(mddev_t *mddev)
3846 curr_events = disk_stat_read(disk, sectors[0]) + 3846 curr_events = disk_stat_read(disk, sectors[0]) +
3847 disk_stat_read(disk, sectors[1]) - 3847 disk_stat_read(disk, sectors[1]) -
3848 atomic_read(&disk->sync_io); 3848 atomic_read(&disk->sync_io);
3849 /* Allow some slack between valud of curr_events and last_events, 3849 /* The difference between curr_events and last_events
3850 * as there are some uninteresting races. 3850 * will be affected by any new non-sync IO (making
3851 * curr_events bigger) and any difference in the amount of
3852 * in-flight syncio (making current_events bigger or smaller)
3853 * The amount in-flight is currently limited to
3854 * 32*64K in raid1/10 and 256*PAGE_SIZE in raid5/6
3855 * which is at most 4096 sectors.
3856 * These numbers are fairly fragile and should be made
3857 * more robust, probably by enforcing the
3858 * 'window size' that md_do_sync sort-of uses.
3859 *
3851 * Note: the following is an unsigned comparison. 3860 * Note: the following is an unsigned comparison.
3852 */ 3861 */
3853 if ((curr_events - rdev->last_events + 32) > 64) { 3862 if ((curr_events - rdev->last_events + 4096) > 8192) {
3854 rdev->last_events = curr_events; 3863 rdev->last_events = curr_events;
3855 idle = 0; 3864 idle = 0;
3856 } 3865 }
@@ -4109,7 +4118,7 @@ static void md_do_sync(mddev_t *mddev)
4109 if (currspeed > sysctl_speed_limit_min) { 4118 if (currspeed > sysctl_speed_limit_min) {
4110 if ((currspeed > sysctl_speed_limit_max) || 4119 if ((currspeed > sysctl_speed_limit_max) ||
4111 !is_mddev_idle(mddev)) { 4120 !is_mddev_idle(mddev)) {
4112 msleep(250); 4121 msleep(500);
4113 goto repeat; 4122 goto repeat;
4114 } 4123 }
4115 } 4124 }