author    Wu Fengguang <fengguang.wu@intel.com>    2011-08-26 17:53:24 -0400
committer Wu Fengguang <fengguang.wu@intel.com>    2011-10-03 09:08:57 -0400
commit    7381131cbcf7e15d201a0ffd782a4698efe4e740 (patch)
tree      83f00c40d0a3fcd41ff2e6681a5da70dd155628a /mm
parent    be3ffa276446e1b691a2bf84e7621e5a6fb49db9 (diff)
writeback: stabilize bdi->dirty_ratelimit
There are some imperfections in balanced_dirty_ratelimit.

1) large fluctuations

The dirty_rate used for computing balanced_dirty_ratelimit is merely averaged over the past 200ms (very short compared to the 3s estimation period for write_bw), which makes for a rather dispersed distribution of balanced_dirty_ratelimit.

It's pretty hard to average out the singular points by increasing the estimation period. Since averaging would introduce very undesirable time lags, that approach is given up entirely. (Btw, the 3s write_bw averaging time lag is much more acceptable because its impact is one-way and therefore won't lead to oscillations.)

The more practical way is filtering: most singular balanced_dirty_ratelimit points could be filtered out by remembering some prev_balanced_rate and prev_prev_balanced_rate. However, the more reliable way is to guard balanced_dirty_ratelimit with task_ratelimit.

2) due to truncates and fs redirties, the (write_bw <=> dirty_rate) match could become unbalanced

This may lead to large systematic errors in balanced_dirty_ratelimit. Truncates, given their possibly bumpy nature, can hardly be compensated for smoothly. So let's face it: when some over-estimated balanced_dirty_ratelimit pushes dirty_ratelimit high, dirty pages will rise above the setpoint, and task_ratelimit will in turn fall below dirty_ratelimit. So if we consider both balanced_dirty_ratelimit and task_ratelimit, and update dirty_ratelimit only when they are on the same side of dirty_ratelimit, the systematic errors in balanced_dirty_ratelimit won't be able to drag dirty_ratelimit far away.

The balanced_dirty_ratelimit estimation may also be inaccurate near @limit or @freerun; however, that is less of an issue.

3) since we ultimately want to
- keep the fluctuations of task_ratelimit as small as possible
- keep the dirty pages around the setpoint for as long as possible

the update policy used for (2) also serves these goals nicely: if for some reason the dirty pages are high (task_ratelimit < dirty_ratelimit) while dirty_ratelimit is low (dirty_ratelimit < balanced_dirty_ratelimit), there is no point in bringing up dirty_ratelimit in a hurry only to hurt both of the above goals.

So, we make use of task_ratelimit to limit the update of dirty_ratelimit in two ways:

1) avoid changing the dirty rate when it's against the position control target (the adjusted rate would slow down the progress of dirty pages going back to the setpoint).

2) limit the step size. task_ratelimit changes its value step by step, leaving a consistent trace, in contrast to the randomly jumping balanced_dirty_ratelimit. task_ratelimit also has the nice property of small errors in the stable state and typically larger errors when there are big errors in rate. So it's a pretty good limiting factor for the step size of dirty_ratelimit.

Note that bdi->dirty_ratelimit always tracks balanced_dirty_ratelimit; task_ratelimit is merely used as a limiting factor.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
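Before the diff, here is a minimal user-space sketch of the "same side" guard described above. It is not the kernel code (the patch below has the real thing, including step-size damping, which is deliberately elided here); guarded_update() is a hypothetical helper whose parameter names mirror the patch:

    #include <stdio.h>

    /* dirty_ratelimit (rate) moves toward balanced_dirty_ratelimit
     * (balanced) only when task_ratelimit (task) agrees on the
     * direction; otherwise it holds still. */
    static unsigned long guarded_update(unsigned long rate,
                                        unsigned long balanced,
                                        unsigned long task)
    {
            if (balanced > rate && task > rate)
                    /* both signals say "too low": step up, bounded by
                     * the nearer of the two (damping elided) */
                    return balanced < task ? balanced : task;
            if (balanced < rate && task < rate)
                    /* both say "too high": step down likewise */
                    return balanced > task ? balanced : task;
            return rate;    /* signals disagree: likely noise, don't move */
    }

    int main(void)
    {
            /* dirty pages above setpoint (task < rate) yet balanced > rate:
             * raising rate would push dirty pages even higher, so hold. */
            printf("%lu\n", guarded_update(100, 140, 80));  /* -> 100 */
            /* both agree rate is too high: step down to the nearer one. */
            printf("%lu\n", guarded_update(100, 60, 90));   /* -> 90 */
            return 0;
    }

The disagreement case is exactly point (1) above: a one-sided signal is either a singular point in balanced_dirty_ratelimit or a systematic error, so it must not move dirty_ratelimit.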
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c      1
-rw-r--r--  mm/page-writeback.c  71
2 files changed, 71 insertions, 1 deletion
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index ba20f94cde93..5dcaa3c756d1 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -686,6 +686,7 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->bw_time_stamp = jiffies;
 	bdi->written_stamp = 0;
 
+	bdi->balanced_dirty_ratelimit = INIT_BW;
 	bdi->dirty_ratelimit = INIT_BW;
 	bdi->write_bandwidth = INIT_BW;
 	bdi->avg_write_bandwidth = INIT_BW;
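For context (a recollection of the surrounding code of this vintage, not part of the patch): INIT_BW in mm/backing-dev.c is 100 MB/s expressed in pages per second,

    #define INIT_BW	(100 << (20 - PAGE_SHIFT))

so the new balanced_dirty_ratelimit starts from the same neutral guess as dirty_ratelimit and the two estimates only diverge once real dirtying begins.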
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1721b6523c04..d4a6e91bd9e5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -792,12 +792,17 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 				       unsigned long dirtied,
 				       unsigned long elapsed)
 {
+	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
+	unsigned long limit = hard_dirty_limit(thresh);
+	unsigned long setpoint = (freerun + limit) / 2;
 	unsigned long write_bw = bdi->avg_write_bandwidth;
 	unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
 	unsigned long dirty_rate;
 	unsigned long task_ratelimit;
 	unsigned long balanced_dirty_ratelimit;
 	unsigned long pos_ratio;
+	unsigned long step;
+	unsigned long x;
 
 	/*
 	 * The dirty rate will match the writeout rate in long term, except
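A quick numeric sketch of the new setpoint, assuming the helpers behave as defined elsewhere in page-writeback.c (dirty_freerun_ceiling() returning (thresh + bg_thresh) / 2, hard_dirty_limit() returning max(thresh, global_dirty_limit)); the numbers are made up:

    /* thresh = 1000 pages, bg_thresh = 500 pages, global limit not raised */
    freerun  = (1000 + 500) / 2;   /* = 750  */
    limit    = 1000;               /* = max(1000, global_dirty_limit) */
    setpoint = (750 + 1000) / 2;   /* = 875  */

With dirty below 875 pages, pos_ratio > 1 and task_ratelimit > dirty_ratelimit; above it, the reverse. The second hunk below keys its update direction off exactly this comparison.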
@@ -847,7 +852,71 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
 					   dirty_rate | 1);
 
-	bdi->dirty_ratelimit = max(balanced_dirty_ratelimit, 1UL);
+	/*
+	 * We could safely do this and return immediately:
+	 *
+	 *	bdi->dirty_ratelimit = balanced_dirty_ratelimit;
+	 *
+	 * However, to get a more stable dirty_ratelimit, the code below
+	 * makes use of task_ratelimit to filter out singular points and
+	 * limit the step size.
+	 *
+	 * The code below essentially only uses the relative value of
+	 *
+	 *	task_ratelimit - dirty_ratelimit
+	 *	= (pos_ratio - 1) * dirty_ratelimit
+	 *
+	 * which reflects the direction and size of the dirty position error.
+	 */
+
+	/*
+	 * dirty_ratelimit will follow balanced_dirty_ratelimit iff
+	 * task_ratelimit is on the same side of dirty_ratelimit, too.
+	 * For example, when
+	 * - dirty_ratelimit > balanced_dirty_ratelimit
+	 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
+	 * lowering dirty_ratelimit will help meet both the position and rate
+	 * control targets. Otherwise, don't update dirty_ratelimit if it will
+	 * only help meet the rate target. After all, what users ultimately
+	 * feel and care about are a stable dirty rate and small position
+	 * error.
+	 *
+	 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
+	 * and filter out the singular points of balanced_dirty_ratelimit,
+	 * which keeps jumping around randomly and can even leap far away at
+	 * times due to the small 200ms estimation period of dirty_rate (we
+	 * want to keep that period small to reduce time lags).
+	 */
+	step = 0;
+	if (dirty < setpoint) {
+		x = min(bdi->balanced_dirty_ratelimit,
+			min(balanced_dirty_ratelimit, task_ratelimit));
+		if (dirty_ratelimit < x)
+			step = x - dirty_ratelimit;
+	} else {
+		x = max(bdi->balanced_dirty_ratelimit,
+			max(balanced_dirty_ratelimit, task_ratelimit));
+		if (dirty_ratelimit > x)
+			step = dirty_ratelimit - x;
+	}
+
+	/*
+	 * Don't pursue 100% rate matching. It's impossible since the balanced
+	 * rate itself is constantly fluctuating. So decrease the tracking
+	 * speed when it gets close to the target. Helps eliminate pointless
+	 * tremors.
+	 */
+	step >>= dirty_ratelimit / (2 * step + 1);
+	/*
+	 * Limit the tracking speed to avoid overshooting.
+	 */
+	step = (step + 7) / 8;
+
+	if (dirty_ratelimit < balanced_dirty_ratelimit)
+		dirty_ratelimit += step;
+	else
+		dirty_ratelimit -= step;
+
+	bdi->dirty_ratelimit = max(dirty_ratelimit, 1UL);
+	bdi->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
 }
 
 void __bdi_update_bandwidth(struct backing_dev_info *bdi,
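To see how the two damping lines in the hunk above behave, a worked example with made-up numbers (units are pages/s):

    /* far from target: the gap stays large enough to keep tracking */
    dirty_ratelimit = 10000;
    step = 3000;                        /* |gap| to the nearest target  */
    step >>= 10000 / (2 * 3000 + 1);    /* shift by 10000/6001 = 1 -> 1500 */
    step = (1500 + 7) / 8;              /* -> 188 pages/s this update   */

    /* close to target: the shift count explodes and the step vanishes */
    step = 100;
    step >>= 10000 / (2 * 100 + 1);     /* shift by 10000/201 = 49 -> 0 */

So the tracking speed scales down smoothly as dirty_ratelimit approaches balanced_dirty_ratelimit, and discrepancies of around 1% or less produce no movement at all, which is exactly the "pointless tremors" the comment refers to.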