aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/backing-dev.h3
-rw-r--r--mm/backing-dev.c1
-rw-r--r--mm/page-writeback.c71
3 files changed, 74 insertions, 1 deletion
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index dff0ff78e878..c3b92010d894 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -83,8 +83,11 @@ struct backing_dev_info {
83 /* 83 /*
84 * The base dirty throttle rate, re-calculated on every 200ms. 84 * The base dirty throttle rate, re-calculated on every 200ms.
85 * All the bdi tasks' dirty rate will be curbed under it. 85 * All the bdi tasks' dirty rate will be curbed under it.
86 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
87 * in small steps and is much more smooth/stable than the latter.
86 */ 88 */
87 unsigned long dirty_ratelimit; 89 unsigned long dirty_ratelimit;
90 unsigned long balanced_dirty_ratelimit;
88 91
89 struct prop_local_percpu completions; 92 struct prop_local_percpu completions;
90 int dirty_exceeded; 93 int dirty_exceeded;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index ba20f94cde93..5dcaa3c756d1 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -686,6 +686,7 @@ int bdi_init(struct backing_dev_info *bdi)
686 bdi->bw_time_stamp = jiffies; 686 bdi->bw_time_stamp = jiffies;
687 bdi->written_stamp = 0; 687 bdi->written_stamp = 0;
688 688
689 bdi->balanced_dirty_ratelimit = INIT_BW;
689 bdi->dirty_ratelimit = INIT_BW; 690 bdi->dirty_ratelimit = INIT_BW;
690 bdi->write_bandwidth = INIT_BW; 691 bdi->write_bandwidth = INIT_BW;
691 bdi->avg_write_bandwidth = INIT_BW; 692 bdi->avg_write_bandwidth = INIT_BW;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1721b6523c04..d4a6e91bd9e5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -792,12 +792,17 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
792 unsigned long dirtied, 792 unsigned long dirtied,
793 unsigned long elapsed) 793 unsigned long elapsed)
794{ 794{
795 unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
796 unsigned long limit = hard_dirty_limit(thresh);
797 unsigned long setpoint = (freerun + limit) / 2;
795 unsigned long write_bw = bdi->avg_write_bandwidth; 798 unsigned long write_bw = bdi->avg_write_bandwidth;
796 unsigned long dirty_ratelimit = bdi->dirty_ratelimit; 799 unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
797 unsigned long dirty_rate; 800 unsigned long dirty_rate;
798 unsigned long task_ratelimit; 801 unsigned long task_ratelimit;
799 unsigned long balanced_dirty_ratelimit; 802 unsigned long balanced_dirty_ratelimit;
800 unsigned long pos_ratio; 803 unsigned long pos_ratio;
804 unsigned long step;
805 unsigned long x;
801 806
802 /* 807 /*
803 * The dirty rate will match the writeout rate in long term, except 808 * The dirty rate will match the writeout rate in long term, except
@@ -847,7 +852,71 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
847 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw, 852 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
848 dirty_rate | 1); 853 dirty_rate | 1);
849 854
850 bdi->dirty_ratelimit = max(balanced_dirty_ratelimit, 1UL); 855 /*
856 * We could safely do this and return immediately:
857 *
858 * bdi->dirty_ratelimit = balanced_dirty_ratelimit;
859 *
860 * However to get a more stable dirty_ratelimit, the below elaborated
861 * code makes use of task_ratelimit to filter out singular points and
862 * limit the step size.
863 *
864 * The below code essentially only uses the relative value of
865 *
866 * task_ratelimit - dirty_ratelimit
867 * = (pos_ratio - 1) * dirty_ratelimit
868 *
869 * which reflects the direction and size of dirty position error.
870 */
871
872 /*
873 * dirty_ratelimit will follow balanced_dirty_ratelimit iff
874 * task_ratelimit is on the same side of dirty_ratelimit, too.
875 * For example, when
876 * - dirty_ratelimit > balanced_dirty_ratelimit
877 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
878 * lowering dirty_ratelimit will help meet both the position and rate
879 * control targets. Otherwise, don't update dirty_ratelimit if it will
880 * only help meet the rate target. After all, what the users ultimately
881 * feel and care about are a stable dirty rate and a small position error.
882 *
883 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
884 * and filter out the singular points of balanced_dirty_ratelimit, which
885 * keeps jumping around randomly and can even leap far away at times
886 * due to the small 200ms estimation period of dirty_rate (we want to
887 * keep that period small to reduce time lags).
888 */
889 step = 0;
890 if (dirty < setpoint) {
891 x = min(bdi->balanced_dirty_ratelimit,
892 min(balanced_dirty_ratelimit, task_ratelimit));
893 if (dirty_ratelimit < x)
894 step = x - dirty_ratelimit;
895 } else {
896 x = max(bdi->balanced_dirty_ratelimit,
897 max(balanced_dirty_ratelimit, task_ratelimit));
898 if (dirty_ratelimit > x)
899 step = dirty_ratelimit - x;
900 }
901
902 /*
903 * Don't pursue 100% rate matching. It's impossible since the balanced
904 * rate itself is constantly fluctuating. So decrease the track speed
905 * when it gets close to the target. Helps eliminate pointless tremors.
906 */
907 step >>= dirty_ratelimit / (2 * step + 1);
908 /*
909 * Limit the tracking speed to avoid overshooting.
910 */
911 step = (step + 7) / 8;
912
913 if (dirty_ratelimit < balanced_dirty_ratelimit)
914 dirty_ratelimit += step;
915 else
916 dirty_ratelimit -= step;
917
918 bdi->dirty_ratelimit = max(dirty_ratelimit, 1UL);
919 bdi->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
851} 920}
852 921
853void __bdi_update_bandwidth(struct backing_dev_info *bdi, 922void __bdi_update_bandwidth(struct backing_dev_info *bdi,