Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--	mm/page-writeback.c	83
1 file changed, 81 insertions(+), 2 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4b954c9fe846..1721b6523c04 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -777,6 +777,79 @@ static void global_update_bandwidth(unsigned long thresh,
 	spin_unlock(&dirty_lock);
 }
 
+/*
+ * Maintain bdi->dirty_ratelimit, the base dirty throttle rate.
+ *
+ * Normal bdi tasks will be curbed at or below it in long term.
+ * Obviously it should be around (write_bw / N) when there are N dd tasks.
+ */
+static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
+				       unsigned long thresh,
+				       unsigned long bg_thresh,
+				       unsigned long dirty,
+				       unsigned long bdi_thresh,
+				       unsigned long bdi_dirty,
+				       unsigned long dirtied,
+				       unsigned long elapsed)
+{
+	unsigned long write_bw = bdi->avg_write_bandwidth;
+	unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
+	unsigned long dirty_rate;
+	unsigned long task_ratelimit;
+	unsigned long balanced_dirty_ratelimit;
+	unsigned long pos_ratio;
+
+	/*
+	 * The dirty rate will match the writeout rate in long term, except
+	 * when dirty pages are truncated by userspace or re-dirtied by FS.
+	 */
+	dirty_rate = (dirtied - bdi->dirtied_stamp) * HZ / elapsed;
+
+	pos_ratio = bdi_position_ratio(bdi, thresh, bg_thresh, dirty,
+				       bdi_thresh, bdi_dirty);
+	/*
+	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
+	 */
+	task_ratelimit = (u64)dirty_ratelimit *
+					pos_ratio >> RATELIMIT_CALC_SHIFT;
+	task_ratelimit++; /* it helps ramp up dirty_ratelimit from tiny values */
+
+	/*
+	 * A linear estimation of the "balanced" throttle rate. The theory is,
+	 * if there are N dd tasks, each throttled at task_ratelimit, the
+	 * bdi's dirty_rate will be measured to be (N * task_ratelimit). So
+	 * the formula below will yield the balanced rate limit (write_bw / N).
+	 *
+	 * Note that the expanded form is not a pure rate feedback:
+	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)              (1)
+	 * but also takes pos_ratio into account:
+	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio  (2)
+	 *
+	 * (1) is not realistic because pos_ratio also takes part in balancing
+	 * the dirty rate. Consider the state
+	 *	pos_ratio = 0.5                                              (3)
+	 *	rate = 2 * (write_bw / N)                                    (4)
+	 * If (1) is used, it will get stuck in that state! Because each dd
+	 * will be throttled at
+	 *	task_ratelimit = pos_ratio * rate = (write_bw / N)           (5)
+	 * yielding
+	 *	dirty_rate = N * task_ratelimit = write_bw                   (6)
+	 * Putting (6) into (1), we get
+	 *	rate_(i+1) = rate_(i)                                        (7)
+	 *
+	 * So we end up using (2) to always keep
+	 *	rate_(i+1) ~= (write_bw / N)                                 (8)
+	 * regardless of the value of pos_ratio. As long as (8) is satisfied,
+	 * pos_ratio is able to drive itself to 1.0, which is not only where
+	 * the dirty count meets the setpoint, but also where the slope of
+	 * pos_ratio is most flat and hence task_ratelimit fluctuates least.
+	 */
+	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
+					   dirty_rate | 1);
+
+	bdi->dirty_ratelimit = max(balanced_dirty_ratelimit, 1UL);
+}
+
 void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 			    unsigned long thresh,
 			    unsigned long bg_thresh,
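
The stuck-state argument in equations (1)-(8) of the comment above can be rehearsed outside the kernel. The sketch below is a standalone userspace C program, not part of the patch: write_bw, N and the pinned pos_ratio = 0.5 are invented illustration values, and pos_ratio is deliberately held at the value from state (3) to mirror the comment's setup. It shows pure feedback (1) staying stuck at 2 * (write_bw / N) while feedback (2) settles at write_bw / N in one step.

/* ratelimit_feedback_demo.c: illustrates equations (1) vs (2) above. */
#include <stdio.h>

int main(void)
{
	const double write_bw = 100.0;		/* MB/s, made-up number */
	const double N = 4.0;			/* number of dd-like writers, made up */
	const double pos_ratio = 0.5;		/* pinned, as in state (3) */
	double rate1 = 2.0 * write_bw / N;	/* both start in state (4) */
	double rate2 = 2.0 * write_bw / N;

	for (int i = 1; i <= 5; i++) {
		/* each writer dirties at rate * pos_ratio, so: */
		double dirty_rate1 = N * rate1 * pos_ratio;
		double dirty_rate2 = N * rate2 * pos_ratio;

		rate1 *= write_bw / dirty_rate1;		/* (1): stays stuck */
		rate2 *= write_bw / dirty_rate2 * pos_ratio;	/* (2): converges */

		printf("step %d: (1) %6.2f   (2) %6.2f   target write_bw/N = %.2f\n",
		       i, rate1, rate2, write_bw / N);
	}
	return 0;
}

In the real code pos_ratio is of course not pinned; once (8) holds, the dirty count drifts toward the setpoint and pos_ratio drives itself back to 1.0, which is the second half of the comment's argument.
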
@@ -787,6 +860,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 {
 	unsigned long now = jiffies;
 	unsigned long elapsed = now - bdi->bw_time_stamp;
+	unsigned long dirtied;
 	unsigned long written;
 
 	/*
@@ -795,6 +869,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed < BANDWIDTH_INTERVAL)
 		return;
 
+	dirtied = percpu_counter_read(&bdi->bdi_stat[BDI_DIRTIED]);
 	written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]);
 
 	/*
@@ -804,12 +879,16 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
 		goto snapshot;
 
-	if (thresh)
+	if (thresh) {
 		global_update_bandwidth(thresh, dirty, now);
-
+		bdi_update_dirty_ratelimit(bdi, thresh, bg_thresh, dirty,
+					   bdi_thresh, bdi_dirty,
+					   dirtied, elapsed);
+	}
 	bdi_update_write_bandwidth(bdi, elapsed, written);
 
 snapshot:
+	bdi->dirtied_stamp = dirtied;
 	bdi->written_stamp = written;
 	bdi->bw_time_stamp = now;
 }
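
For a feel of the integer arithmetic the new function performs on each bandwidth-update interval, here is a userspace rehearsal with invented sample numbers, not kernel code. It assumes RATELIMIT_CALC_SHIFT is 10 (pos_ratio as a 1024-based fixed-point fraction) and a HZ of 1000; check the kernel source for the authoritative values. OR-ing the divisor with 1 mirrors the patch's guard against a zero dirty_rate, and the increment mirrors the ramp-up aid for tiny dirty_ratelimit values.

/* dirty_ratelimit_math.c: reruns bdi_update_dirty_ratelimit()'s arithmetic
 * in userspace with made-up numbers; not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define RATELIMIT_CALC_SHIFT	10	/* assumed; see the kernel source */

int main(void)
{
	unsigned long hz = 1000;		/* assumed CONFIG_HZ */
	unsigned long elapsed = 200;		/* jiffies, ~200ms at hz=1000 */
	unsigned long write_bw = 25600;		/* pages/s, ~100MB/s, made up */
	unsigned long dirty_ratelimit = 12800;	/* previous base rate, made up */
	unsigned long pos_ratio = 512;		/* 0.5 in fixed point */
	unsigned long dirtied_delta = 6400;	/* pages dirtied since dirtied_stamp */

	/* pages dirtied per second over the elapsed interval */
	unsigned long dirty_rate = dirtied_delta * hz / elapsed;

	/* per-task rate for the past interval, scaled by pos_ratio */
	unsigned long task_ratelimit =
		(uint64_t)dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++;			/* ramp up from tiny values */

	/* balanced rate ~= write_bw / N; "| 1" avoids dividing by zero */
	unsigned long balanced =
		(uint64_t)task_ratelimit * write_bw / (dirty_rate | 1);

	printf("dirty_rate=%lu pages/s  task_ratelimit=%lu  balanced=%lu\n",
	       dirty_rate, task_ratelimit, balanced);
	return 0;
}

With these numbers the measured dirty_rate (32000 pages/s) exceeds write_bw, so the balanced rate comes out below the per-task rate and the new dirty_ratelimit is pulled down toward write_bw / N, which is the intended behaviour of the feedback.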