Diffstat (limited to 'mm/page-writeback.c')

-rw-r--r--	mm/page-writeback.c	83
1 file changed, 81 insertions(+), 2 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4b954c9fe846..1721b6523c04 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -777,6 +777,79 @@ static void global_update_bandwidth(unsigned long thresh,
 	spin_unlock(&dirty_lock);
 }
 
+/*
+ * Maintain bdi->dirty_ratelimit, the base dirty throttle rate.
+ *
+ * Normal bdi tasks will be curbed at or below it in the long term.
+ * Obviously it should be around (write_bw / N) when there are N dd tasks.
+ */
+static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
+				       unsigned long thresh,
+				       unsigned long bg_thresh,
+				       unsigned long dirty,
+				       unsigned long bdi_thresh,
+				       unsigned long bdi_dirty,
+				       unsigned long dirtied,
+				       unsigned long elapsed)
+{
+	unsigned long write_bw = bdi->avg_write_bandwidth;
+	unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
+	unsigned long dirty_rate;
+	unsigned long task_ratelimit;
+	unsigned long balanced_dirty_ratelimit;
+	unsigned long pos_ratio;
+
+	/*
+	 * The dirty rate will match the writeout rate in the long term, except
+	 * when dirty pages are truncated by userspace or re-dirtied by the FS.
+	 */
+	dirty_rate = (dirtied - bdi->dirtied_stamp) * HZ / elapsed;
+
+	pos_ratio = bdi_position_ratio(bdi, thresh, bg_thresh, dirty,
+				       bdi_thresh, bdi_dirty);
+	/*
+	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
+	 */
+	task_ratelimit = (u64)dirty_ratelimit *
+					pos_ratio >> RATELIMIT_CALC_SHIFT;
+	task_ratelimit++; /* helps ramp up dirty_ratelimit from tiny values */
+
+	/*
+	 * A linear estimation of the "balanced" throttle rate. The theory is,
+	 * if there are N dd tasks, each throttled at task_ratelimit, the bdi's
+	 * dirty_rate will be measured as (N * task_ratelimit). So the formula
+	 * below will yield the balanced rate limit (write_bw / N).
+	 *
+	 * Note that the expanded form is not a pure rate feedback:
+	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)		    (1)
+	 * but also takes pos_ratio into account:
+	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio (2)
+	 *
+	 * (1) is not realistic because pos_ratio also takes part in balancing
+	 * the dirty rate. Consider the state
+	 *	pos_ratio = 0.5						    (3)
+	 *	rate = 2 * (write_bw / N)				    (4)
+	 * If (1) is used, it will get stuck in that state! Because each dd
+	 * will be throttled at
+	 *	task_ratelimit = pos_ratio * rate = (write_bw / N)	    (5)
+	 * yielding
+	 *	dirty_rate = N * task_ratelimit = write_bw		    (6)
+	 * and putting (6) into (1) we get
+	 *	rate_(i+1) = rate_(i)					    (7)
+	 *
+	 * So we end up using (2) to always keep
+	 *	rate_(i+1) ~= (write_bw / N)				    (8)
+	 * regardless of the value of pos_ratio. As long as (8) is satisfied,
+	 * pos_ratio is able to drive itself to 1.0, which is not only where
+	 * the dirty count meets the setpoint, but also where the slope of
+	 * pos_ratio is flattest and hence task_ratelimit fluctuates least.
+	 */
+	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
+					   dirty_rate | 1); /* avoid div by 0 */
+
+	bdi->dirty_ratelimit = max(balanced_dirty_ratelimit, 1UL);
+}
+
 void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 			    unsigned long thresh,
 			    unsigned long bg_thresh,
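
Why form (2) rather than the pure feedback (1)? It is easiest to see numerically. The following user-space sketch (illustration only, not kernel code) freezes pos_ratio at the stuck state (3) and iterates both update forms; write_bw, N and the starting rate are assumed values matching (3)-(4):

	#include <stdio.h>

	int main(void)
	{
		double write_bw = 100.0;	/* assumed bandwidth, MB/s */
		double N = 4.0;			/* assumed number of dirtier tasks */
		double pos_ratio = 0.5;		/* the example state (3), held fixed */
		double r1, r2;			/* rates under forms (1) and (2) */
		int i;

		r1 = r2 = 2.0 * write_bw / N;	/* starting state (4) */
		for (i = 0; i < 4; i++) {
			/* each task dirties at pos_ratio * rate, so: */
			double dirty_rate1 = N * pos_ratio * r1;
			double dirty_rate2 = N * pos_ratio * r2;

			r1 *= write_bw / dirty_rate1;			/* form (1) */
			r2 *= write_bw / dirty_rate2 * pos_ratio;	/* form (2) */
			printf("step %d: (1) %.2f  (2) %.2f\n", i, r1, r2);
		}
		return 0;
	}

Form (1) prints 50.00 forever, the stuck state of (7); form (2) drops to 25.00 = write_bw / N in one step and holds, matching (8). In the kernel pos_ratio is not fixed: once (8) holds, it drifts toward 1.0 as the comment describes.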
@@ -787,6 +860,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 {
 	unsigned long now = jiffies;
 	unsigned long elapsed = now - bdi->bw_time_stamp;
+	unsigned long dirtied;
 	unsigned long written;
 
 	/*
@@ -795,6 +869,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed < BANDWIDTH_INTERVAL)
 		return;
 
+	dirtied = percpu_counter_read(&bdi->bdi_stat[BDI_DIRTIED]);
 	written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]);
 
 	/*
@@ -804,12 +879,16 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
 		goto snapshot;
 
-	if (thresh)
+	if (thresh) {
 		global_update_bandwidth(thresh, dirty, now);
-
+		bdi_update_dirty_ratelimit(bdi, thresh, bg_thresh, dirty,
+					   bdi_thresh, bdi_dirty,
+					   dirtied, elapsed);
+	}
 	bdi_update_write_bandwidth(bdi, elapsed, written);
 
 snapshot:
+	bdi->dirtied_stamp = dirtied;
 	bdi->written_stamp = written;
 	bdi->bw_time_stamp = now;
 }
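
For a feel of the units involved: dirty_rate in bdi_update_dirty_ratelimit() comes from two snapshots of the BDI_DIRTIED counter, scaled by HZ / elapsed into pages per second. A minimal user-space sketch of that snapshot/delta arithmetic, with assumed sample values (HZ = 1000 and a 200 ms interval):

	#include <stdio.h>

	#define HZ 1000	/* assumed jiffies per second */

	int main(void)
	{
		unsigned long dirtied_stamp = 100000;	/* counter at the last update */
		unsigned long dirtied = 105120;		/* counter now */
		unsigned long elapsed = 200;		/* jiffies since the last update */

		/* same formula as the kernel's dirty_rate computation above */
		unsigned long dirty_rate = (dirtied - dirtied_stamp) * HZ / elapsed;

		printf("dirty_rate = %lu pages/s (~%lu MB/s with 4 KiB pages)\n",
		       dirty_rate, dirty_rate * 4096 / (1024 * 1024));
		return 0;
	}

With these numbers, 5120 pages dirtied over 200 jiffies gives 25600 pages/s, about 100 MB/s at 4 KiB per page.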