author		Tejun Heo <tj@kernel.org>	2015-05-22 17:13:28 -0400
committer	Jens Axboe <axboe@fb.com>	2015-06-02 10:33:34 -0400
commit		a88a341a73be4ef035ca26170c849f002797da27 (patch)
tree		3b9ba08daa3fdfb00d02ba21dfbce702a03826bc	/mm/page-writeback.c
parent		93f78d882865cb90020d0f80a9523c99cf46924c (diff)
writeback: move bandwidth related fields from backing_dev_info into bdi_writeback
Currently, a bdi (backing_dev_info) embeds a single wb (bdi_writeback) and the role of the separation is unclear. For cgroup support for writeback IOs, a bdi will be updated to host multiple wb's where each wb serves writeback IOs of a different cgroup on the bdi. To achieve that, a wb should carry all states necessary for servicing writeback IOs for a cgroup independently.

This patch moves bandwidth related fields from backing_dev_info into bdi_writeback.

* The moved fields are: bw_time_stamp, dirtied_stamp, written_stamp, write_bandwidth, avg_write_bandwidth, dirty_ratelimit, balanced_dirty_ratelimit, completions and dirty_exceeded.

* writeback_chunk_size() and over_bground_thresh() now take @wb instead of @bdi.

* bdi_writeout_fraction(bdi, ...)	-> wb_writeout_fraction(wb, ...)
  bdi_dirty_limit(bdi, ...)		-> wb_dirty_limit(wb, ...)
  bdi_position_ratio(bdi, ...)		-> wb_position_ratio(wb, ...)
  bdi_update_write_bandwidth(bdi, ...)	-> wb_update_write_bandwidth(wb, ...)
  [__]bdi_update_bandwidth(bdi, ...)	-> [__]wb_update_bandwidth(wb, ...)
  bdi_{max|min}_pause(bdi, ...)		-> wb_{max|min}_pause(wb, ...)
  bdi_dirty_limits(bdi, ...)		-> wb_dirty_limits(wb, ...)

* Init/exits of the relocated fields are moved to bdi_wb_init/exit() respectively. Note that explicit zeroing is dropped in the process as wb's are cleared in entirety anyway.

* As there's still only one bdi_writeback per backing_dev_info, all uses of bdi->stat[] are mechanically replaced with bdi->wb.stat[] introducing no behavior changes.

v2: Typo in description fixed as suggested by Jan.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
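For orientation, the relocated fields roughly take the following shape inside struct bdi_writeback after this change. This is a sketch only: the types and comments are inferred from the pre-move backing_dev_info fields (completions is a struct fprop_local_percpu from <linux/flex_proportions.h>, the rest are unsigned long counters plus an int flag), and the surrounding fields are elided rather than quoted from the header.

	struct bdi_writeback {
		/* ... bdi back-pointer, state, stat[] counters, list_lock, etc. ... */

		unsigned long bw_time_stamp;		/* last time write bandwidth was updated */
		unsigned long dirtied_stamp;
		unsigned long written_stamp;		/* pages written at bw_time_stamp */
		unsigned long write_bandwidth;		/* the estimated write bandwidth */
		unsigned long avg_write_bandwidth;	/* further smoothed write bw, > 0 */

		/*
		 * The base dirty throttle rate, recalculated roughly every
		 * 200ms; per-task dirty rates are curbed under it (see
		 * wb_update_dirty_ratelimit() below).
		 */
		unsigned long dirty_ratelimit;
		unsigned long balanced_dirty_ratelimit;

		struct fprop_local_percpu completions;	/* writeout completion events */
		int dirty_exceeded;

		/* ... */
	};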
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--	mm/page-writeback.c	262
1 file changed, 132 insertions(+), 130 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index dc673a035413..cd39ee91b7bb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -399,7 +399,7 @@ static unsigned long wp_next_time(unsigned long cur_time)
 static inline void __wb_writeout_inc(struct bdi_writeback *wb)
 {
 	__inc_wb_stat(wb, WB_WRITTEN);
-	__fprop_inc_percpu_max(&writeout_completions, &wb->bdi->completions,
+	__fprop_inc_percpu_max(&writeout_completions, &wb->completions,
 			       wb->bdi->max_prop_frac);
 	/* First event after period switching was turned off? */
 	if (!unlikely(writeout_period_time)) {
@@ -427,10 +427,10 @@ EXPORT_SYMBOL_GPL(wb_writeout_inc);
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
-static void bdi_writeout_fraction(struct backing_dev_info *bdi,
+static void wb_writeout_fraction(struct bdi_writeback *wb,
 				  long *numerator, long *denominator)
 {
-	fprop_fraction_percpu(&writeout_completions, &bdi->completions,
+	fprop_fraction_percpu(&writeout_completions, &wb->completions,
 			      numerator, denominator);
 }
 
@@ -516,11 +516,11 @@ static unsigned long hard_dirty_limit(unsigned long thresh)
 }
 
 /**
- * bdi_dirty_limit - @bdi's share of dirty throttling threshold
- * @bdi: the backing_dev_info to query
+ * wb_dirty_limit - @wb's share of dirty throttling threshold
+ * @wb: bdi_writeback to query
  * @dirty: global dirty limit in pages
  *
- * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
+ * Returns @wb's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
  *
  * Note that balance_dirty_pages() will only seriously take it as a hard limit
@@ -528,34 +528,35 @@ static unsigned long hard_dirty_limit(unsigned long thresh)
  * control. For example, when the device is completely stalled due to some error
  * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
  * In the other normal situations, it acts more gently by throttling the tasks
- * more (rather than completely block them) when the bdi dirty pages go high.
+ * more (rather than completely block them) when the wb dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
  * - piling up dirty pages (that will take long time to sync) on slow devices
  *
- * The bdi's share of dirty limit will be adapting to its throughput and
+ * The wb's share of dirty limit will be adapting to its throughput and
  * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
  */
-unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
+unsigned long wb_dirty_limit(struct bdi_writeback *wb, unsigned long dirty)
 {
-	u64 bdi_dirty;
+	struct backing_dev_info *bdi = wb->bdi;
+	u64 wb_dirty;
 	long numerator, denominator;
 
 	/*
 	 * Calculate this BDI's share of the dirty ratio.
 	 */
-	bdi_writeout_fraction(bdi, &numerator, &denominator);
+	wb_writeout_fraction(wb, &numerator, &denominator);
 
-	bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
-	bdi_dirty *= numerator;
-	do_div(bdi_dirty, denominator);
+	wb_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
+	wb_dirty *= numerator;
+	do_div(wb_dirty, denominator);
 
-	bdi_dirty += (dirty * bdi->min_ratio) / 100;
-	if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
-		bdi_dirty = dirty * bdi->max_ratio / 100;
+	wb_dirty += (dirty * bdi->min_ratio) / 100;
+	if (wb_dirty > (dirty * bdi->max_ratio) / 100)
+		wb_dirty = dirty * bdi->max_ratio / 100;
 
-	return bdi_dirty;
+	return wb_dirty;
 }
 
 /*
@@ -664,14 +665,14 @@ static long long pos_ratio_polynom(unsigned long setpoint,
  * card's bdi_dirty may rush to many times higher than bdi_setpoint.
  * - the bdi dirty thresh drops quickly due to change of JBOD workload
  */
-static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
+static unsigned long wb_position_ratio(struct bdi_writeback *wb,
 					unsigned long thresh,
 					unsigned long bg_thresh,
 					unsigned long dirty,
 					unsigned long bdi_thresh,
 					unsigned long bdi_dirty)
 {
-	unsigned long write_bw = bdi->avg_write_bandwidth;
+	unsigned long write_bw = wb->avg_write_bandwidth;
 	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
 	unsigned long limit = hard_dirty_limit(thresh);
 	unsigned long x_intercept;
@@ -702,12 +703,12 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	 * consume arbitrary amount of RAM because it is accounted in
 	 * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
 	 *
-	 * Here, in bdi_position_ratio(), we calculate pos_ratio based on
+	 * Here, in wb_position_ratio(), we calculate pos_ratio based on
 	 * two values: bdi_dirty and bdi_thresh. Let's consider an example:
 	 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global
 	 * limits are set by default to 10% and 20% (background and throttle).
 	 * Then bdi_thresh is 1% of 20% of 16GB. This amounts to ~8K pages.
-	 * bdi_dirty_limit(bdi, bg_thresh) is about ~4K pages. bdi_setpoint is
+	 * wb_dirty_limit(wb, bg_thresh) is about ~4K pages. bdi_setpoint is
 	 * about ~6K pages (as the average of background and throttle bdi
 	 * limits). The 3rd order polynomial will provide positive feedback if
 	 * bdi_dirty is under bdi_setpoint and vice versa.
@@ -717,7 +718,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	 * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB
 	 * in the example above).
 	 */
-	if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
+	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
 		long long bdi_pos_ratio;
 		unsigned long bdi_bg_thresh;
 
@@ -842,13 +843,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	return pos_ratio;
 }
 
-static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
+static void wb_update_write_bandwidth(struct bdi_writeback *wb,
 				       unsigned long elapsed,
 				       unsigned long written)
 {
 	const unsigned long period = roundup_pow_of_two(3 * HZ);
-	unsigned long avg = bdi->avg_write_bandwidth;
-	unsigned long old = bdi->write_bandwidth;
+	unsigned long avg = wb->avg_write_bandwidth;
+	unsigned long old = wb->write_bandwidth;
 	u64 bw;
 
 	/*
@@ -861,14 +862,14 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
 	 * @written may have decreased due to account_page_redirty().
 	 * Avoid underflowing @bw calculation.
 	 */
-	bw = written - min(written, bdi->written_stamp);
+	bw = written - min(written, wb->written_stamp);
 	bw *= HZ;
 	if (unlikely(elapsed > period)) {
 		do_div(bw, elapsed);
 		avg = bw;
 		goto out;
 	}
-	bw += (u64)bdi->write_bandwidth * (period - elapsed);
+	bw += (u64)wb->write_bandwidth * (period - elapsed);
 	bw >>= ilog2(period);
 
 	/*
@@ -881,8 +882,8 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
 		avg += (old - avg) >> 3;
 
 out:
-	bdi->write_bandwidth = bw;
-	bdi->avg_write_bandwidth = avg;
+	wb->write_bandwidth = bw;
+	wb->avg_write_bandwidth = avg;
 }
 
 /*
@@ -947,20 +948,20 @@ static void global_update_bandwidth(unsigned long thresh,
  * Normal bdi tasks will be curbed at or below it in long term.
  * Obviously it should be around (write_bw / N) when there are N dd tasks.
  */
-static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
+static void wb_update_dirty_ratelimit(struct bdi_writeback *wb,
 				       unsigned long thresh,
 				       unsigned long bg_thresh,
 				       unsigned long dirty,
 				       unsigned long bdi_thresh,
 				       unsigned long bdi_dirty,
 				       unsigned long dirtied,
 				       unsigned long elapsed)
 {
 	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
 	unsigned long limit = hard_dirty_limit(thresh);
 	unsigned long setpoint = (freerun + limit) / 2;
-	unsigned long write_bw = bdi->avg_write_bandwidth;
-	unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
+	unsigned long write_bw = wb->avg_write_bandwidth;
+	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
 	unsigned long dirty_rate;
 	unsigned long task_ratelimit;
 	unsigned long balanced_dirty_ratelimit;
@@ -972,10 +973,10 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 	 * The dirty rate will match the writeout rate in long term, except
 	 * when dirty pages are truncated by userspace or re-dirtied by FS.
 	 */
-	dirty_rate = (dirtied - bdi->dirtied_stamp) * HZ / elapsed;
+	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
 
-	pos_ratio = bdi_position_ratio(bdi, thresh, bg_thresh, dirty,
+	pos_ratio = wb_position_ratio(wb, thresh, bg_thresh, dirty,
 				       bdi_thresh, bdi_dirty);
 	/*
 	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
 	 */
@@ -1059,31 +1060,31 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 
 	/*
 	 * For strictlimit case, calculations above were based on bdi counters
-	 * and limits (starting from pos_ratio = bdi_position_ratio() and up to
+	 * and limits (starting from pos_ratio = wb_position_ratio() and up to
 	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
 	 * Hence, to calculate "step" properly, we have to use bdi_dirty as
 	 * "dirty" and bdi_setpoint as "setpoint".
 	 *
 	 * We rampup dirty_ratelimit forcibly if bdi_dirty is low because
 	 * it's possible that bdi_thresh is close to zero due to inactivity
-	 * of backing device (see the implementation of bdi_dirty_limit()).
+	 * of backing device (see the implementation of wb_dirty_limit()).
 	 */
-	if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
+	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
 		dirty = bdi_dirty;
 		if (bdi_dirty < 8)
 			setpoint = bdi_dirty + 1;
 		else
 			setpoint = (bdi_thresh +
-				    bdi_dirty_limit(bdi, bg_thresh)) / 2;
+				    wb_dirty_limit(wb, bg_thresh)) / 2;
 	}
 
 	if (dirty < setpoint) {
-		x = min3(bdi->balanced_dirty_ratelimit,
+		x = min3(wb->balanced_dirty_ratelimit,
 			 balanced_dirty_ratelimit, task_ratelimit);
 		if (dirty_ratelimit < x)
 			step = x - dirty_ratelimit;
 	} else {
-		x = max3(bdi->balanced_dirty_ratelimit,
+		x = max3(wb->balanced_dirty_ratelimit,
 			 balanced_dirty_ratelimit, task_ratelimit);
 		if (dirty_ratelimit > x)
 			step = dirty_ratelimit - x;
@@ -1105,22 +1106,22 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 	else
 		dirty_ratelimit -= step;
 
-	bdi->dirty_ratelimit = max(dirty_ratelimit, 1UL);
-	bdi->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
+	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
+	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
 
-	trace_bdi_dirty_ratelimit(bdi, dirty_rate, task_ratelimit);
+	trace_bdi_dirty_ratelimit(wb->bdi, dirty_rate, task_ratelimit);
 }
 
-void __bdi_update_bandwidth(struct backing_dev_info *bdi,
+void __wb_update_bandwidth(struct bdi_writeback *wb,
 			    unsigned long thresh,
 			    unsigned long bg_thresh,
 			    unsigned long dirty,
 			    unsigned long bdi_thresh,
 			    unsigned long bdi_dirty,
 			    unsigned long start_time)
 {
 	unsigned long now = jiffies;
-	unsigned long elapsed = now - bdi->bw_time_stamp;
+	unsigned long elapsed = now - wb->bw_time_stamp;
 	unsigned long dirtied;
 	unsigned long written;
 
@@ -1130,44 +1131,44 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed < BANDWIDTH_INTERVAL)
 		return;
 
-	dirtied = percpu_counter_read(&bdi->wb.stat[WB_DIRTIED]);
-	written = percpu_counter_read(&bdi->wb.stat[WB_WRITTEN]);
+	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
+	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
 
 	/*
 	 * Skip quiet periods when disk bandwidth is under-utilized.
 	 * (at least 1s idle time between two flusher runs)
 	 */
-	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
+	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
 		goto snapshot;
 
 	if (thresh) {
 		global_update_bandwidth(thresh, dirty, now);
-		bdi_update_dirty_ratelimit(bdi, thresh, bg_thresh, dirty,
+		wb_update_dirty_ratelimit(wb, thresh, bg_thresh, dirty,
 					   bdi_thresh, bdi_dirty,
 					   dirtied, elapsed);
 	}
-	bdi_update_write_bandwidth(bdi, elapsed, written);
+	wb_update_write_bandwidth(wb, elapsed, written);
 
 snapshot:
-	bdi->dirtied_stamp = dirtied;
-	bdi->written_stamp = written;
-	bdi->bw_time_stamp = now;
+	wb->dirtied_stamp = dirtied;
+	wb->written_stamp = written;
+	wb->bw_time_stamp = now;
 }
 
-static void bdi_update_bandwidth(struct backing_dev_info *bdi,
+static void wb_update_bandwidth(struct bdi_writeback *wb,
 				 unsigned long thresh,
 				 unsigned long bg_thresh,
 				 unsigned long dirty,
 				 unsigned long bdi_thresh,
 				 unsigned long bdi_dirty,
 				 unsigned long start_time)
 {
-	if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL))
+	if (time_is_after_eq_jiffies(wb->bw_time_stamp + BANDWIDTH_INTERVAL))
 		return;
-	spin_lock(&bdi->wb.list_lock);
-	__bdi_update_bandwidth(bdi, thresh, bg_thresh, dirty,
+	spin_lock(&wb->list_lock);
+	__wb_update_bandwidth(wb, thresh, bg_thresh, dirty,
 			       bdi_thresh, bdi_dirty, start_time);
-	spin_unlock(&bdi->wb.list_lock);
+	spin_unlock(&wb->list_lock);
 }
 
 /*
@@ -1187,10 +1188,10 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
-static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
+static unsigned long wb_max_pause(struct bdi_writeback *wb,
 				  unsigned long bdi_dirty)
 {
-	unsigned long bw = bdi->avg_write_bandwidth;
+	unsigned long bw = wb->avg_write_bandwidth;
 	unsigned long t;
 
 	/*
@@ -1206,14 +1207,14 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
 	return min_t(unsigned long, t, MAX_PAUSE);
 }
 
-static long bdi_min_pause(struct backing_dev_info *bdi,
+static long wb_min_pause(struct bdi_writeback *wb,
 			  long max_pause,
 			  unsigned long task_ratelimit,
 			  unsigned long dirty_ratelimit,
 			  int *nr_dirtied_pause)
 {
-	long hi = ilog2(bdi->avg_write_bandwidth);
-	long lo = ilog2(bdi->dirty_ratelimit);
+	long hi = ilog2(wb->avg_write_bandwidth);
+	long lo = ilog2(wb->dirty_ratelimit);
 	long t;		/* target pause */
 	long pause;	/* estimated next pause */
 	int pages;	/* target nr_dirtied_pause */
@@ -1281,14 +1282,13 @@ static long bdi_min_pause(struct backing_dev_info *bdi,
 	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
 }
 
-static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
+static inline void wb_dirty_limits(struct bdi_writeback *wb,
 				    unsigned long dirty_thresh,
 				    unsigned long background_thresh,
 				    unsigned long *bdi_dirty,
 				    unsigned long *bdi_thresh,
 				    unsigned long *bdi_bg_thresh)
 {
-	struct bdi_writeback *wb = &bdi->wb;
 	unsigned long wb_reclaimable;
 
 	/*
@@ -1301,10 +1301,10 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
 	 * In this case we don't want to hard throttle the USB key
 	 * dirtiers for 100 seconds until bdi_dirty drops under
 	 * bdi_thresh. Instead the auxiliary bdi control line in
-	 * bdi_position_ratio() will let the dirtier task progress
+	 * wb_position_ratio() will let the dirtier task progress
 	 * at some rate <= (write_bw / 2) for bringing down bdi_dirty.
 	 */
-	*bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
+	*bdi_thresh = wb_dirty_limit(wb, dirty_thresh);
 
 	if (bdi_bg_thresh)
 		*bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh *
@@ -1354,6 +1354,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long dirty_ratelimit;
 	unsigned long pos_ratio;
 	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+	struct bdi_writeback *wb = &bdi->wb;
 	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
 	unsigned long start_time = jiffies;
 
@@ -1378,8 +1379,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 
 	if (unlikely(strictlimit)) {
-		bdi_dirty_limits(bdi, dirty_thresh, background_thresh,
+		wb_dirty_limits(wb, dirty_thresh, background_thresh,
 				 &bdi_dirty, &bdi_thresh, &bg_thresh);
 
 		dirty = bdi_dirty;
 		thresh = bdi_thresh;
@@ -1410,28 +1411,28 @@ static void balance_dirty_pages(struct address_space *mapping,
 			bdi_start_background_writeback(bdi);
 
 		if (!strictlimit)
-			bdi_dirty_limits(bdi, dirty_thresh, background_thresh,
+			wb_dirty_limits(wb, dirty_thresh, background_thresh,
 					 &bdi_dirty, &bdi_thresh, NULL);
 
 		dirty_exceeded = (bdi_dirty > bdi_thresh) &&
 				 ((nr_dirty > dirty_thresh) || strictlimit);
-		if (dirty_exceeded && !bdi->dirty_exceeded)
-			bdi->dirty_exceeded = 1;
+		if (dirty_exceeded && !wb->dirty_exceeded)
+			wb->dirty_exceeded = 1;
 
-		bdi_update_bandwidth(bdi, dirty_thresh, background_thresh,
+		wb_update_bandwidth(wb, dirty_thresh, background_thresh,
 				     nr_dirty, bdi_thresh, bdi_dirty,
 				     start_time);
 
-		dirty_ratelimit = bdi->dirty_ratelimit;
-		pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
+		dirty_ratelimit = wb->dirty_ratelimit;
+		pos_ratio = wb_position_ratio(wb, dirty_thresh,
 					       background_thresh, nr_dirty,
 					       bdi_thresh, bdi_dirty);
 		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
 							RATELIMIT_CALC_SHIFT;
-		max_pause = bdi_max_pause(bdi, bdi_dirty);
-		min_pause = bdi_min_pause(bdi, max_pause,
+		max_pause = wb_max_pause(wb, bdi_dirty);
+		min_pause = wb_min_pause(wb, max_pause,
 					  task_ratelimit, dirty_ratelimit,
 					  &nr_dirtied_pause);
 
 		if (unlikely(task_ratelimit == 0)) {
 			period = max_pause;
@@ -1515,15 +1516,15 @@ pause:
 		 * more page. However bdi_dirty has accounting errors. So use
 		 * the larger and more IO friendly wb_stat_error.
 		 */
-		if (bdi_dirty <= wb_stat_error(&bdi->wb))
+		if (bdi_dirty <= wb_stat_error(wb))
 			break;
 
 		if (fatal_signal_pending(current))
 			break;
 	}
 
-	if (!dirty_exceeded && bdi->dirty_exceeded)
-		bdi->dirty_exceeded = 0;
+	if (!dirty_exceeded && wb->dirty_exceeded)
+		wb->dirty_exceeded = 0;
 
 	if (writeback_in_progress(bdi))
 		return;
@@ -1577,6 +1578,7 @@ DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
 void balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+	struct bdi_writeback *wb = &bdi->wb;
 	int ratelimit;
 	int *p;
 
@@ -1584,7 +1586,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 		return;
 
 	ratelimit = current->nr_dirtied_pause;
-	if (bdi->dirty_exceeded)
+	if (wb->dirty_exceeded)
 		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
 
 	preempt_disable();