aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
author: Wu Fengguang <fengguang.wu@intel.com> 2011-06-11 21:25:42 -0400
committer: Wu Fengguang <fengguang.wu@intel.com> 2011-12-18 01:20:27 -0500
commit: 83712358ba0a1497ce59a4f84ce4dd0f803fe6fc (patch)
tree: d17ab27a7bff50616e3b63ad137c004d9ccfbcb0 /mm/page-writeback.c
parent: 32c7f202a4801252a0f3578807b75a961f792870 (diff)
writeback: dirty ratelimit - think time compensation
Compensate the task's think time when computing the final pause time,
so that ->dirty_ratelimit can be executed accurately.

        think time := time spent outside of balance_dirty_pages()

In the rare case that the task slept longer than the 200ms period time
(resulting in negative pause time), the sleep time will be compensated in
the following periods, too, if it's less than 1 second.

Accumulated errors are carefully avoided as long as the max pause area
is not hit.

Pseudo code:

        period = pages_dirtied / task_ratelimit;
        think = jiffies - dirty_paused_when;
        pause = period - think;

1) normal case: period > think

        pause = period - think
        dirty_paused_when = jiffies + pause
        nr_dirtied = 0

                             period time
              |===============================>|
                  think time      pause time
              |===============>|==============>|
        ------|----------------|---------------|------------------------
        dirty_paused_when   jiffies

2) no pause case: period <= think

        don't pause; reduce future pause time by:
        dirty_paused_when += period
        nr_dirtied = 0

                           period time
              |===============================>|
                                  think time
              |===================================================>|
        ------|--------------------------------+-------------------|----
        dirty_paused_when                                        jiffies

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- mm/page-writeback.c 36
1 files changed, 32 insertions, 4 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96b3e7aa705..49193215582 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping,
1016 unsigned long background_thresh; 1016 unsigned long background_thresh;
1017 unsigned long dirty_thresh; 1017 unsigned long dirty_thresh;
1018 unsigned long bdi_thresh; 1018 unsigned long bdi_thresh;
1019 long period;
1019 long pause = 0; 1020 long pause = 0;
1020 long uninitialized_var(max_pause); 1021 long uninitialized_var(max_pause);
1021 bool dirty_exceeded = false; 1022 bool dirty_exceeded = false;
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping,
1026 unsigned long start_time = jiffies; 1027 unsigned long start_time = jiffies;
1027 1028
1028 for (;;) { 1029 for (;;) {
1030 unsigned long now = jiffies;
1031
1029 /* 1032 /*
1030 * Unstable writes are a feature of certain networked 1033 * Unstable writes are a feature of certain networked
1031 * filesystems (i.e. NFS) in which data may have been 1034 * filesystems (i.e. NFS) in which data may have been
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping,
1045 */ 1048 */
1046 freerun = dirty_freerun_ceiling(dirty_thresh, 1049 freerun = dirty_freerun_ceiling(dirty_thresh,
1047 background_thresh); 1050 background_thresh);
1048 if (nr_dirty <= freerun) 1051 if (nr_dirty <= freerun) {
1052 current->dirty_paused_when = now;
1053 current->nr_dirtied = 0;
1049 break; 1054 break;
1055 }
1050 1056
1051 if (unlikely(!writeback_in_progress(bdi))) 1057 if (unlikely(!writeback_in_progress(bdi)))
1052 bdi_start_background_writeback(bdi); 1058 bdi_start_background_writeback(bdi);
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping,
1104 task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >> 1110 task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
1105 RATELIMIT_CALC_SHIFT; 1111 RATELIMIT_CALC_SHIFT;
1106 if (unlikely(task_ratelimit == 0)) { 1112 if (unlikely(task_ratelimit == 0)) {
1113 period = max_pause;
1107 pause = max_pause; 1114 pause = max_pause;
1108 goto pause; 1115 goto pause;
1109 } 1116 }
1110 pause = HZ * pages_dirtied / task_ratelimit; 1117 period = HZ * pages_dirtied / task_ratelimit;
1118 pause = period;
1119 if (current->dirty_paused_when)
1120 pause -= now - current->dirty_paused_when;
1121 /*
1122 * For less than 1s think time (ext3/4 may block the dirtier
1123 * for up to 800ms from time to time on 1-HDD; so does xfs,
1124 * however at much less frequency), try to compensate it in
1125 * future periods by updating the virtual time; otherwise just
1126 * do a reset, as it may be a light dirtier.
1127 */
1111 if (unlikely(pause <= 0)) { 1128 if (unlikely(pause <= 0)) {
1112 trace_balance_dirty_pages(bdi, 1129 trace_balance_dirty_pages(bdi,
1113 dirty_thresh, 1130 dirty_thresh,
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping,
1118 dirty_ratelimit, 1135 dirty_ratelimit,
1119 task_ratelimit, 1136 task_ratelimit,
1120 pages_dirtied, 1137 pages_dirtied,
1138 period,
1121 pause, 1139 pause,
1122 start_time); 1140 start_time);
1141 if (pause < -HZ) {
1142 current->dirty_paused_when = now;
1143 current->nr_dirtied = 0;
1144 } else if (period) {
1145 current->dirty_paused_when += period;
1146 current->nr_dirtied = 0;
1147 }
1123 pause = 1; /* avoid resetting nr_dirtied_pause below */ 1148 pause = 1; /* avoid resetting nr_dirtied_pause below */
1124 break; 1149 break;
1125 } 1150 }
@@ -1135,11 +1160,15 @@ pause:
1135 dirty_ratelimit, 1160 dirty_ratelimit,
1136 task_ratelimit, 1161 task_ratelimit,
1137 pages_dirtied, 1162 pages_dirtied,
1163 period,
1138 pause, 1164 pause,
1139 start_time); 1165 start_time);
1140 __set_current_state(TASK_KILLABLE); 1166 __set_current_state(TASK_KILLABLE);
1141 io_schedule_timeout(pause); 1167 io_schedule_timeout(pause);
1142 1168
1169 current->dirty_paused_when = now + pause;
1170 current->nr_dirtied = 0;
1171
1143 /* 1172 /*
1144 * This is typically equal to (nr_dirty < dirty_thresh) and can 1173 * This is typically equal to (nr_dirty < dirty_thresh) and can
1145 * also keep "1000+ dd on a slow USB stick" under control. 1174 * also keep "1000+ dd on a slow USB stick" under control.
@@ -1167,11 +1196,10 @@ pause:
1167 if (!dirty_exceeded && bdi->dirty_exceeded) 1196 if (!dirty_exceeded && bdi->dirty_exceeded)
1168 bdi->dirty_exceeded = 0; 1197 bdi->dirty_exceeded = 0;
1169 1198
1170 current->nr_dirtied = 0;
1171 if (pause == 0) { /* in freerun area */ 1199 if (pause == 0) { /* in freerun area */
1172 current->nr_dirtied_pause = 1200 current->nr_dirtied_pause =
1173 dirty_poll_interval(nr_dirty, dirty_thresh); 1201 dirty_poll_interval(nr_dirty, dirty_thresh);
1174 } else if (pause <= max_pause / 4 && 1202 } else if (period <= max_pause / 4 &&
1175 pages_dirtied >= current->nr_dirtied_pause) { 1203 pages_dirtied >= current->nr_dirtied_pause) {
1176 current->nr_dirtied_pause = clamp_val( 1204 current->nr_dirtied_pause = clamp_val(
1177 dirty_ratelimit * (max_pause / 2) / HZ, 1205 dirty_ratelimit * (max_pause / 2) / HZ,