diff options
author | Wu Fengguang <fengguang.wu@intel.com> | 2011-06-11 21:25:42 -0400 |
---|---|---|
committer | Wu Fengguang <fengguang.wu@intel.com> | 2011-12-18 01:20:27 -0500 |
commit | 83712358ba0a1497ce59a4f84ce4dd0f803fe6fc (patch) | |
tree | d17ab27a7bff50616e3b63ad137c004d9ccfbcb0 /mm | |
parent | 32c7f202a4801252a0f3578807b75a961f792870 (diff) |
writeback: dirty ratelimit - think time compensation
Compensate the task's think time when computing the final pause time,
so that ->dirty_ratelimit can be executed accurately.
think time := time spent outside of balance_dirty_pages()
In the rare case that the task slept longer than the 200ms period time
(resulting in a negative pause time), the sleep time will be compensated in
the following periods, too, if it's less than 1 second.
Accumulated errors are carefully avoided as long as the max pause area
is not hit.
Pseudo code:
period = pages_dirtied / task_ratelimit;
think = jiffies - dirty_paused_when;
pause = period - think;
1) normal case: period > think
pause = period - think
dirty_paused_when = jiffies + pause
nr_dirtied = 0
                 period time
      |===============================>|
         think time      pause time
      |===============>|==============>|
------|----------------|---------------|------------------------
dirty_paused_when   jiffies
2) no pause case: period <= think
don't pause; reduce future pause time by:
dirty_paused_when += period
nr_dirtied = 0
                 period time
      |===============================>|
                          think time
      |===================================================>|
------|--------------------------------+-------------------|----
dirty_paused_when                                        jiffies
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/page-writeback.c | 36 |
1 files changed, 32 insertions, 4 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 96b3e7aa705c..491932155825 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1016 | unsigned long background_thresh; | 1016 | unsigned long background_thresh; |
1017 | unsigned long dirty_thresh; | 1017 | unsigned long dirty_thresh; |
1018 | unsigned long bdi_thresh; | 1018 | unsigned long bdi_thresh; |
1019 | long period; | ||
1019 | long pause = 0; | 1020 | long pause = 0; |
1020 | long uninitialized_var(max_pause); | 1021 | long uninitialized_var(max_pause); |
1021 | bool dirty_exceeded = false; | 1022 | bool dirty_exceeded = false; |
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1026 | unsigned long start_time = jiffies; | 1027 | unsigned long start_time = jiffies; |
1027 | 1028 | ||
1028 | for (;;) { | 1029 | for (;;) { |
1030 | unsigned long now = jiffies; | ||
1031 | |||
1029 | /* | 1032 | /* |
1030 | * Unstable writes are a feature of certain networked | 1033 | * Unstable writes are a feature of certain networked |
1031 | * filesystems (i.e. NFS) in which data may have been | 1034 | * filesystems (i.e. NFS) in which data may have been |
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1045 | */ | 1048 | */ |
1046 | freerun = dirty_freerun_ceiling(dirty_thresh, | 1049 | freerun = dirty_freerun_ceiling(dirty_thresh, |
1047 | background_thresh); | 1050 | background_thresh); |
1048 | if (nr_dirty <= freerun) | 1051 | if (nr_dirty <= freerun) { |
1052 | current->dirty_paused_when = now; | ||
1053 | current->nr_dirtied = 0; | ||
1049 | break; | 1054 | break; |
1055 | } | ||
1050 | 1056 | ||
1051 | if (unlikely(!writeback_in_progress(bdi))) | 1057 | if (unlikely(!writeback_in_progress(bdi))) |
1052 | bdi_start_background_writeback(bdi); | 1058 | bdi_start_background_writeback(bdi); |
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1104 | task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >> | 1110 | task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >> |
1105 | RATELIMIT_CALC_SHIFT; | 1111 | RATELIMIT_CALC_SHIFT; |
1106 | if (unlikely(task_ratelimit == 0)) { | 1112 | if (unlikely(task_ratelimit == 0)) { |
1113 | period = max_pause; | ||
1107 | pause = max_pause; | 1114 | pause = max_pause; |
1108 | goto pause; | 1115 | goto pause; |
1109 | } | 1116 | } |
1110 | pause = HZ * pages_dirtied / task_ratelimit; | 1117 | period = HZ * pages_dirtied / task_ratelimit; |
1118 | pause = period; | ||
1119 | if (current->dirty_paused_when) | ||
1120 | pause -= now - current->dirty_paused_when; | ||
1121 | /* | ||
1122 | * For less than 1s think time (ext3/4 may block the dirtier | ||
1123 | * for up to 800ms from time to time on 1-HDD; so does xfs, | ||
1124 | * however at much less frequency), try to compensate it in | ||
1125 | * future periods by updating the virtual time; otherwise just | ||
1126 | * do a reset, as it may be a light dirtier. | ||
1127 | */ | ||
1111 | if (unlikely(pause <= 0)) { | 1128 | if (unlikely(pause <= 0)) { |
1112 | trace_balance_dirty_pages(bdi, | 1129 | trace_balance_dirty_pages(bdi, |
1113 | dirty_thresh, | 1130 | dirty_thresh, |
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
1118 | dirty_ratelimit, | 1135 | dirty_ratelimit, |
1119 | task_ratelimit, | 1136 | task_ratelimit, |
1120 | pages_dirtied, | 1137 | pages_dirtied, |
1138 | period, | ||
1121 | pause, | 1139 | pause, |
1122 | start_time); | 1140 | start_time); |
1141 | if (pause < -HZ) { | ||
1142 | current->dirty_paused_when = now; | ||
1143 | current->nr_dirtied = 0; | ||
1144 | } else if (period) { | ||
1145 | current->dirty_paused_when += period; | ||
1146 | current->nr_dirtied = 0; | ||
1147 | } | ||
1123 | pause = 1; /* avoid resetting nr_dirtied_pause below */ | 1148 | pause = 1; /* avoid resetting nr_dirtied_pause below */ |
1124 | break; | 1149 | break; |
1125 | } | 1150 | } |
@@ -1135,11 +1160,15 @@ pause: | |||
1135 | dirty_ratelimit, | 1160 | dirty_ratelimit, |
1136 | task_ratelimit, | 1161 | task_ratelimit, |
1137 | pages_dirtied, | 1162 | pages_dirtied, |
1163 | period, | ||
1138 | pause, | 1164 | pause, |
1139 | start_time); | 1165 | start_time); |
1140 | __set_current_state(TASK_KILLABLE); | 1166 | __set_current_state(TASK_KILLABLE); |
1141 | io_schedule_timeout(pause); | 1167 | io_schedule_timeout(pause); |
1142 | 1168 | ||
1169 | current->dirty_paused_when = now + pause; | ||
1170 | current->nr_dirtied = 0; | ||
1171 | |||
1143 | /* | 1172 | /* |
1144 | * This is typically equal to (nr_dirty < dirty_thresh) and can | 1173 | * This is typically equal to (nr_dirty < dirty_thresh) and can |
1145 | * also keep "1000+ dd on a slow USB stick" under control. | 1174 | * also keep "1000+ dd on a slow USB stick" under control. |
@@ -1167,11 +1196,10 @@ pause: | |||
1167 | if (!dirty_exceeded && bdi->dirty_exceeded) | 1196 | if (!dirty_exceeded && bdi->dirty_exceeded) |
1168 | bdi->dirty_exceeded = 0; | 1197 | bdi->dirty_exceeded = 0; |
1169 | 1198 | ||
1170 | current->nr_dirtied = 0; | ||
1171 | if (pause == 0) { /* in freerun area */ | 1199 | if (pause == 0) { /* in freerun area */ |
1172 | current->nr_dirtied_pause = | 1200 | current->nr_dirtied_pause = |
1173 | dirty_poll_interval(nr_dirty, dirty_thresh); | 1201 | dirty_poll_interval(nr_dirty, dirty_thresh); |
1174 | } else if (pause <= max_pause / 4 && | 1202 | } else if (period <= max_pause / 4 && |
1175 | pages_dirtied >= current->nr_dirtied_pause) { | 1203 | pages_dirtied >= current->nr_dirtied_pause) { |
1176 | current->nr_dirtied_pause = clamp_val( | 1204 | current->nr_dirtied_pause = clamp_val( |
1177 | dirty_ratelimit * (max_pause / 2) / HZ, | 1205 | dirty_ratelimit * (max_pause / 2) / HZ, |