 include/linux/sched.h            |  1
 include/trace/events/writeback.h | 14
 kernel/fork.c                    |  1
 mm/page-writeback.c              | 36
 4 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc5..984c3b295978 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,7 @@ struct task_struct {
 	 */
 	int nr_dirtied;
 	int nr_dirtied_pause;
+	unsigned long dirty_paused_when; /* start of a write-and-pause period */
 
 #ifdef CONFIG_LATENCYTOP
 	int latency_record_count;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 99d1d0decf88..8588a8918023 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages,
 		 unsigned long dirty_ratelimit,
 		 unsigned long task_ratelimit,
 		 unsigned long dirtied,
+		 unsigned long period,
 		 long pause,
 		 unsigned long start_time),
 
 	TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
 		dirty_ratelimit, task_ratelimit,
-		dirtied, pause, start_time),
+		dirtied, period, pause, start_time),
 
 	TP_STRUCT__entry(
 		__array(	 char,	bdi, 32)
@@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages,
 		__field(unsigned int,	dirtied_pause)
 		__field(unsigned long,	paused)
 		__field(	 long,	pause)
+		__field(unsigned long,	period)
+		__field(	 long,	think)
 	),
 
 	TP_fast_assign(
@@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages,
 		__entry->task_ratelimit = KBps(task_ratelimit);
 		__entry->dirtied	= dirtied;
 		__entry->dirtied_pause	= current->nr_dirtied_pause;
+		__entry->think		= current->dirty_paused_when == 0 ? 0 :
+			 (long)(jiffies - current->dirty_paused_when) * 1000/HZ;
+		__entry->period		= period * 1000 / HZ;
 		__entry->pause		= pause * 1000 / HZ;
 		__entry->paused		= (jiffies - start_time) * 1000 / HZ;
 	),
@@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages,
 		  "bdi_setpoint=%lu bdi_dirty=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
 		  "dirtied=%u dirtied_pause=%u "
-		  "paused=%lu pause=%ld",
+		  "paused=%lu pause=%ld period=%lu think=%ld",
 		  __entry->bdi,
 		  __entry->limit,
 		  __entry->setpoint,
@@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages,
 		  __entry->dirtied,
 		  __entry->dirtied_pause,
 		  __entry->paused,	/* ms */
-		  __entry->pause	/* ms */
+		  __entry->pause,	/* ms */
+		  __entry->period,	/* ms */
+		  __entry->think	/* ms */
 	  )
 );
 
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d088..f8668cf6a32d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1296,6 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->nr_dirtied = 0;
 	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+	p->dirty_paused_when = 0;
 
 	/*
 	 * Ok, make it visible to the rest of the system.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96b3e7aa705c..491932155825 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
+	long period;
 	long pause = 0;
 	long uninitialized_var(max_pause);
 	bool dirty_exceeded = false;
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long start_time = jiffies;
 
 	for (;;) {
+		unsigned long now = jiffies;
+
 		/*
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		freerun = dirty_freerun_ceiling(dirty_thresh,
 						background_thresh);
-		if (nr_dirty <= freerun)
+		if (nr_dirty <= freerun) {
+			current->dirty_paused_when = now;
+			current->nr_dirtied = 0;
 			break;
+		}
 
 		if (unlikely(!writeback_in_progress(bdi)))
 			bdi_start_background_writeback(bdi);
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping,
 		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
 							RATELIMIT_CALC_SHIFT;
 		if (unlikely(task_ratelimit == 0)) {
+			period = max_pause;
 			pause = max_pause;
 			goto pause;
 		}
-		pause = HZ * pages_dirtied / task_ratelimit;
+		period = HZ * pages_dirtied / task_ratelimit;
+		pause = period;
+		if (current->dirty_paused_when)
+			pause -= now - current->dirty_paused_when;
+		/*
+		 * For less than 1s think time (ext3/4 may block the dirtier
+		 * for up to 800ms from time to time on 1-HDD; so does xfs,
+		 * however at much less frequency), try to compensate it in
+		 * future periods by updating the virtual time; otherwise just
+		 * do a reset, as it may be a light dirtier.
+		 */
 		if (unlikely(pause <= 0)) {
 			trace_balance_dirty_pages(bdi,
 						  dirty_thresh,
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping,
 						  dirty_ratelimit,
 						  task_ratelimit,
 						  pages_dirtied,
+						  period,
 						  pause,
 						  start_time);
+			if (pause < -HZ) {
+				current->dirty_paused_when = now;
+				current->nr_dirtied = 0;
+			} else if (period) {
+				current->dirty_paused_when += period;
+				current->nr_dirtied = 0;
+			}
 			pause = 1; /* avoid resetting nr_dirtied_pause below */
 			break;
 		}
@@ -1135,11 +1160,15 @@ pause:
 					  dirty_ratelimit,
 					  task_ratelimit,
 					  pages_dirtied,
+					  period,
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
+		current->dirty_paused_when = now + pause;
+		current->nr_dirtied = 0;
+
 		/*
 		 * This is typically equal to (nr_dirty < dirty_thresh) and can
 		 * also keep "1000+ dd on a slow USB stick" under control.
@@ -1167,11 +1196,10 @@ pause:
 	if (!dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
-	current->nr_dirtied = 0;
 	if (pause == 0) { /* in freerun area */
 		current->nr_dirtied_pause =
 			dirty_poll_interval(nr_dirty, dirty_thresh);
-	} else if (pause <= max_pause / 4 &&
+	} else if (period <= max_pause / 4 &&
 		   pages_dirtied >= current->nr_dirtied_pause) {
 		current->nr_dirtied_pause = clamp_val(
 			dirty_ratelimit * (max_pause / 2) / HZ,
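
To make the new arithmetic easier to follow, here is a small user-space sketch of the think-time compensation the patched balance_dirty_pages() performs. It is illustrative only, not code from the patch: the helper name compensated_pause() is hypothetical, and jiffies are simplified to plain milliseconds. The nominal sleep for a period is HZ * pages_dirtied / task_ratelimit; any time the task has already spent between dirtying bursts since dirty_paused_when is credited against it.

/*
 * Illustrative sketch only (not from the patch): think-time
 * compensation, with jiffies replaced by plain milliseconds and the
 * task state reduced to the new dirty_paused_when timestamp.
 */
#include <stdio.h>

/* Hypothetical helper: how long should the dirtier sleep now? */
static long compensated_pause(long period, long now, long dirty_paused_when)
{
	long pause = period;		/* nominal: HZ * pages_dirtied / task_ratelimit */

	if (dirty_paused_when)		/* 0 means no write-and-pause period is open */
		pause -= now - dirty_paused_when;	/* credit the think time */
	return pause;			/* <= 0: the task has already waited long enough */
}

int main(void)
{
	/* Dirtied enough for a 200ms period, but spent 150ms "thinking"
	 * since the last pause ended at t=1000ms: sleep only 50ms. */
	printf("pause = %ldms\n", compensated_pause(200, 1150, 1000));

	/* No period open yet (dirty_paused_when == 0): full nominal pause. */
	printf("pause = %ldms\n", compensated_pause(200, 1150, 0));
	return 0;
}

As in the patch, a computed pause at or below zero is not slept at all: a small deficit is carried into future periods by advancing dirty_paused_when by the full period, while a deficit beyond one second (pause < -HZ) simply resets the period, since the task is probably a light dirtier.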