-rw-r--r--  include/linux/sched.h              1
-rw-r--r--  include/trace/events/writeback.h  14
-rw-r--r--  kernel/fork.c                      1
-rw-r--r--  mm/page-writeback.c               36
4 files changed, 45 insertions, 7 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc..984c3b29597 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,7 @@ struct task_struct {
 	 */
 	int nr_dirtied;
 	int nr_dirtied_pause;
+	unsigned long dirty_paused_when; /* start of a write-and-pause period */
 
 #ifdef CONFIG_LATENCYTOP
 	int latency_record_count;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 99d1d0decf8..8588a891802 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages,
 		 unsigned long dirty_ratelimit,
 		 unsigned long task_ratelimit,
 		 unsigned long dirtied,
+		 unsigned long period,
 		 long pause,
 		 unsigned long start_time),
 
 	TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
 		dirty_ratelimit, task_ratelimit,
-		dirtied, pause, start_time),
+		dirtied, period, pause, start_time),
 
 	TP_STRUCT__entry(
 		__array(	 char,	bdi, 32)
@@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages,
 		__field(unsigned int,	dirtied_pause)
 		__field(unsigned long,	paused)
 		__field(	 long,	pause)
+		__field(unsigned long,	period)
+		__field(	 long,	think)
 	),
 
 	TP_fast_assign(
@@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages,
 		__entry->task_ratelimit	= KBps(task_ratelimit);
 		__entry->dirtied	= dirtied;
 		__entry->dirtied_pause	= current->nr_dirtied_pause;
+		__entry->think		= current->dirty_paused_when == 0 ? 0 :
+			 (long)(jiffies - current->dirty_paused_when) * 1000/HZ;
+		__entry->period		= period * 1000 / HZ;
 		__entry->pause		= pause * 1000 / HZ;
 		__entry->paused		= (jiffies - start_time) * 1000 / HZ;
 	),
@@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages,
346 "bdi_setpoint=%lu bdi_dirty=%lu " 352 "bdi_setpoint=%lu bdi_dirty=%lu "
347 "dirty_ratelimit=%lu task_ratelimit=%lu " 353 "dirty_ratelimit=%lu task_ratelimit=%lu "
348 "dirtied=%u dirtied_pause=%u " 354 "dirtied=%u dirtied_pause=%u "
349 "paused=%lu pause=%ld", 355 "paused=%lu pause=%ld period=%lu think=%ld",
350 __entry->bdi, 356 __entry->bdi,
351 __entry->limit, 357 __entry->limit,
352 __entry->setpoint, 358 __entry->setpoint,
@@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages,
 		  __entry->dirtied,
 		  __entry->dirtied_pause,
 		  __entry->paused,	/* ms */
-		  __entry->pause	/* ms */
+		  __entry->pause,	/* ms */
+		  __entry->period,	/* ms */
+		  __entry->think	/* ms */
 	)
 );
 
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d08..f8668cf6a32 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1296,6 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->nr_dirtied = 0;
 	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+	p->dirty_paused_when = 0;
 
 	/*
 	 * Ok, make it visible to the rest of the system.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96b3e7aa705..49193215582 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
+	long period;
 	long pause = 0;
 	long uninitialized_var(max_pause);
 	bool dirty_exceeded = false;
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long start_time = jiffies;
 
 	for (;;) {
+		unsigned long now = jiffies;
+
 		/*
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		freerun = dirty_freerun_ceiling(dirty_thresh,
 						background_thresh);
-		if (nr_dirty <= freerun)
+		if (nr_dirty <= freerun) {
+			current->dirty_paused_when = now;
+			current->nr_dirtied = 0;
 			break;
+		}
 
 		if (unlikely(!writeback_in_progress(bdi)))
 			bdi_start_background_writeback(bdi);
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping,
 		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
 						RATELIMIT_CALC_SHIFT;
 		if (unlikely(task_ratelimit == 0)) {
+			period = max_pause;
 			pause = max_pause;
 			goto pause;
 		}
-		pause = HZ * pages_dirtied / task_ratelimit;
+		period = HZ * pages_dirtied / task_ratelimit;
+		pause = period;
+		if (current->dirty_paused_when)
+			pause -= now - current->dirty_paused_when;
+		/*
+		 * For less than 1s think time (ext3/4 may block the dirtier
+		 * for up to 800ms from time to time on 1-HDD; so does xfs,
+		 * however at much less frequency), try to compensate it in
+		 * future periods by updating the virtual time; otherwise just
+		 * do a reset, as it may be a light dirtier.
+		 */
 		if (unlikely(pause <= 0)) {
 			trace_balance_dirty_pages(bdi,
 						  dirty_thresh,
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping,
 						  dirty_ratelimit,
 						  task_ratelimit,
 						  pages_dirtied,
+						  period,
 						  pause,
 						  start_time);
+			if (pause < -HZ) {
+				current->dirty_paused_when = now;
+				current->nr_dirtied = 0;
+			} else if (period) {
+				current->dirty_paused_when += period;
+				current->nr_dirtied = 0;
+			}
 			pause = 1; /* avoid resetting nr_dirtied_pause below */
 			break;
 		}
@@ -1135,11 +1160,15 @@ pause:
 					  dirty_ratelimit,
 					  task_ratelimit,
 					  pages_dirtied,
+					  period,
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
+		current->dirty_paused_when = now + pause;
+		current->nr_dirtied = 0;
+
 		/*
 		 * This is typically equal to (nr_dirty < dirty_thresh) and can
 		 * also keep "1000+ dd on a slow USB stick" under control.
@@ -1167,11 +1196,10 @@ pause:
 	if (!dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
-	current->nr_dirtied = 0;
 	if (pause == 0) { /* in freerun area */
 		current->nr_dirtied_pause =
 				dirty_poll_interval(nr_dirty, dirty_thresh);
-	} else if (pause <= max_pause / 4 &&
+	} else if (period <= max_pause / 4 &&
 		   pages_dirtied >= current->nr_dirtied_pause) {
 		current->nr_dirtied_pause = clamp_val(
 			dirty_ratelimit * (max_pause / 2) / HZ,
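
As a quick illustration of the pause-compensation arithmetic added above, here is a minimal user-space sketch (not kernel code). The HZ value and the pages_dirtied, task_ratelimit and think-time numbers are made-up example inputs; only the formulas (period = HZ * pages_dirtied / task_ratelimit, pause = period - think) mirror the new balance_dirty_pages() logic.

#include <stdio.h>

#define HZ 1000			/* assume 1000 jiffies per second */

int main(void)
{
	long pages_dirtied  = 32;	/* pages dirtied since the last pause (example) */
	long task_ratelimit = 256;	/* allowed dirty rate in pages/s (example) */
	long think          = 50;	/* jiffies spent "thinking" since the last pause (example) */

	/* full period this batch of pages is worth at the current ratelimit */
	long period = HZ * pages_dirtied / task_ratelimit;

	/* sleep only for the part of the period not already covered by think time */
	long pause = period - think;

	if (pause <= 0) {
		/* think time consumed the whole period: no sleep is needed */
		printf("period=%ldms think=%ldms -> no pause\n",
		       period * 1000 / HZ, think * 1000 / HZ);
		return 0;
	}

	printf("period=%ldms think=%ldms pause=%ldms\n",
	       period * 1000 / HZ, think * 1000 / HZ, pause * 1000 / HZ);
	return 0;
}

With these example numbers the period is 125 ms, of which 50 ms was already spent in application "think time", so the task sleeps only 75 ms; this is the same quantity the extended tracepoint now reports as period, think and pause in milliseconds.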