author      Linus Torvalds <torvalds@linux-foundation.org>    2012-01-10 19:59:59 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>    2012-01-10 19:59:59 -0500
commit      001a541ea9163ace5e8243ee0e907ad80a4c0ec2
tree        a76225046369c440de93739add9823f5ea060245    /mm
parent      40ba587923ae67090d9f141c1d3c951be5c1420e
parent      bc31b86a5923fad5f3fbb6192f767f410241ba27
Merge branch 'writeback-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux
* 'writeback-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux:
writeback: move MIN_WRITEBACK_PAGES to fs-writeback.c
writeback: balanced_rate cannot exceed write bandwidth
writeback: do strict bdi dirty_exceeded
writeback: avoid tiny dirty poll intervals
writeback: max, min and target dirty pause time
writeback: dirty ratelimit - think time compensation
btrfs: fix dirtied pages accounting on sub-page writes
writeback: fix dirtied pages accounting on redirty
writeback: fix dirtied pages accounting on sub-page writes
writeback: charge leaked page dirties to active tasks
writeback: Include all dirty inodes in background writeback
Diffstat (limited to 'mm')
 -rw-r--r--  mm/page-writeback.c | 246
 1 file changed, 191 insertions(+), 55 deletions(-)
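For orientation before the patch text: the series tunes the per-task throttling loop in which a task dirties up to tsk->nr_dirtied_pause pages and then sleeps in balance_dirty_pages(). The following is a rough userspace model of that loop only, not kernel code; the variable names and numbers are illustrative.

```c
/*
 * Rough userspace model of the balance_dirty_pages() throttling loop,
 * for orientation only.  "task_ratelimit" and "nr_dirtied_pause" stand in
 * for the per-task values the patches below compute.
 */
#include <stdio.h>

#define HZ 1000				/* pretend 1 jiffy == 1 ms */

int main(void)
{
	long task_ratelimit = 2560;	/* pages/s this task may dirty */
	long nr_dirtied_pause = 32;	/* dirty this many pages between pauses */
	long nr_dirtied = 0;
	long total = 0;

	while (total < 256) {		/* pretend we dirty 256 pages (1 MB) */
		nr_dirtied++;		/* the account_page_dirtied() side */
		total++;
		if (nr_dirtied >= nr_dirtied_pause) {
			/* balance_dirty_pages(): pause = HZ * pages / rate */
			long pause = HZ * nr_dirtied / task_ratelimit;

			printf("dirtied %ld pages, sleep %ld ms\n",
			       nr_dirtied, pause);
			nr_dirtied = 0;
		}
	}
	return 0;
}
```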
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5cdd4f2b0c9d..363ba7082ef5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -42,6 +42,12 @@
 #define MAX_PAUSE		max(HZ/5, 1)
 
 /*
+ * Try to keep balance_dirty_pages() call intervals higher than this many pages
+ * by raising pause time to max_pause when falls below it.
+ */
+#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))
+
+/*
  * Estimate write bandwidth at 200ms intervals.
  */
 #define BANDWIDTH_INTERVAL	max(HZ/5, 1)
@@ -898,6 +904,11 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
 	 */
 	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
 					   dirty_rate | 1);
+	/*
+	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
+	 */
+	if (unlikely(balanced_dirty_ratelimit > write_bw))
+		balanced_dirty_ratelimit = write_bw;
 
 	/*
 	 * We could safely do this and return immediately:
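The clamp added in the hunk above enforces the invariant in the new comment: with N dirtiers each throttled at task_ratelimit, the measured dirty_rate is roughly N * task_ratelimit, so the balanced per-task rate comes out near write_bw / N and has no business exceeding write_bw. A hedged userspace sketch of that arithmetic follows; the function name and sample numbers are made up, the units are arbitrary (think MB/s).

```c
#include <stdio.h>
#include <stdint.h>

/* Userspace model of the clamped estimate above. */
static uint64_t balanced_ratelimit(uint64_t task_ratelimit,
				   uint64_t write_bw, uint64_t dirty_rate)
{
	uint64_t r = task_ratelimit * write_bw / (dirty_rate | 1);

	return r > write_bw ? write_bw : r;	/* the new upper bound */
}

int main(void)
{
	/* two dd tasks at ~50 each against 80 of disk bandwidth: ~write_bw/N */
	printf("%llu\n", (unsigned long long)balanced_ratelimit(50, 80, 100));
	/* a momentarily under-estimated dirty_rate no longer explodes past 80 */
	printf("%llu\n", (unsigned long long)balanced_ratelimit(50, 80, 10));
	return 0;
}
```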
@@ -1044,40 +1055,98 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
-static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
+static long bdi_max_pause(struct backing_dev_info *bdi,
 			   unsigned long bdi_dirty)
+{
+	long bw = bdi->avg_write_bandwidth;
+	long t;
+
+	/*
+	 * Limit pause time for small memory systems. If sleeping for too long
+	 * time, a small pool of dirty/writeback pages may go empty and disk go
+	 * idle.
+	 *
+	 * 8 serves as the safety ratio.
+	 */
+	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
+	t++;
+
+	return min_t(long, t, MAX_PAUSE);
+}
+
+static long bdi_min_pause(struct backing_dev_info *bdi,
+			  long max_pause,
+			  unsigned long task_ratelimit,
+			  unsigned long dirty_ratelimit,
+			  int *nr_dirtied_pause)
 {
-	unsigned long bw = bdi->avg_write_bandwidth;
-	unsigned long hi = ilog2(bw);
-	unsigned long lo = ilog2(bdi->dirty_ratelimit);
-	unsigned long t;
+	long hi = ilog2(bdi->avg_write_bandwidth);
+	long lo = ilog2(bdi->dirty_ratelimit);
+	long t;		/* target pause */
+	long pause;	/* estimated next pause */
+	int pages;	/* target nr_dirtied_pause */
 
-	/* target for 20ms max pause on 1-dd case */
-	t = HZ / 50;
+	/* target for 10ms pause on 1-dd case */
+	t = max(1, HZ / 100);
 
 	/*
 	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
 	 * overheads.
 	 *
-	 * (N * 20ms) on 2^N concurrent tasks.
+	 * (N * 10ms) on 2^N concurrent tasks.
 	 */
 	if (hi > lo)
-		t += (hi - lo) * (20 * HZ) / 1024;
+		t += (hi - lo) * (10 * HZ) / 1024;
 
 	/*
-	 * Limit pause time for small memory systems. If sleeping for too long
-	 * time, a small pool of dirty/writeback pages may go empty and disk go
-	 * idle.
+	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
+	 * on the much more stable dirty_ratelimit. However the next pause time
+	 * will be computed based on task_ratelimit and the two rate limits may
+	 * depart considerably at some time. Especially if task_ratelimit goes
+	 * below dirty_ratelimit/2 and the target pause is max_pause, the next
+	 * pause time will be max_pause*2 _trimmed down_ to max_pause. As a
+	 * result task_ratelimit won't be executed faithfully, which could
+	 * eventually bring down dirty_ratelimit.
 	 *
-	 * 8 serves as the safety ratio.
+	 * We apply two rules to fix it up:
+	 * 1) try to estimate the next pause time and if necessary, use a lower
+	 *    nr_dirtied_pause so as not to exceed max_pause. When this happens,
+	 *    nr_dirtied_pause will be "dancing" with task_ratelimit.
+	 * 2) limit the target pause time to max_pause/2, so that the normal
+	 *    small fluctuations of task_ratelimit won't trigger rule (1) and
+	 *    nr_dirtied_pause will remain as stable as dirty_ratelimit.
 	 */
-	t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+	t = min(t, 1 + max_pause / 2);
+	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
 
 	/*
-	 * The pause time will be settled within range (max_pause/4, max_pause).
-	 * Apply a minimal value of 4 to get a non-zero max_pause/4.
+	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
+	 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
+	 * When the 16 consecutive reads are often interrupted by some dirty
+	 * throttling pause during the async writes, cfq will go into idles
+	 * (deadline is fine). So push nr_dirtied_pause as high as possible
+	 * until reaches DIRTY_POLL_THRESH=32 pages.
 	 */
-	return clamp_val(t, 4, MAX_PAUSE);
+	if (pages < DIRTY_POLL_THRESH) {
+		t = max_pause;
+		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
+		if (pages > DIRTY_POLL_THRESH) {
+			pages = DIRTY_POLL_THRESH;
+			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
+		}
+	}
+
+	pause = HZ * pages / (task_ratelimit + 1);
+	if (pause > max_pause) {
+		t = max_pause;
+		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
+	}
+
+	*nr_dirtied_pause = pages;
+	/*
+	 * The minimal pause time will normally be half the target pause time.
+	 */
+	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
 }
 
 /*
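To get a feel for the new helpers above: bdi_max_pause() caps the sleep so a slow device with only a small dirty pool is never left idle, while bdi_min_pause() picks a target pause (10 ms for one dirtier, growing with the number of dirtiers) and a matching nr_dirtied_pause of at least DIRTY_POLL_THRESH pages where possible. The standalone sketch below mirrors only the bdi_max_pause() formula, with HZ=1000 and made-up device numbers; it is an approximation, not the kernel code.

```c
#include <stdio.h>

#define HZ		1000		/* jiffies per second, for illustration */
#define MAX_PAUSE	(HZ / 5)	/* 200 ms, as in the header above */

static long roundup_pow_of_two(long n)	/* tiny stand-in for the kernel helper */
{
	long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/*
 * Mirrors the bdi_max_pause() formula: don't sleep so long that a small
 * pool of dirty pages drains and the disk goes idle (8 = safety ratio).
 */
static long max_pause(long bw_pages_per_sec, long bdi_dirty_pages)
{
	long t = bdi_dirty_pages /
		 (1 + bw_pages_per_sec / roundup_pow_of_two(1 + HZ / 8));

	return (t + 1) < MAX_PAUSE ? t + 1 : MAX_PAUSE;
}

int main(void)
{
	/* ~5 MB/s USB stick (1280 pages/s) holding 2000 dirty pages */
	printf("usb stick: max pause %ld ms\n", max_pause(1280, 2000));
	/* fast SSD with a large dirty pool: capped at MAX_PAUSE */
	printf("ssd:       max pause %ld ms\n", max_pause(100000, 200000));
	return 0;
}
```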
@@ -1098,16 +1167,21 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
-	long pause = 0;
-	long uninitialized_var(max_pause);
+	long period;
+	long pause;
+	long max_pause;
+	long min_pause;
+	int nr_dirtied_pause;
 	bool dirty_exceeded = false;
 	unsigned long task_ratelimit;
-	unsigned long uninitialized_var(dirty_ratelimit);
+	unsigned long dirty_ratelimit;
 	unsigned long pos_ratio;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	unsigned long start_time = jiffies;
 
 	for (;;) {
+		unsigned long now = jiffies;
+
 		/*
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been
@@ -1127,8 +1201,13 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		freerun = dirty_freerun_ceiling(dirty_thresh,
 						background_thresh);
-		if (nr_dirty <= freerun)
+		if (nr_dirty <= freerun) {
+			current->dirty_paused_when = now;
+			current->nr_dirtied = 0;
+			current->nr_dirtied_pause =
+				dirty_poll_interval(nr_dirty, dirty_thresh);
 			break;
+		}
 
 		if (unlikely(!writeback_in_progress(bdi)))
 			bdi_start_background_writeback(bdi);
@@ -1168,7 +1247,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 				    bdi_stat(bdi, BDI_WRITEBACK);
 		}
 
-		dirty_exceeded = (bdi_dirty > bdi_thresh) ||
+		dirty_exceeded = (bdi_dirty > bdi_thresh) &&
 				  (nr_dirty > dirty_thresh);
 		if (dirty_exceeded && !bdi->dirty_exceeded)
 			bdi->dirty_exceeded = 1;
@@ -1177,20 +1256,34 @@ static void balance_dirty_pages(struct address_space *mapping,
 					  nr_dirty, bdi_thresh, bdi_dirty,
 					  start_time);
 
-		max_pause = bdi_max_pause(bdi, bdi_dirty);
-
 		dirty_ratelimit = bdi->dirty_ratelimit;
 		pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
 					       background_thresh, nr_dirty,
 					       bdi_thresh, bdi_dirty);
 		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
 							RATELIMIT_CALC_SHIFT;
+		max_pause = bdi_max_pause(bdi, bdi_dirty);
+		min_pause = bdi_min_pause(bdi, max_pause,
+					  task_ratelimit, dirty_ratelimit,
+					  &nr_dirtied_pause);
+
 		if (unlikely(task_ratelimit == 0)) {
+			period = max_pause;
 			pause = max_pause;
 			goto pause;
 		}
-		pause = HZ * pages_dirtied / task_ratelimit;
-		if (unlikely(pause <= 0)) {
+		period = HZ * pages_dirtied / task_ratelimit;
+		pause = period;
+		if (current->dirty_paused_when)
+			pause -= now - current->dirty_paused_when;
+		/*
+		 * For less than 1s think time (ext3/4 may block the dirtier
+		 * for up to 800ms from time to time on 1-HDD; so does xfs,
+		 * however at much less frequency), try to compensate it in
+		 * future periods by updating the virtual time; otherwise just
+		 * do a reset, as it may be a light dirtier.
+		 */
+		if (pause < min_pause) {
 			trace_balance_dirty_pages(bdi,
 						  dirty_thresh,
 						  background_thresh,
@@ -1200,12 +1293,24 @@ static void balance_dirty_pages(struct address_space *mapping,
 						  dirty_ratelimit,
 						  task_ratelimit,
 						  pages_dirtied,
-						  pause,
+						  period,
+						  min(pause, 0L),
 						  start_time);
-			pause = 1; /* avoid resetting nr_dirtied_pause below */
+			if (pause < -HZ) {
+				current->dirty_paused_when = now;
+				current->nr_dirtied = 0;
+			} else if (period) {
+				current->dirty_paused_when += period;
+				current->nr_dirtied = 0;
+			} else if (current->nr_dirtied_pause <= pages_dirtied)
+				current->nr_dirtied_pause += pages_dirtied;
 			break;
 		}
-		pause = min(pause, max_pause);
+		if (unlikely(pause > max_pause)) {
+			/* for occasional dropped task_ratelimit */
+			now += min(pause - max_pause, max_pause);
+			pause = max_pause;
+		}
 
 pause:
 		trace_balance_dirty_pages(bdi,
@@ -1217,11 +1322,16 @@ pause:
 					  dirty_ratelimit,
 					  task_ratelimit,
 					  pages_dirtied,
+					  period,
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
+		current->dirty_paused_when = now + pause;
+		current->nr_dirtied = 0;
+		current->nr_dirtied_pause = nr_dirtied_pause;
+
 		/*
 		 * This is typically equal to (nr_dirty < dirty_thresh) and can
 		 * also keep "1000+ dd on a slow USB stick" under control.
@@ -1249,23 +1359,6 @@ pause:
 	if (!dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
-	current->nr_dirtied = 0;
-	if (pause == 0) { /* in freerun area */
-		current->nr_dirtied_pause =
-			dirty_poll_interval(nr_dirty, dirty_thresh);
-	} else if (pause <= max_pause / 4 &&
-		   pages_dirtied >= current->nr_dirtied_pause) {
-		current->nr_dirtied_pause = clamp_val(
-			dirty_ratelimit * (max_pause / 2) / HZ,
-			pages_dirtied + pages_dirtied / 8,
-			pages_dirtied * 4);
-	} else if (pause >= max_pause) {
-		current->nr_dirtied_pause = 1 | clamp_val(
-			dirty_ratelimit * (max_pause / 2) / HZ,
-			pages_dirtied / 4,
-			pages_dirtied - pages_dirtied / 8);
-	}
-
 	if (writeback_in_progress(bdi))
 		return;
 
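The "think time compensation" added a few hunks up deducts the time the task spent off doing other work (computing, waiting on reads, being blocked inside the filesystem) from the pause it owes, using dirty_paused_when as a virtual clock. A hedged illustration of that bookkeeping in plain C follows; the variable names mirror the patch, the values are invented, and only the normal pause path is modelled.

```c
#include <stdio.h>

#define HZ 1000				/* 1 jiffy == 1 ms for this illustration */

int main(void)
{
	long task_ratelimit = 1000;	/* pages/s */
	long pages_dirtied = 100;	/* dirtied since the last pause */
	long dirty_paused_when = 5000;	/* jiffies: end of the previous pause */
	long now = 5060;		/* the task "thought" for 60 ms since then */

	long period = HZ * pages_dirtied / task_ratelimit;	/* 100 ms owed */
	long pause = period;

	if (dirty_paused_when)
		pause -= now - dirty_paused_when;	/* credit the think time */

	printf("period %ld ms, pause %ld ms\n", period, pause);	/* 100, 40 */

	/* after sleeping, the patch records when this pause ended */
	dirty_paused_when = now + pause;
	/* 5100 - 5000 == period: the 60 ms of think time was fully credited */
	printf("next dirty_paused_when: %ld\n", dirty_paused_when);
	return 0;
}
```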
@@ -1296,6 +1389,22 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
 
 static DEFINE_PER_CPU(int, bdp_ratelimits);
 
+/*
+ * Normal tasks are throttled by
+ *	loop {
+ *		dirty tsk->nr_dirtied_pause pages;
+ *		take a snap in balance_dirty_pages();
+ *	}
+ * However there is a worst case. If every task exit immediately when dirtied
+ * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
+ * called to throttle the page dirties. The solution is to save the not yet
+ * throttled page dirties in dirty_throttle_leaks on task exit and charge them
+ * randomly into the running tasks. This works well for the above worst case,
+ * as the new task will pick up and accumulate the old task's leaked dirty
+ * count and eventually get throttled.
+ */
+DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
+
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping: address_space which was dirtied
@@ -1324,8 +1433,6 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 	if (bdi->dirty_exceeded)
 		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
 
-	current->nr_dirtied += nr_pages_dirtied;
-
 	preempt_disable();
 	/*
 	 * This prevents one CPU to accumulate too many dirtied pages without
@@ -1336,12 +1443,20 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 	p = &__get_cpu_var(bdp_ratelimits);
 	if (unlikely(current->nr_dirtied >= ratelimit))
 		*p = 0;
-	else {
-		*p += nr_pages_dirtied;
-		if (unlikely(*p >= ratelimit_pages)) {
-			*p = 0;
-			ratelimit = 0;
-		}
+	else if (unlikely(*p >= ratelimit_pages)) {
+		*p = 0;
+		ratelimit = 0;
+	}
+	/*
+	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
+	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
+	 * the dirty throttling and livelock other long-run dirtiers.
+	 */
+	p = &__get_cpu_var(dirty_throttle_leaks);
+	if (*p > 0 && current->nr_dirtied < ratelimit) {
+		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
+		*p -= nr_pages_dirtied;
+		current->nr_dirtied += nr_pages_dirtied;
 	}
 	preempt_enable();
 
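The leak bookkeeping above has two halves: a task that exits with un-throttled dirties parks them in the per-CPU dirty_throttle_leaks counter (that half lives outside this diff), and the next dirtier on that CPU picks them up here, so short-lived tasks such as gcc invocations cannot escape throttling. Below is a minimal userspace sketch of the pickup logic, assuming a single CPU and made-up counts; it is an illustration, not the kernel implementation.

```c
#include <stdio.h>

/* Per-CPU leak counter in the kernel; a single global here for simplicity. */
static int dirty_throttle_leaks;

struct task {
	int nr_dirtied;
};

/* Exit half (added elsewhere in the series): park the un-throttled dirties. */
static void task_exit(struct task *t)
{
	dirty_throttle_leaks += t->nr_dirtied;
}

/* Pickup half, mirroring the hunk above. */
static void pick_up_leaks(struct task *t, int ratelimit)
{
	if (dirty_throttle_leaks > 0 && t->nr_dirtied < ratelimit) {
		int take = ratelimit - t->nr_dirtied;

		if (take > dirty_throttle_leaks)
			take = dirty_throttle_leaks;
		dirty_throttle_leaks -= take;
		t->nr_dirtied += take;
	}
}

int main(void)
{
	struct task gcc1 = { .nr_dirtied = 30 };	/* exits before hitting 32 */
	struct task gcc2 = { .nr_dirtied = 30 };
	struct task dd = { .nr_dirtied = 5 };		/* long-running dirtier */

	task_exit(&gcc1);
	task_exit(&gcc2);
	pick_up_leaks(&dd, 32);
	/* dd inherits 27 leaked pages and hits the ratelimit much sooner */
	printf("leaks left %d, dd->nr_dirtied %d\n",
	       dirty_throttle_leaks, dd.nr_dirtied);
	return 0;
}
```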
@@ -1823,6 +1938,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
 		task_io_account_write(PAGE_CACHE_SIZE);
+		current->nr_dirtied++;
+		this_cpu_inc(bdp_ratelimits);
 	}
 }
 EXPORT_SYMBOL(account_page_dirtied);
@@ -1883,6 +2000,24 @@ int __set_page_dirty_nobuffers(struct page *page)
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 
 /*
+ * Call this whenever redirtying a page, to de-account the dirty counters
+ * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
+ * counters (NR_WRITTEN, BDI_WRITTEN) in long term. The mismatches will lead to
+ * systematic errors in balanced_dirty_ratelimit and the dirty pages position
+ * control.
+ */
+void account_page_redirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	if (mapping && mapping_cap_account_dirty(mapping)) {
+		current->nr_dirtied--;
+		dec_zone_page_state(page, NR_DIRTIED);
+		dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
+	}
+}
+EXPORT_SYMBOL(account_page_redirty);
+
+/*
  * When a writepage implementation decides that it doesn't want to write this
  * page for some reason, it should redirty the locked page via
  * redirty_page_for_writepage() and it should then unlock the page and return 0
@@ -1890,6 +2025,7 @@ EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 {
 	wbc->pages_skipped++;
+	account_page_redirty(page);
 	return __set_page_dirty_nobuffers(page);
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
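A closing note on the redirty accounting added in the last two hunks: pages that get dirtied a second time before being written must not inflate the dirtied counters relative to the written counters, because balanced_dirty_ratelimit is derived from the ratio of their rates. The small model below shows the skew that double counting would cause; the numbers are illustrative only and the helper just repeats the estimate formula from earlier in this diff.

```c
#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the balanced_dirty_ratelimit estimate shown above. */
static uint64_t balanced(uint64_t task_ratelimit, uint64_t write_bw,
			 uint64_t dirty_rate)
{
	return task_ratelimit * write_bw / (dirty_rate | 1);
}

int main(void)
{
	uint64_t write_bw = 100;	/* pages/s actually reaching the disk */
	uint64_t task_ratelimit = 100;	/* one dirtier, currently at write_bw */

	/*
	 * If every written page were also redirtied once and counted twice,
	 * the measured dirty rate would double without account_page_redirty().
	 */
	uint64_t honest_rate = 100;
	uint64_t inflated_rate = 200;

	printf("balanced ratelimit, honest accounting:   %llu\n",
	       (unsigned long long)balanced(task_ratelimit, write_bw, honest_rate));
	/* roughly half: the dirtier would be throttled for no real reason */
	printf("balanced ratelimit, inflated accounting: %llu\n",
	       (unsigned long long)balanced(task_ratelimit, write_bw, inflated_rate));
	return 0;
}
```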