-rw-r--r--	include/linux/sched.h	7
-rw-r--r--	kernel/fork.c	3
-rw-r--r--	mm/page-writeback.c	89
3 files changed, 60 insertions, 39 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41d0237fd449..a4a5582dc618 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1525,6 +1525,13 @@ struct task_struct {
 	int make_it_fail;
 #endif
 	struct prop_local_single dirties;
+	/*
+	 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
+	 * balance_dirty_pages() for some dirty throttling pause
+	 */
+	int nr_dirtied;
+	int nr_dirtied_pause;
+
 #ifdef CONFIG_LATENCYTOP
 	int latency_record_count;
 	struct latency_record latency_record[LT_SAVECOUNT];
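The two new fields give each task its own dirty budget: nr_dirtied counts pages dirtied since the last throttle point, and nr_dirtied_pause is the count at which the task must poll the dirty limits again. A minimal sketch of the intended interaction, distilled from the mm/page-writeback.c changes below (not literal patch code):

	current->nr_dirtied += nr_pages_dirtied;
	if (current->nr_dirtied >= current->nr_dirtied_pause)
		balance_dirty_pages(mapping, current->nr_dirtied);
	/* balance_dirty_pages() resets nr_dirtied to 0 and recomputes
	 * nr_dirtied_pause from the remaining room below the dirty limit. */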
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e6b6f4fb272..cc0815df99f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1302,6 +1302,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->pdeath_signal = 0;
 	p->exit_state = 0;
 
+	p->nr_dirtied = 0;
+	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+
 	/*
 	 * Ok, make it visible to the rest of the system.
 	 * We dont wake it up yet.
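The initial pause chosen in copy_process() is expressed in pages but corresponds to a fixed amount of data, roughly 128 KB, whatever the page size. A stand-alone sketch of the arithmetic; PAGE_SHIFT is hard-coded to 12 (4 KB pages) here purely for illustration, the patch uses the architecture's macro:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed: 4 KB pages */

int main(void)
{
	int nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);	/* 128 >> 2 == 32 pages */

	printf("initial nr_dirtied_pause = %d pages (%d KB)\n",
	       nr_dirtied_pause, nr_dirtied_pause << (PAGE_SHIFT - 10));
	return 0;
}

This prints 32 pages (128 KB); with 64 KB pages the same expression gives 2 pages, again 128 KB.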
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d4a6e91bd9e5..daff320d263f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -54,20 +54,6 @@
  */
 static long ratelimit_pages = 32;
 
-/*
- * When balance_dirty_pages decides that the caller needs to perform some
- * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
- * large amounts of I/O are submitted.
- */
-static inline long sync_writeback_pages(unsigned long dirtied)
-{
-	if (dirtied < ratelimit_pages)
-		dirtied = ratelimit_pages;
-
-	return dirtied + dirtied / 2;
-}
-
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
@@ -169,6 +155,8 @@ static void update_completion_period(void)
 	int shift = calc_period_shift();
 	prop_change_shift(&vm_completions, shift);
 	prop_change_shift(&vm_dirties, shift);
+
+	writeback_set_ratelimit();
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -979,6 +967,23 @@ static void bdi_update_bandwidth(struct backing_dev_info *bdi,
 }
 
 /*
+ * After a task dirtied this many pages, balance_dirty_pages_ratelimited_nr()
+ * will look to see if it needs to start dirty throttling.
+ *
+ * If dirty_poll_interval is too low, big NUMA machines will call the expensive
+ * global_page_state() too often. So scale it near-sqrt to the safety margin
+ * (the number of pages we may dirty without exceeding the dirty limits).
+ */
+static unsigned long dirty_poll_interval(unsigned long dirty,
+					 unsigned long thresh)
+{
+	if (thresh > dirty)
+		return 1UL << (ilog2(thresh - dirty) >> 1);
+
+	return 1;
+}
+
+/*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data. It looks at the number of dirty pages in the machine and will force
  * the caller to perform writeback if the system is over `vm_dirty_ratio'.
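A user-space sketch of the near-sqrt scaling implemented by dirty_poll_interval(); ilog2_ul() is a stand-in for the kernel's ilog2(), and the sample inputs are invented to show the behaviour with large, small and no headroom:

#include <stdio.h>

/* floor(log2(v)) for v > 0, mimicking the kernel's ilog2() */
static unsigned long ilog2_ul(unsigned long v)
{
	unsigned long r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned long dirty_poll_interval(unsigned long dirty, unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2_ul(thresh - dirty) >> 1);
	return 1;
}

int main(void)
{
	/* 16384 pages (64 MB at 4 KB/page) of headroom -> poll every 128 pages */
	printf("%lu\n", dirty_poll_interval(0, 16384));
	/* 16 pages of headroom -> poll every 4 pages */
	printf("%lu\n", dirty_poll_interval(1000, 1016));
	/* at or over the threshold -> poll on every dirtied page */
	printf("%lu\n", dirty_poll_interval(2000, 1000));
	return 0;
}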
@@ -1112,6 +1117,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 	if (clear_dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
+	current->nr_dirtied = 0;
+	current->nr_dirtied_pause = dirty_poll_interval(nr_dirty, dirty_thresh);
+
 	if (writeback_in_progress(bdi))
 		return;
 
@@ -1138,7 +1146,7 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
 	}
 }
 
-static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
@@ -1158,31 +1166,39 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 			       unsigned long nr_pages_dirtied)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	unsigned long ratelimit;
-	unsigned long *p;
+	int ratelimit;
+	int *p;
 
 	if (!bdi_cap_account_dirty(bdi))
 		return;
 
-	ratelimit = ratelimit_pages;
-	if (mapping->backing_dev_info->dirty_exceeded)
-		ratelimit = 8;
+	ratelimit = current->nr_dirtied_pause;
+	if (bdi->dirty_exceeded)
+		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
+
+	current->nr_dirtied += nr_pages_dirtied;
 
+	preempt_disable();
 	/*
-	 * Check the rate limiting. Also, we do not want to throttle real-time
-	 * tasks in balance_dirty_pages(). Period.
+	 * This prevents one CPU to accumulate too many dirtied pages without
+	 * calling into balance_dirty_pages(), which can happen when there are
+	 * 1000+ tasks, all of them start dirtying pages at exactly the same
+	 * time, hence all honoured too large initial task->nr_dirtied_pause.
 	 */
-	preempt_disable();
 	p = &__get_cpu_var(bdp_ratelimits);
-	*p += nr_pages_dirtied;
-	if (unlikely(*p >= ratelimit)) {
-		ratelimit = sync_writeback_pages(*p);
+	if (unlikely(current->nr_dirtied >= ratelimit))
 		*p = 0;
-		preempt_enable();
-		balance_dirty_pages(mapping, ratelimit);
-		return;
+	else {
+		*p += nr_pages_dirtied;
+		if (unlikely(*p >= ratelimit_pages)) {
+			*p = 0;
+			ratelimit = 0;
+		}
 	}
 	preempt_enable();
+
+	if (unlikely(current->nr_dirtied >= ratelimit))
+		balance_dirty_pages(mapping, current->nr_dirtied);
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
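A quick check of the dirty_exceeded clamp introduced above, assuming 4 KB pages (PAGE_SHIFT == 12):

	32 >> (PAGE_SHIFT - 10) == 32 >> 2 == 8 pages

so once a bdi is over its dirty threshold, a task writing to it re-enters balance_dirty_pages() after at most 8 newly dirtied pages (32 KB), which reproduces the old hard-coded ratelimit = 8 on 4 KB-page machines.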
@@ -1277,22 +1293,17 @@ void laptop_sync_completion(void)
  *
  * Here we set ratelimit_pages to a level which ensures that when all CPUs are
  * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
- * thresholds before writeback cuts in.
- *
- * But the limit should not be set too high. Because it also controls the
- * amount of memory which the balance_dirty_pages() caller has to write back.
- * If this is too large then the caller will block on the IO queue all the
- * time. So limit it to four megabytes - the balance_dirty_pages() caller
- * will write six megabyte chunks, max.
+ * thresholds.
  */
 
 void writeback_set_ratelimit(void)
 {
-	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	global_dirty_limits(&background_thresh, &dirty_thresh);
+	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
 		ratelimit_pages = 16;
-	if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
-		ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
 static int __cpuinit
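For reference, a user-space sketch of the new ratelimit_pages sizing rule; the dirty threshold and CPU count are invented inputs standing in for global_dirty_limits() and num_online_cpus():

#include <stdio.h>

static long compute_ratelimit_pages(unsigned long dirty_thresh, unsigned int online_cpus)
{
	long ratelimit_pages = dirty_thresh / (online_cpus * 32);

	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
	return ratelimit_pages;
}

int main(void)
{
	/* 200 MB dirty threshold at 4 KB/page (51200 pages), 8 CPUs -> 200 pages,
	 * so all CPUs together stay within ~1/32 of the threshold between polls */
	printf("%ld\n", compute_ratelimit_pages(51200, 8));
	/* a tiny threshold is clamped to the 16-page floor */
	printf("%ld\n", compute_ratelimit_pages(100, 64));
	return 0;
}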