 include/linux/sched.h |  7 ++++
 kernel/fork.c         |  3 ++
 mm/page-writeback.c   | 89 ++++++++++++++++++++++++-------------------
 3 files changed, 60 insertions(+), 39 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41d0237fd449..a4a5582dc618 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1525,6 +1525,13 @@ struct task_struct {
 	int make_it_fail;
 #endif
 	struct prop_local_single dirties;
+	/*
+	 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
+	 * balance_dirty_pages() for some dirty throttling pause
+	 */
+	int nr_dirtied;
+	int nr_dirtied_pause;
+
 #ifdef CONFIG_LATENCYTOP
 	int latency_record_count;
 	struct latency_record latency_record[LT_SAVECOUNT];
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e6b6f4fb272..cc0815df99f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1302,6 +1302,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->pdeath_signal = 0;
 	p->exit_state = 0;
 
+	p->nr_dirtied = 0;
+	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+
 	/*
 	 * Ok, make it visible to the rest of the system.
	 * We dont wake it up yet.
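The fork-time default above corresponds to 128 KB worth of pages, independent of page size. A minimal userspace sketch of that arithmetic (assuming the common 4 KB pages, i.e. PAGE_SHIFT == 12; the program is illustrative only and not part of the patch):

#include <stdio.h>

/* Illustrative only: mirrors the fork-time default from the hunk above. */
#define PAGE_SHIFT 12	/* assumed 4 KB pages */

int main(void)
{
	/* 128 >> (PAGE_SHIFT - 10): 128 KB expressed in pages */
	int nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);

	/* With 4 KB pages: 128 >> 2 == 32 pages == 128 KB */
	printf("initial nr_dirtied_pause = %d pages (%d KB)\n",
	       nr_dirtied_pause, nr_dirtied_pause << (PAGE_SHIFT - 10));
	return 0;
}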
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d4a6e91bd9e5..daff320d263f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -54,20 +54,6 @@
  */
 static long ratelimit_pages = 32;
 
-/*
- * When balance_dirty_pages decides that the caller needs to perform some
- * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
- * large amounts of I/O are submitted.
- */
-static inline long sync_writeback_pages(unsigned long dirtied)
-{
-	if (dirtied < ratelimit_pages)
-		dirtied = ratelimit_pages;
-
-	return dirtied + dirtied / 2;
-}
-
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
@@ -169,6 +155,8 @@ static void update_completion_period(void)
 	int shift = calc_period_shift();
 	prop_change_shift(&vm_completions, shift);
 	prop_change_shift(&vm_dirties, shift);
+
+	writeback_set_ratelimit();
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -979,6 +967,23 @@ static void bdi_update_bandwidth(struct backing_dev_info *bdi,
 }
 
 /*
+ * After a task dirtied this many pages, balance_dirty_pages_ratelimited_nr()
+ * will look to see if it needs to start dirty throttling.
+ *
+ * If dirty_poll_interval is too low, big NUMA machines will call the expensive
+ * global_page_state() too often. So scale it near-sqrt to the safety margin
+ * (the number of pages we may dirty without exceeding the dirty limits).
+ */
+static unsigned long dirty_poll_interval(unsigned long dirty,
+					 unsigned long thresh)
+{
+	if (thresh > dirty)
+		return 1UL << (ilog2(thresh - dirty) >> 1);
+
+	return 1;
+}
+
+/*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data. It looks at the number of dirty pages in the machine and will force
  * the caller to perform writeback if the system is over `vm_dirty_ratio'.
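The near-sqrt scaling added above makes a task poll the global counters roughly every sqrt(headroom) dirtied pages, so the polling cost shrinks as the system approaches its dirty limit. A small userspace sketch, with ilog2() re-implemented locally and made-up sample margins, shows the resulting intervals:

#include <stdio.h>

/* Illustrative stand-in for the kernel's ilog2(): floor(log2(x)) for x > 0. */
static unsigned int ilog2_approx(unsigned long x)
{
	unsigned int log = 0;

	while (x >>= 1)
		log++;
	return log;
}

/* Same shape as dirty_poll_interval() in the hunk above. */
static unsigned long dirty_poll_interval(unsigned long dirty, unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2_approx(thresh - dirty) >> 1);
	return 1;
}

int main(void)
{
	/* Made-up safety margins, in pages, just to show the near-sqrt scaling. */
	unsigned long margin[] = { 16, 1024, 65536, 1048576 };

	for (int i = 0; i < 4; i++)
		printf("margin %8lu pages -> poll every %4lu dirtied pages\n",
		       margin[i], dirty_poll_interval(0, margin[i]));
	return 0;
}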
@@ -1112,6 +1117,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 	if (clear_dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
+	current->nr_dirtied = 0;
+	current->nr_dirtied_pause = dirty_poll_interval(nr_dirty, dirty_thresh);
+
 	if (writeback_in_progress(bdi))
 		return;
 
@@ -1138,7 +1146,7 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
 	}
 }
 
-static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
@@ -1158,31 +1166,39 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 					unsigned long nr_pages_dirtied)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	unsigned long ratelimit;
-	unsigned long *p;
+	int ratelimit;
+	int *p;
 
 	if (!bdi_cap_account_dirty(bdi))
 		return;
 
-	ratelimit = ratelimit_pages;
-	if (mapping->backing_dev_info->dirty_exceeded)
-		ratelimit = 8;
+	ratelimit = current->nr_dirtied_pause;
+	if (bdi->dirty_exceeded)
+		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
+
+	current->nr_dirtied += nr_pages_dirtied;
 
+	preempt_disable();
 	/*
-	 * Check the rate limiting. Also, we do not want to throttle real-time
-	 * tasks in balance_dirty_pages(). Period.
+	 * This prevents one CPU to accumulate too many dirtied pages without
+	 * calling into balance_dirty_pages(), which can happen when there are
+	 * 1000+ tasks, all of them start dirtying pages at exactly the same
+	 * time, hence all honoured too large initial task->nr_dirtied_pause.
 	 */
-	preempt_disable();
 	p = &__get_cpu_var(bdp_ratelimits);
-	*p += nr_pages_dirtied;
-	if (unlikely(*p >= ratelimit)) {
-		ratelimit = sync_writeback_pages(*p);
+	if (unlikely(current->nr_dirtied >= ratelimit))
 		*p = 0;
-		preempt_enable();
-		balance_dirty_pages(mapping, ratelimit);
-		return;
+	else {
+		*p += nr_pages_dirtied;
+		if (unlikely(*p >= ratelimit_pages)) {
+			*p = 0;
+			ratelimit = 0;
+		}
 	}
 	preempt_enable();
+
+	if (unlikely(current->nr_dirtied >= ratelimit))
+		balance_dirty_pages(mapping, current->nr_dirtied);
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
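Taken together, the rewritten balance_dirty_pages_ratelimited_nr() throttles on the task's own nr_dirtied counter and keeps the per-CPU bdp_ratelimits counter only as a backstop against many tasks batching at once. A rough userspace model of that control flow (the struct, the per-CPU stand-in and every sample number are invented for illustration; only the decision logic mirrors the hunk above):

#include <stdbool.h>
#include <stdio.h>

struct task_model {
	int nr_dirtied;
	int nr_dirtied_pause;
};

static int bdp_ratelimits_model;		/* stands in for the per-CPU counter */
static const int ratelimit_pages_model = 1024;	/* arbitrary sample value */

static bool need_balance_dirty_pages(struct task_model *tsk,
				     int nr_pages_dirtied, bool dirty_exceeded)
{
	int ratelimit = tsk->nr_dirtied_pause;

	if (dirty_exceeded && ratelimit > 8)
		ratelimit = 8;			/* 32 KB with 4 KB pages */

	tsk->nr_dirtied += nr_pages_dirtied;

	if (tsk->nr_dirtied >= ratelimit) {
		bdp_ratelimits_model = 0;
	} else {
		bdp_ratelimits_model += nr_pages_dirtied;
		if (bdp_ratelimits_model >= ratelimit_pages_model) {
			bdp_ratelimits_model = 0;
			ratelimit = 0;		/* per-CPU backstop: force a pause */
		}
	}
	return tsk->nr_dirtied >= ratelimit;
}

int main(void)
{
	struct task_model tsk = { .nr_dirtied = 0, .nr_dirtied_pause = 32 };

	for (int i = 1; i <= 6; i++) {
		bool throttle = need_balance_dirty_pages(&tsk, 8, false);

		printf("write %d: nr_dirtied=%d throttle=%d\n",
		       i, tsk.nr_dirtied, throttle);
		if (throttle)
			tsk.nr_dirtied = 0;	/* balance_dirty_pages() resets this */
	}
	return 0;
}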
@@ -1277,22 +1293,17 @@ void laptop_sync_completion(void)
  *
  * Here we set ratelimit_pages to a level which ensures that when all CPUs are
  * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
- * thresholds before writeback cuts in.
- *
- * But the limit should not be set too high. Because it also controls the
- * amount of memory which the balance_dirty_pages() caller has to write back.
- * If this is too large then the caller will block on the IO queue all the
- * time. So limit it to four megabytes - the balance_dirty_pages() caller
- * will write six megabyte chunks, max.
+ * thresholds.
  */
 
 void writeback_set_ratelimit(void)
 {
-	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	global_dirty_limits(&background_thresh, &dirty_thresh);
+	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
 		ratelimit_pages = 16;
-	if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
-		ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
 static int __cpuinit
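With the last hunk, ratelimit_pages is derived from the dirty threshold instead of total memory, so the per-CPU backstop tracks the actual limits. A small sketch of the new formula (the dirty threshold and CPU count are made-up sample values, not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned long dirty_thresh = 200000;	/* pages; assumed example */
	unsigned long num_online_cpus = 16;	/* assumed example */
	long ratelimit_pages;

	/*
	 * All CPUs together may batch at most ~1/32 (about 3%) of the dirty
	 * threshold in their per-CPU counters before forcing a check.
	 */
	ratelimit_pages = dirty_thresh / (num_online_cpus * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;

	printf("ratelimit_pages = %ld pages per CPU\n", ratelimit_pages);
	return 0;
}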