diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 341 | ||||
-rw-r--r-- | mm/page-writeback.c | 179 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
3 files changed, 352 insertions, 170 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6f163e0f0509..7f3fa79f25c0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -1,8 +1,11 @@ | |||
1 | 1 | ||
2 | #include <linux/wait.h> | 2 | #include <linux/wait.h> |
3 | #include <linux/backing-dev.h> | 3 | #include <linux/backing-dev.h> |
4 | #include <linux/kthread.h> | ||
5 | #include <linux/freezer.h> | ||
4 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
5 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
8 | #include <linux/mm.h> | ||
6 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
7 | #include <linux/module.h> | 10 | #include <linux/module.h> |
8 | #include <linux/writeback.h> | 11 | #include <linux/writeback.h> |
@@ -22,8 +25,18 @@ struct backing_dev_info default_backing_dev_info = { | |||
22 | EXPORT_SYMBOL_GPL(default_backing_dev_info); | 25 | EXPORT_SYMBOL_GPL(default_backing_dev_info); |
23 | 26 | ||
24 | static struct class *bdi_class; | 27 | static struct class *bdi_class; |
25 | DEFINE_MUTEX(bdi_lock); | 28 | DEFINE_SPINLOCK(bdi_lock); |
26 | LIST_HEAD(bdi_list); | 29 | LIST_HEAD(bdi_list); |
30 | LIST_HEAD(bdi_pending_list); | ||
31 | |||
32 | static struct task_struct *sync_supers_tsk; | ||
33 | static struct timer_list sync_supers_timer; | ||
34 | |||
35 | static int bdi_sync_supers(void *); | ||
36 | static void sync_supers_timer_fn(unsigned long); | ||
37 | static void arm_supers_timer(void); | ||
38 | |||
39 | static void bdi_add_default_flusher_task(struct backing_dev_info *bdi); | ||
27 | 40 | ||
28 | #ifdef CONFIG_DEBUG_FS | 41 | #ifdef CONFIG_DEBUG_FS |
29 | #include <linux/debugfs.h> | 42 | #include <linux/debugfs.h> |
@@ -187,6 +200,13 @@ static int __init default_bdi_init(void) | |||
187 | { | 200 | { |
188 | int err; | 201 | int err; |
189 | 202 | ||
203 | sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers"); | ||
204 | BUG_ON(IS_ERR(sync_supers_tsk)); | ||
205 | |||
206 | init_timer(&sync_supers_timer); | ||
207 | setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0); | ||
208 | arm_supers_timer(); | ||
209 | |||
190 | err = bdi_init(&default_backing_dev_info); | 210 | err = bdi_init(&default_backing_dev_info); |
191 | if (!err) | 211 | if (!err) |
192 | bdi_register(&default_backing_dev_info, NULL, "default"); | 212 | bdi_register(&default_backing_dev_info, NULL, "default"); |
@@ -195,6 +215,242 @@ static int __init default_bdi_init(void) | |||
195 | } | 215 | } |
196 | subsys_initcall(default_bdi_init); | 216 | subsys_initcall(default_bdi_init); |
197 | 217 | ||
218 | static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) | ||
219 | { | ||
220 | memset(wb, 0, sizeof(*wb)); | ||
221 | |||
222 | wb->bdi = bdi; | ||
223 | wb->last_old_flush = jiffies; | ||
224 | INIT_LIST_HEAD(&wb->b_dirty); | ||
225 | INIT_LIST_HEAD(&wb->b_io); | ||
226 | INIT_LIST_HEAD(&wb->b_more_io); | ||
227 | } | ||
228 | |||
229 | static void bdi_task_init(struct backing_dev_info *bdi, | ||
230 | struct bdi_writeback *wb) | ||
231 | { | ||
232 | struct task_struct *tsk = current; | ||
233 | |||
234 | spin_lock(&bdi->wb_lock); | ||
235 | list_add_tail_rcu(&wb->list, &bdi->wb_list); | ||
236 | spin_unlock(&bdi->wb_lock); | ||
237 | |||
238 | tsk->flags |= PF_FLUSHER | PF_SWAPWRITE; | ||
239 | set_freezable(); | ||
240 | |||
241 | /* | ||
242 | * Our parent may run at a different priority, just set us to normal | ||
243 | */ | ||
244 | set_user_nice(tsk, 0); | ||
245 | } | ||
246 | |||
247 | static int bdi_start_fn(void *ptr) | ||
248 | { | ||
249 | struct bdi_writeback *wb = ptr; | ||
250 | struct backing_dev_info *bdi = wb->bdi; | ||
251 | int ret; | ||
252 | |||
253 | /* | ||
254 | * Add us to the active bdi_list | ||
255 | */ | ||
256 | spin_lock(&bdi_lock); | ||
257 | list_add(&bdi->bdi_list, &bdi_list); | ||
258 | spin_unlock(&bdi_lock); | ||
259 | |||
260 | bdi_task_init(bdi, wb); | ||
261 | |||
262 | /* | ||
263 | * Clear pending bit and wake up anybody waiting to tear us down | |
264 | */ | ||
265 | clear_bit(BDI_pending, &bdi->state); | ||
266 | smp_mb__after_clear_bit(); | ||
267 | wake_up_bit(&bdi->state, BDI_pending); | ||
268 | |||
269 | ret = bdi_writeback_task(wb); | ||
270 | |||
271 | /* | ||
272 | * Remove us from the list | ||
273 | */ | ||
274 | spin_lock(&bdi->wb_lock); | ||
275 | list_del_rcu(&wb->list); | ||
276 | spin_unlock(&bdi->wb_lock); | ||
277 | |||
278 | /* | ||
279 | * Flush any work that raced with us exiting. No new work | ||
280 | * will be added, since this bdi isn't discoverable anymore. | ||
281 | */ | ||
282 | if (!list_empty(&bdi->work_list)) | ||
283 | wb_do_writeback(wb, 1); | ||
284 | |||
285 | wb->task = NULL; | ||
286 | return ret; | ||
287 | } | ||
288 | |||
289 | int bdi_has_dirty_io(struct backing_dev_info *bdi) | ||
290 | { | ||
291 | return wb_has_dirty_io(&bdi->wb); | ||
292 | } | ||
293 | |||
294 | static void bdi_flush_io(struct backing_dev_info *bdi) | ||
295 | { | ||
296 | struct writeback_control wbc = { | ||
297 | .bdi = bdi, | ||
298 | .sync_mode = WB_SYNC_NONE, | ||
299 | .older_than_this = NULL, | ||
300 | .range_cyclic = 1, | ||
301 | .nr_to_write = 1024, | ||
302 | }; | ||
303 | |||
304 | writeback_inodes_wbc(&wbc); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * kupdated() used to do this. We cannot do it from the bdi_forker_task() | ||
309 | * or we risk deadlocking on ->s_umount. The longer term solution would be | ||
310 | * to implement sync_supers_bdi() or similar and simply do it from the | ||
311 | * bdi writeback tasks individually. | ||
312 | */ | ||
313 | static int bdi_sync_supers(void *unused) | ||
314 | { | ||
315 | set_user_nice(current, 0); | ||
316 | |||
317 | while (!kthread_should_stop()) { | ||
318 | set_current_state(TASK_INTERRUPTIBLE); | ||
319 | schedule(); | ||
320 | |||
321 | /* | ||
322 | * Do this periodically, like kupdated() did before. | ||
323 | */ | ||
324 | sync_supers(); | ||
325 | } | ||
326 | |||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | static void arm_supers_timer(void) | ||
331 | { | ||
332 | unsigned long next; | ||
333 | |||
334 | next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies; | ||
335 | mod_timer(&sync_supers_timer, round_jiffies_up(next)); | ||
336 | } | ||
337 | |||
338 | static void sync_supers_timer_fn(unsigned long unused) | ||
339 | { | ||
340 | wake_up_process(sync_supers_tsk); | ||
341 | arm_supers_timer(); | ||
342 | } | ||
343 | |||
344 | static int bdi_forker_task(void *ptr) | ||
345 | { | ||
346 | struct bdi_writeback *me = ptr; | ||
347 | |||
348 | bdi_task_init(me->bdi, me); | ||
349 | |||
350 | for (;;) { | ||
351 | struct backing_dev_info *bdi, *tmp; | ||
352 | struct bdi_writeback *wb; | ||
353 | |||
354 | /* | ||
355 | * Temporary measure, we want to make sure we don't see | ||
356 | * dirty data on the default backing_dev_info | ||
357 | */ | ||
358 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) | ||
359 | wb_do_writeback(me, 0); | ||
360 | |||
361 | spin_lock(&bdi_lock); | ||
362 | |||
363 | /* | ||
364 | * Check if any existing bdi's have dirty data without | ||
365 | * a thread registered. If so, set that up. | ||
366 | */ | ||
367 | list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) { | ||
368 | if (bdi->wb.task) | ||
369 | continue; | ||
370 | if (list_empty(&bdi->work_list) && | ||
371 | !bdi_has_dirty_io(bdi)) | ||
372 | continue; | ||
373 | |||
374 | bdi_add_default_flusher_task(bdi); | ||
375 | } | ||
376 | |||
377 | set_current_state(TASK_INTERRUPTIBLE); | ||
378 | |||
379 | if (list_empty(&bdi_pending_list)) { | ||
380 | unsigned long wait; | ||
381 | |||
382 | spin_unlock(&bdi_lock); | ||
383 | wait = msecs_to_jiffies(dirty_writeback_interval * 10); | ||
384 | schedule_timeout(wait); | ||
385 | try_to_freeze(); | ||
386 | continue; | ||
387 | } | ||
388 | |||
389 | __set_current_state(TASK_RUNNING); | ||
390 | |||
391 | /* | ||
392 | * This is our real job - check for pending entries in | ||
393 | * bdi_pending_list, and create the tasks that got added | ||
394 | */ | ||
395 | bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, | ||
396 | bdi_list); | ||
397 | list_del_init(&bdi->bdi_list); | ||
398 | spin_unlock(&bdi_lock); | ||
399 | |||
400 | wb = &bdi->wb; | ||
401 | wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", | ||
402 | dev_name(bdi->dev)); | ||
403 | /* | ||
404 | * If task creation fails, then re-add the bdi to | |
405 | * the pending list and force writeout of the bdi | ||
406 | * from this forker thread. That will free some memory | ||
407 | * and we can try again. | ||
408 | */ | ||
409 | if (IS_ERR(wb->task)) { | ||
410 | wb->task = NULL; | ||
411 | |||
412 | /* | ||
413 | * Add this 'bdi' to the back, so we get | ||
414 | * a chance to flush other bdi's to free | ||
415 | * memory. | ||
416 | */ | ||
417 | spin_lock(&bdi_lock); | ||
418 | list_add_tail(&bdi->bdi_list, &bdi_pending_list); | ||
419 | spin_unlock(&bdi_lock); | ||
420 | |||
421 | bdi_flush_io(bdi); | ||
422 | } | ||
423 | } | ||
424 | |||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | /* | ||
429 | * Add the default flusher task that gets created for any bdi | ||
430 | * that has dirty data pending writeout | ||
431 | */ | ||
432 | void static bdi_add_default_flusher_task(struct backing_dev_info *bdi) | ||
433 | { | ||
434 | if (!bdi_cap_writeback_dirty(bdi)) | ||
435 | return; | ||
436 | |||
437 | /* | ||
438 | * Check with the helper whether to proceed adding a task. Will only | ||
439 | * abort if two or more simultaneous calls to | |
440 | * bdi_add_default_flusher_task() occurred, further additions will block | |
441 | * waiting for previous additions to finish. | ||
442 | */ | ||
443 | if (!test_and_set_bit(BDI_pending, &bdi->state)) { | ||
444 | list_move_tail(&bdi->bdi_list, &bdi_pending_list); | ||
445 | |||
446 | /* | ||
447 | * We are now on the pending list, wake up bdi_forker_task() | ||
448 | * to finish the job and add us back to the active bdi_list | ||
449 | */ | ||
450 | wake_up_process(default_backing_dev_info.wb.task); | ||
451 | } | ||
452 | } | ||
453 | |||
198 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | 454 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, |
199 | const char *fmt, ...) | 455 | const char *fmt, ...) |
200 | { | 456 | { |
@@ -213,13 +469,34 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
213 | goto exit; | 469 | goto exit; |
214 | } | 470 | } |
215 | 471 | ||
216 | mutex_lock(&bdi_lock); | 472 | spin_lock(&bdi_lock); |
217 | list_add_tail(&bdi->bdi_list, &bdi_list); | 473 | list_add_tail(&bdi->bdi_list, &bdi_list); |
218 | mutex_unlock(&bdi_lock); | 474 | spin_unlock(&bdi_lock); |
219 | 475 | ||
220 | bdi->dev = dev; | 476 | bdi->dev = dev; |
221 | bdi_debug_register(bdi, dev_name(dev)); | ||
222 | 477 | ||
478 | /* | ||
479 | * Just start the forker thread for our default backing_dev_info, | ||
480 | * and add other bdi's to the list. They will get a thread created | ||
481 | * on-demand when they need it. | ||
482 | */ | ||
483 | if (bdi_cap_flush_forker(bdi)) { | ||
484 | struct bdi_writeback *wb = &bdi->wb; | ||
485 | |||
486 | wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s", | ||
487 | dev_name(dev)); | ||
488 | if (IS_ERR(wb->task)) { | ||
489 | wb->task = NULL; | ||
490 | ret = -ENOMEM; | ||
491 | |||
492 | spin_lock(&bdi_lock); | ||
493 | list_del(&bdi->bdi_list); | ||
494 | spin_unlock(&bdi_lock); | ||
495 | goto exit; | ||
496 | } | ||
497 | } | ||
498 | |||
499 | bdi_debug_register(bdi, dev_name(dev)); | ||
223 | exit: | 500 | exit: |
224 | return ret; | 501 | return ret; |
225 | } | 502 | } |
@@ -231,17 +508,42 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) | |||
231 | } | 508 | } |
232 | EXPORT_SYMBOL(bdi_register_dev); | 509 | EXPORT_SYMBOL(bdi_register_dev); |
233 | 510 | ||
234 | static void bdi_remove_from_list(struct backing_dev_info *bdi) | 511 | /* |
512 | * Remove bdi from the global list and shut down any threads we have running | |
513 | */ | ||
514 | static void bdi_wb_shutdown(struct backing_dev_info *bdi) | ||
235 | { | 515 | { |
236 | mutex_lock(&bdi_lock); | 516 | struct bdi_writeback *wb; |
517 | |||
518 | if (!bdi_cap_writeback_dirty(bdi)) | ||
519 | return; | ||
520 | |||
521 | /* | ||
522 | * If setup is pending, wait for that to complete first | ||
523 | */ | ||
524 | wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait, | ||
525 | TASK_UNINTERRUPTIBLE); | ||
526 | |||
527 | /* | ||
528 | * Make sure nobody finds us on the bdi_list anymore | ||
529 | */ | ||
530 | spin_lock(&bdi_lock); | ||
237 | list_del(&bdi->bdi_list); | 531 | list_del(&bdi->bdi_list); |
238 | mutex_unlock(&bdi_lock); | 532 | spin_unlock(&bdi_lock); |
533 | |||
534 | /* | ||
535 | * Finally, kill the kernel threads. We don't need to be RCU | ||
536 | * safe anymore, since the bdi is gone from visibility. | ||
537 | */ | ||
538 | list_for_each_entry(wb, &bdi->wb_list, list) | ||
539 | kthread_stop(wb->task); | ||
239 | } | 540 | } |
240 | 541 | ||
241 | void bdi_unregister(struct backing_dev_info *bdi) | 542 | void bdi_unregister(struct backing_dev_info *bdi) |
242 | { | 543 | { |
243 | if (bdi->dev) { | 544 | if (bdi->dev) { |
244 | bdi_remove_from_list(bdi); | 545 | if (!bdi_cap_flush_forker(bdi)) |
546 | bdi_wb_shutdown(bdi); | ||
245 | bdi_debug_unregister(bdi); | 547 | bdi_debug_unregister(bdi); |
246 | device_unregister(bdi->dev); | 548 | device_unregister(bdi->dev); |
247 | bdi->dev = NULL; | 549 | bdi->dev = NULL; |
@@ -251,18 +553,25 @@ EXPORT_SYMBOL(bdi_unregister); | |||
251 | 553 | ||
252 | int bdi_init(struct backing_dev_info *bdi) | 554 | int bdi_init(struct backing_dev_info *bdi) |
253 | { | 555 | { |
254 | int i; | 556 | int i, err; |
255 | int err; | ||
256 | 557 | ||
257 | bdi->dev = NULL; | 558 | bdi->dev = NULL; |
258 | 559 | ||
259 | bdi->min_ratio = 0; | 560 | bdi->min_ratio = 0; |
260 | bdi->max_ratio = 100; | 561 | bdi->max_ratio = 100; |
261 | bdi->max_prop_frac = PROP_FRAC_BASE; | 562 | bdi->max_prop_frac = PROP_FRAC_BASE; |
563 | spin_lock_init(&bdi->wb_lock); | ||
262 | INIT_LIST_HEAD(&bdi->bdi_list); | 564 | INIT_LIST_HEAD(&bdi->bdi_list); |
263 | INIT_LIST_HEAD(&bdi->b_io); | 565 | INIT_LIST_HEAD(&bdi->wb_list); |
264 | INIT_LIST_HEAD(&bdi->b_dirty); | 566 | INIT_LIST_HEAD(&bdi->work_list); |
265 | INIT_LIST_HEAD(&bdi->b_more_io); | 567 | |
568 | bdi_wb_init(&bdi->wb, bdi); | ||
569 | |||
570 | /* | ||
571 | * Just one thread support for now, hard code mask and count | ||
572 | */ | ||
573 | bdi->wb_mask = 1; | ||
574 | bdi->wb_cnt = 1; | ||
266 | 575 | ||
267 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 576 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
268 | err = percpu_counter_init(&bdi->bdi_stat[i], 0); | 577 | err = percpu_counter_init(&bdi->bdi_stat[i], 0); |
@@ -277,8 +586,6 @@ int bdi_init(struct backing_dev_info *bdi) | |||
277 | err: | 586 | err: |
278 | while (i--) | 587 | while (i--) |
279 | percpu_counter_destroy(&bdi->bdi_stat[i]); | 588 | percpu_counter_destroy(&bdi->bdi_stat[i]); |
280 | |||
281 | bdi_remove_from_list(bdi); | ||
282 | } | 589 | } |
283 | 590 | ||
284 | return err; | 591 | return err; |
@@ -289,9 +596,7 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
289 | { | 596 | { |
290 | int i; | 597 | int i; |
291 | 598 | ||
292 | WARN_ON(!list_empty(&bdi->b_dirty)); | 599 | WARN_ON(bdi_has_dirty_io(bdi)); |
293 | WARN_ON(!list_empty(&bdi->b_io)); | ||
294 | WARN_ON(!list_empty(&bdi->b_more_io)); | ||
295 | 600 | ||
296 | bdi_unregister(bdi); | 601 | bdi_unregister(bdi); |
297 | 602 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f8341b6019bf..25e7770309b8 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -36,15 +36,6 @@ | |||
36 | #include <linux/pagevec.h> | 36 | #include <linux/pagevec.h> |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * The maximum number of pages to writeout in a single bdflush/kupdate | ||
40 | * operation. We do this so we don't hold I_SYNC against an inode for | ||
41 | * enormous amounts of time, which would block a userspace task which has | ||
42 | * been forced to throttle against that inode. Also, the code reevaluates | ||
43 | * the dirty each time it has written this many pages. | ||
44 | */ | ||
45 | #define MAX_WRITEBACK_PAGES 1024 | ||
46 | |||
47 | /* | ||
48 | * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited | 39 | * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited |
49 | * will look to see if it needs to force writeback or throttling. | 40 | * will look to see if it needs to force writeback or throttling. |
50 | */ | 41 | */ |
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode); | |||
117 | /* End of sysctl-exported parameters */ | 108 | /* End of sysctl-exported parameters */ |
118 | 109 | ||
119 | 110 | ||
120 | static void background_writeout(unsigned long _min_pages); | ||
121 | |||
122 | /* | 111 | /* |
123 | * Scale the writeback cache size proportional to the relative writeout speeds. | 112 | * Scale the writeback cache size proportional to the relative writeout speeds. |
124 | * | 113 | * |
@@ -326,7 +315,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
326 | { | 315 | { |
327 | int ret = 0; | 316 | int ret = 0; |
328 | 317 | ||
329 | mutex_lock(&bdi_lock); | 318 | spin_lock(&bdi_lock); |
330 | if (min_ratio > bdi->max_ratio) { | 319 | if (min_ratio > bdi->max_ratio) { |
331 | ret = -EINVAL; | 320 | ret = -EINVAL; |
332 | } else { | 321 | } else { |
@@ -338,7 +327,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
338 | ret = -EINVAL; | 327 | ret = -EINVAL; |
339 | } | 328 | } |
340 | } | 329 | } |
341 | mutex_unlock(&bdi_lock); | 330 | spin_unlock(&bdi_lock); |
342 | 331 | ||
343 | return ret; | 332 | return ret; |
344 | } | 333 | } |
@@ -350,14 +339,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | |||
350 | if (max_ratio > 100) | 339 | if (max_ratio > 100) |
351 | return -EINVAL; | 340 | return -EINVAL; |
352 | 341 | ||
353 | mutex_lock(&bdi_lock); | 342 | spin_lock(&bdi_lock); |
354 | if (bdi->min_ratio > max_ratio) { | 343 | if (bdi->min_ratio > max_ratio) { |
355 | ret = -EINVAL; | 344 | ret = -EINVAL; |
356 | } else { | 345 | } else { |
357 | bdi->max_ratio = max_ratio; | 346 | bdi->max_ratio = max_ratio; |
358 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | 347 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; |
359 | } | 348 | } |
360 | mutex_unlock(&bdi_lock); | 349 | spin_unlock(&bdi_lock); |
361 | 350 | ||
362 | return ret; | 351 | return ret; |
363 | } | 352 | } |
@@ -543,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
543 | * up. | 532 | * up. |
544 | */ | 533 | */ |
545 | if (bdi_nr_reclaimable > bdi_thresh) { | 534 | if (bdi_nr_reclaimable > bdi_thresh) { |
546 | writeback_inodes(&wbc); | 535 | writeback_inodes_wbc(&wbc); |
547 | pages_written += write_chunk - wbc.nr_to_write; | 536 | pages_written += write_chunk - wbc.nr_to_write; |
548 | get_dirty_limits(&background_thresh, &dirty_thresh, | 537 | get_dirty_limits(&background_thresh, &dirty_thresh, |
549 | &bdi_thresh, bdi); | 538 | &bdi_thresh, bdi); |
@@ -572,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
572 | if (pages_written >= write_chunk) | 561 | if (pages_written >= write_chunk) |
573 | break; /* We've done our duty */ | 562 | break; /* We've done our duty */ |
574 | 563 | ||
575 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 564 | schedule_timeout(1); |
576 | } | 565 | } |
577 | 566 | ||
578 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && | 567 | if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && |
@@ -591,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
591 | * background_thresh, to keep the amount of dirty memory low. | 580 | * background_thresh, to keep the amount of dirty memory low. |
592 | */ | 581 | */ |
593 | if ((laptop_mode && pages_written) || | 582 | if ((laptop_mode && pages_written) || |
594 | (!laptop_mode && (global_page_state(NR_FILE_DIRTY) | 583 | (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) |
595 | + global_page_state(NR_UNSTABLE_NFS) | 584 | + global_page_state(NR_UNSTABLE_NFS)) |
596 | > background_thresh))) | 585 | > background_thresh))) { |
597 | pdflush_operation(background_writeout, 0); | 586 | struct writeback_control wbc = { |
587 | .bdi = bdi, | ||
588 | .sync_mode = WB_SYNC_NONE, | ||
589 | .nr_to_write = nr_writeback, | ||
590 | }; | ||
591 | |||
592 | |||
593 | bdi_start_writeback(&wbc); | ||
594 | } | ||
598 | } | 595 | } |
599 | 596 | ||
600 | void set_page_dirty_balance(struct page *page, int page_mkwrite) | 597 | void set_page_dirty_balance(struct page *page, int page_mkwrite) |
@@ -678,153 +675,35 @@ void throttle_vm_writeout(gfp_t gfp_mask) | |||
678 | } | 675 | } |
679 | } | 676 | } |
680 | 677 | ||
681 | /* | ||
682 | * writeback at least _min_pages, and keep writing until the amount of dirty | ||
683 | * memory is less than the background threshold, or until we're all clean. | ||
684 | */ | ||
685 | static void background_writeout(unsigned long _min_pages) | ||
686 | { | ||
687 | long min_pages = _min_pages; | ||
688 | struct writeback_control wbc = { | ||
689 | .bdi = NULL, | ||
690 | .sync_mode = WB_SYNC_NONE, | ||
691 | .older_than_this = NULL, | ||
692 | .nr_to_write = 0, | ||
693 | .nonblocking = 1, | ||
694 | .range_cyclic = 1, | ||
695 | }; | ||
696 | |||
697 | for ( ; ; ) { | ||
698 | unsigned long background_thresh; | ||
699 | unsigned long dirty_thresh; | ||
700 | |||
701 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | ||
702 | if (global_page_state(NR_FILE_DIRTY) + | ||
703 | global_page_state(NR_UNSTABLE_NFS) < background_thresh | ||
704 | && min_pages <= 0) | ||
705 | break; | ||
706 | wbc.more_io = 0; | ||
707 | wbc.encountered_congestion = 0; | ||
708 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | ||
709 | wbc.pages_skipped = 0; | ||
710 | writeback_inodes(&wbc); | ||
711 | min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
712 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | ||
713 | /* Wrote less than expected */ | ||
714 | if (wbc.encountered_congestion || wbc.more_io) | ||
715 | congestion_wait(BLK_RW_ASYNC, HZ/10); | ||
716 | else | ||
717 | break; | ||
718 | } | ||
719 | } | ||
720 | } | ||
721 | |||
722 | /* | ||
723 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | ||
724 | * the whole world. Returns 0 if a pdflush thread was dispatched. Returns | ||
725 | * -1 if all pdflush threads were busy. | ||
726 | */ | ||
727 | int wakeup_pdflush(long nr_pages) | ||
728 | { | ||
729 | if (nr_pages == 0) | ||
730 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
731 | global_page_state(NR_UNSTABLE_NFS); | ||
732 | return pdflush_operation(background_writeout, nr_pages); | ||
733 | } | ||
734 | |||
735 | static void wb_timer_fn(unsigned long unused); | ||
736 | static void laptop_timer_fn(unsigned long unused); | 678 | static void laptop_timer_fn(unsigned long unused); |
737 | 679 | ||
738 | static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0); | ||
739 | static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); | 680 | static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); |
740 | 681 | ||
741 | /* | 682 | /* |
742 | * Periodic writeback of "old" data. | ||
743 | * | ||
744 | * Define "old": the first time one of an inode's pages is dirtied, we mark the | ||
745 | * dirtying-time in the inode's address_space. So this periodic writeback code | ||
746 | * just walks the superblock inode list, writing back any inodes which are | ||
747 | * older than a specific point in time. | ||
748 | * | ||
749 | * Try to run once per dirty_writeback_interval. But if a writeback event | ||
750 | * takes longer than a dirty_writeback_interval interval, then leave a | ||
751 | * one-second gap. | ||
752 | * | ||
753 | * older_than_this takes precedence over nr_to_write. So we'll only write back | ||
754 | * all dirty pages if they are all attached to "old" mappings. | ||
755 | */ | ||
756 | static void wb_kupdate(unsigned long arg) | ||
757 | { | ||
758 | unsigned long oldest_jif; | ||
759 | unsigned long start_jif; | ||
760 | unsigned long next_jif; | ||
761 | long nr_to_write; | ||
762 | struct writeback_control wbc = { | ||
763 | .bdi = NULL, | ||
764 | .sync_mode = WB_SYNC_NONE, | ||
765 | .older_than_this = &oldest_jif, | ||
766 | .nr_to_write = 0, | ||
767 | .nonblocking = 1, | ||
768 | .for_kupdate = 1, | ||
769 | .range_cyclic = 1, | ||
770 | }; | ||
771 | |||
772 | sync_supers(); | ||
773 | |||
774 | oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); | ||
775 | start_jif = jiffies; | ||
776 | next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10); | ||
777 | nr_to_write = global_page_state(NR_FILE_DIRTY) + | ||
778 | global_page_state(NR_UNSTABLE_NFS) + | ||
779 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
780 | while (nr_to_write > 0) { | ||
781 | wbc.more_io = 0; | ||
782 | wbc.encountered_congestion = 0; | ||
783 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | ||
784 | writeback_inodes(&wbc); | ||
785 | if (wbc.nr_to_write > 0) { | ||
786 | if (wbc.encountered_congestion || wbc.more_io) | ||
787 | congestion_wait(BLK_RW_ASYNC, HZ/10); | ||
788 | else | ||
789 | break; /* All the old data is written */ | ||
790 | } | ||
791 | nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
792 | } | ||
793 | if (time_before(next_jif, jiffies + HZ)) | ||
794 | next_jif = jiffies + HZ; | ||
795 | if (dirty_writeback_interval) | ||
796 | mod_timer(&wb_timer, next_jif); | ||
797 | } | ||
798 | |||
799 | /* | ||
800 | * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs | 683 | * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs |
801 | */ | 684 | */ |
802 | int dirty_writeback_centisecs_handler(ctl_table *table, int write, | 685 | int dirty_writeback_centisecs_handler(ctl_table *table, int write, |
803 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 686 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) |
804 | { | 687 | { |
805 | proc_dointvec(table, write, file, buffer, length, ppos); | 688 | proc_dointvec(table, write, file, buffer, length, ppos); |
806 | if (dirty_writeback_interval) | ||
807 | mod_timer(&wb_timer, jiffies + | ||
808 | msecs_to_jiffies(dirty_writeback_interval * 10)); | ||
809 | else | ||
810 | del_timer(&wb_timer); | ||
811 | return 0; | 689 | return 0; |
812 | } | 690 | } |
813 | 691 | ||
814 | static void wb_timer_fn(unsigned long unused) | 692 | static void do_laptop_sync(struct work_struct *work) |
815 | { | ||
816 | if (pdflush_operation(wb_kupdate, 0) < 0) | ||
817 | mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */ | ||
818 | } | ||
819 | |||
820 | static void laptop_flush(unsigned long unused) | ||
821 | { | 693 | { |
822 | sys_sync(); | 694 | wakeup_flusher_threads(0); |
695 | kfree(work); | ||
823 | } | 696 | } |
824 | 697 | ||
825 | static void laptop_timer_fn(unsigned long unused) | 698 | static void laptop_timer_fn(unsigned long unused) |
826 | { | 699 | { |
827 | pdflush_operation(laptop_flush, 0); | 700 | struct work_struct *work; |
701 | |||
702 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | ||
703 | if (work) { | ||
704 | INIT_WORK(work, do_laptop_sync); | ||
705 | schedule_work(work); | ||
706 | } | ||
828 | } | 707 | } |
829 | 708 | ||
830 | /* | 709 | /* |
@@ -907,8 +786,6 @@ void __init page_writeback_init(void) | |||
907 | { | 786 | { |
908 | int shift; | 787 | int shift; |
909 | 788 | ||
910 | mod_timer(&wb_timer, | ||
911 | jiffies + msecs_to_jiffies(dirty_writeback_interval * 10)); | ||
912 | writeback_set_ratelimit(); | 789 | writeback_set_ratelimit(); |
913 | register_cpu_notifier(&ratelimit_nb); | 790 | register_cpu_notifier(&ratelimit_nb); |
914 | 791 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index 94e86dd6954c..ba8228e0a806 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1720,7 +1720,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1720 | */ | 1720 | */ |
1721 | if (total_scanned > sc->swap_cluster_max + | 1721 | if (total_scanned > sc->swap_cluster_max + |
1722 | sc->swap_cluster_max / 2) { | 1722 | sc->swap_cluster_max / 2) { |
1723 | wakeup_pdflush(laptop_mode ? 0 : total_scanned); | 1723 | wakeup_flusher_threads(laptop_mode ? 0 : total_scanned); |
1724 | sc->may_writepage = 1; | 1724 | sc->may_writepage = 1; |
1725 | } | 1725 | } |
1726 | 1726 | ||