aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c341
-rw-r--r--mm/page-writeback.c179
-rw-r--r--mm/vmscan.c2
3 files changed, 352 insertions, 170 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6f163e0f0509..7f3fa79f25c0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,8 +1,11 @@
1 1
2#include <linux/wait.h> 2#include <linux/wait.h>
3#include <linux/backing-dev.h> 3#include <linux/backing-dev.h>
4#include <linux/kthread.h>
5#include <linux/freezer.h>
4#include <linux/fs.h> 6#include <linux/fs.h>
5#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/mm.h>
6#include <linux/sched.h> 9#include <linux/sched.h>
7#include <linux/module.h> 10#include <linux/module.h>
8#include <linux/writeback.h> 11#include <linux/writeback.h>
@@ -22,8 +25,18 @@ struct backing_dev_info default_backing_dev_info = {
22EXPORT_SYMBOL_GPL(default_backing_dev_info); 25EXPORT_SYMBOL_GPL(default_backing_dev_info);
23 26
24static struct class *bdi_class; 27static struct class *bdi_class;
25DEFINE_MUTEX(bdi_lock); 28DEFINE_SPINLOCK(bdi_lock);
26LIST_HEAD(bdi_list); 29LIST_HEAD(bdi_list);
30LIST_HEAD(bdi_pending_list);
31
32static struct task_struct *sync_supers_tsk;
33static struct timer_list sync_supers_timer;
34
35static int bdi_sync_supers(void *);
36static void sync_supers_timer_fn(unsigned long);
37static void arm_supers_timer(void);
38
39static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
27 40
28#ifdef CONFIG_DEBUG_FS 41#ifdef CONFIG_DEBUG_FS
29#include <linux/debugfs.h> 42#include <linux/debugfs.h>
@@ -187,6 +200,13 @@ static int __init default_bdi_init(void)
187{ 200{
188 int err; 201 int err;
189 202
203 sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
204 BUG_ON(IS_ERR(sync_supers_tsk));
205
206 init_timer(&sync_supers_timer);
207 setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
208 arm_supers_timer();
209
190 err = bdi_init(&default_backing_dev_info); 210 err = bdi_init(&default_backing_dev_info);
191 if (!err) 211 if (!err)
192 bdi_register(&default_backing_dev_info, NULL, "default"); 212 bdi_register(&default_backing_dev_info, NULL, "default");
@@ -195,6 +215,242 @@ static int __init default_bdi_init(void)
195} 215}
196subsys_initcall(default_bdi_init); 216subsys_initcall(default_bdi_init);
197 217
218static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
219{
220 memset(wb, 0, sizeof(*wb));
221
222 wb->bdi = bdi;
223 wb->last_old_flush = jiffies;
224 INIT_LIST_HEAD(&wb->b_dirty);
225 INIT_LIST_HEAD(&wb->b_io);
226 INIT_LIST_HEAD(&wb->b_more_io);
227}
228
229static void bdi_task_init(struct backing_dev_info *bdi,
230 struct bdi_writeback *wb)
231{
232 struct task_struct *tsk = current;
233
234 spin_lock(&bdi->wb_lock);
235 list_add_tail_rcu(&wb->list, &bdi->wb_list);
236 spin_unlock(&bdi->wb_lock);
237
238 tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
239 set_freezable();
240
241 /*
242 * Our parent may run at a different priority, just set us to normal
243 */
244 set_user_nice(tsk, 0);
245}
246
/*
 * Thread function of a per-bdi flusher thread; bdi_forker_task() passes it
 * to kthread_run() with the bdi's embedded bdi_writeback as @ptr.
 *
 * Start-up: put the bdi back on the global bdi_list, do the common thread
 * setup in bdi_task_init(), then clear BDI_pending and wake anyone waiting
 * on that bit.
 *
 * Work: run bdi_writeback_task() until it returns.
 *
 * Exit: unlink the writeback context from the bdi's wb_list, run one more
 * wb_do_writeback() if work_list is not empty, and reset wb->task to NULL.
 *
 * Returns whatever bdi_writeback_task() returned.
 */
247static int bdi_start_fn(void *ptr)
248{
249 struct bdi_writeback *wb = ptr;
250 struct backing_dev_info *bdi = wb->bdi;
251 int ret;
252
253 /*
254 * Add us to the active bdi_list
255 */
256 spin_lock(&bdi_lock);
257 list_add(&bdi->bdi_list, &bdi_list);
258 spin_unlock(&bdi_lock);
259
260 bdi_task_init(bdi, wb);
261
262 /*
263 * Clear pending bit and wake up anybody waiting to tear us down
264 */
265 clear_bit(BDI_pending, &bdi->state);
 /* Barrier between clearing the bit and waking the bit waiters */
266 smp_mb__after_clear_bit();
267 wake_up_bit(&bdi->state, BDI_pending);
268
269 ret = bdi_writeback_task(wb);
270
271 /*
272 * Remove us from the list
273 */
274 spin_lock(&bdi->wb_lock);
275 list_del_rcu(&wb->list);
276 spin_unlock(&bdi->wb_lock);
277
278 /*
279 * Flush any work that raced with us exiting. No new work
280 * will be added, since this bdi isn't discoverable anymore.
281 */
282 if (!list_empty(&bdi->work_list))
283 wb_do_writeback(wb, 1);
284
 /* A NULL wb->task is what bdi_forker_task() tests for "no thread" */
285 wb->task = NULL;
286 return ret;
287}
288
289int bdi_has_dirty_io(struct backing_dev_info *bdi)
290{
291 return wb_has_dirty_io(&bdi->wb);
292}
293
294static void bdi_flush_io(struct backing_dev_info *bdi)
295{
296 struct writeback_control wbc = {
297 .bdi = bdi,
298 .sync_mode = WB_SYNC_NONE,
299 .older_than_this = NULL,
300 .range_cyclic = 1,
301 .nr_to_write = 1024,
302 };
303
304 writeback_inodes_wbc(&wbc);
305}
306
307/*
308 * kupdated() used to do this. We cannot do it from the bdi_forker_task()
309 * or we risk deadlocking on ->s_umount. The longer term solution would be
310 * to implement sync_supers_bdi() or similar and simply do it from the
311 * bdi writeback tasks individually.
312 */
313static int bdi_sync_supers(void *unused)
314{
315 set_user_nice(current, 0);
316
317 while (!kthread_should_stop()) {
318 set_current_state(TASK_INTERRUPTIBLE);
319 schedule();
320
321 /*
322 * Do this periodically, like kupdated() did before.
323 */
324 sync_supers();
325 }
326
327 return 0;
328}
329
330static void arm_supers_timer(void)
331{
332 unsigned long next;
333
334 next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
335 mod_timer(&sync_supers_timer, round_jiffies_up(next));
336}
337
338static void sync_supers_timer_fn(unsigned long unused)
339{
340 wake_up_process(sync_supers_tsk);
341 arm_supers_timer();
342}
343
/*
 * Thread function of the forker thread; bdi_register() starts it with
 * kthread_run() for a bdi for which bdi_cap_flush_forker() is true, passing
 * that bdi's embedded bdi_writeback as @ptr.
 *
 * Each loop iteration:
 *  1. Writes back the forker's own bdi if it has dirty I/O or queued work.
 *  2. Under bdi_lock, walks bdi_list and calls
 *     bdi_add_default_flusher_task() for every bdi that has no thread
 *     (wb.task is NULL) but has queued work or dirty I/O.
 *  3. If bdi_pending_list is empty, sleeps for up to
 *     dirty_writeback_interval * 10 milliseconds, handles a freeze request
 *     and starts over.
 *  4. Otherwise takes the first bdi off bdi_pending_list and starts a
 *     "flush-<dev>" thread running bdi_start_fn() for it. If that fails,
 *     the bdi is put back at the tail of bdi_pending_list and written back
 *     directly with bdi_flush_io().
 *
 * The for (;;) loop has no break, so the final "return 0" is not reached.
 */
344static int bdi_forker_task(void *ptr)
345{
346 struct bdi_writeback *me = ptr;
347
348 bdi_task_init(me->bdi, me);
349
350 for (;;) {
351 struct backing_dev_info *bdi, *tmp;
352 struct bdi_writeback *wb;
353
354 /*
355 * Temporary measure, we want to make sure we don't see
356 * dirty data on the default backing_dev_info
357 */
358 if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
359 wb_do_writeback(me, 0);
360
361 spin_lock(&bdi_lock);
362
363 /*
364 * Check if any existing bdi's have dirty data without
365 * a thread registered. If so, set that up.
366 */
 /*
  * The _safe iterator is used because bdi_add_default_flusher_task()
  * may move the current entry from bdi_list to bdi_pending_list.
  */
367 list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
368 if (bdi->wb.task)
369 continue;
370 if (list_empty(&bdi->work_list) &&
371 !bdi_has_dirty_io(bdi))
372 continue;
373
374 bdi_add_default_flusher_task(bdi);
375 }
376
 /* Task state is changed before the pending list is checked, with bdi_lock still held */
377 set_current_state(TASK_INTERRUPTIBLE);
378
379 if (list_empty(&bdi_pending_list)) {
380 unsigned long wait;
381
382 spin_unlock(&bdi_lock);
383 wait = msecs_to_jiffies(dirty_writeback_interval * 10);
384 schedule_timeout(wait);
385 try_to_freeze();
386 continue;
387 }
388
 /* There is work to do: undo the TASK_INTERRUPTIBLE set above */
389 __set_current_state(TASK_RUNNING);
390
391 /*
392 * This is our real job - check for pending entries in
393 * bdi_pending_list, and create the tasks that got added
394 */
395 bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
396 bdi_list);
397 list_del_init(&bdi->bdi_list);
398 spin_unlock(&bdi_lock);
399
400 wb = &bdi->wb;
401 wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
402 dev_name(bdi->dev));
403 /*
404 * If task creation fails, then readd the bdi to
405 * the pending list and force writeout of the bdi
406 * from this forker thread. That will free some memory
407 * and we can try again.
408 */
409 if (IS_ERR(wb->task)) {
410 wb->task = NULL;
411
412 /*
413 * Add this 'bdi' to the back, so we get
414 * a chance to flush other bdi's to free
415 * memory.
416 */
417 spin_lock(&bdi_lock);
418 list_add_tail(&bdi->bdi_list, &bdi_pending_list);
419 spin_unlock(&bdi_lock);
420
421 bdi_flush_io(bdi);
422 }
423 }
424
425 return 0;
426}
427
428/*
429 * Add the default flusher task that gets created for any bdi
430 * that has dirty data pending writeout
431 */
432void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
433{
434 if (!bdi_cap_writeback_dirty(bdi))
435 return;
436
437 /*
438 * Check with the helper whether to proceed adding a task. Will only
439 * abort if we two or more simultanous calls to
440 * bdi_add_default_flusher_task() occured, further additions will block
441 * waiting for previous additions to finish.
442 */
443 if (!test_and_set_bit(BDI_pending, &bdi->state)) {
444 list_move_tail(&bdi->bdi_list, &bdi_pending_list);
445
446 /*
447 * We are now on the pending list, wake up bdi_forker_task()
448 * to finish the job and add us back to the active bdi_list
449 */
450 wake_up_process(default_backing_dev_info.wb.task);
451 }
452}
453
198int bdi_register(struct backing_dev_info *bdi, struct device *parent, 454int bdi_register(struct backing_dev_info *bdi, struct device *parent,
199 const char *fmt, ...) 455 const char *fmt, ...)
200{ 456{
@@ -213,13 +469,34 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
213 goto exit; 469 goto exit;
214 } 470 }
215 471
216 mutex_lock(&bdi_lock); 472 spin_lock(&bdi_lock);
217 list_add_tail(&bdi->bdi_list, &bdi_list); 473 list_add_tail(&bdi->bdi_list, &bdi_list);
218 mutex_unlock(&bdi_lock); 474 spin_unlock(&bdi_lock);
219 475
220 bdi->dev = dev; 476 bdi->dev = dev;
221 bdi_debug_register(bdi, dev_name(dev));
222 477
478 /*
479 * Just start the forker thread for our default backing_dev_info,
480 * and add other bdi's to the list. They will get a thread created
481 * on-demand when they need it.
482 */
483 if (bdi_cap_flush_forker(bdi)) {
484 struct bdi_writeback *wb = &bdi->wb;
485
486 wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
487 dev_name(dev));
488 if (IS_ERR(wb->task)) {
489 wb->task = NULL;
490 ret = -ENOMEM;
491
492 spin_lock(&bdi_lock);
493 list_del(&bdi->bdi_list);
494 spin_unlock(&bdi_lock);
495 goto exit;
496 }
497 }
498
499 bdi_debug_register(bdi, dev_name(dev));
223exit: 500exit:
224 return ret; 501 return ret;
225} 502}
@@ -231,17 +508,42 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
231} 508}
232EXPORT_SYMBOL(bdi_register_dev); 509EXPORT_SYMBOL(bdi_register_dev);
233 510
234static void bdi_remove_from_list(struct backing_dev_info *bdi) 511/*
512 * Remove bdi from the global list and shut down any threads we have running
513 */
514static void bdi_wb_shutdown(struct backing_dev_info *bdi)
235{ 515{
236 mutex_lock(&bdi_lock); 516 struct bdi_writeback *wb;
517
518 if (!bdi_cap_writeback_dirty(bdi))
519 return;
520
521 /*
522 * If setup is pending, wait for that to complete first
523 */
524 wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
525 TASK_UNINTERRUPTIBLE);
526
527 /*
528 * Make sure nobody finds us on the bdi_list anymore
529 */
530 spin_lock(&bdi_lock);
237 list_del(&bdi->bdi_list); 531 list_del(&bdi->bdi_list);
238 mutex_unlock(&bdi_lock); 532 spin_unlock(&bdi_lock);
533
534 /*
535 * Finally, kill the kernel threads. We don't need to be RCU
536 * safe anymore, since the bdi is gone from visibility.
537 */
538 list_for_each_entry(wb, &bdi->wb_list, list)
539 kthread_stop(wb->task);
239} 540}
240 541
241void bdi_unregister(struct backing_dev_info *bdi) 542void bdi_unregister(struct backing_dev_info *bdi)
242{ 543{
243 if (bdi->dev) { 544 if (bdi->dev) {
244 bdi_remove_from_list(bdi); 545 if (!bdi_cap_flush_forker(bdi))
546 bdi_wb_shutdown(bdi);
245 bdi_debug_unregister(bdi); 547 bdi_debug_unregister(bdi);
246 device_unregister(bdi->dev); 548 device_unregister(bdi->dev);
247 bdi->dev = NULL; 549 bdi->dev = NULL;
@@ -251,18 +553,25 @@ EXPORT_SYMBOL(bdi_unregister);
251 553
252int bdi_init(struct backing_dev_info *bdi) 554int bdi_init(struct backing_dev_info *bdi)
253{ 555{
254 int i; 556 int i, err;
255 int err;
256 557
257 bdi->dev = NULL; 558 bdi->dev = NULL;
258 559
259 bdi->min_ratio = 0; 560 bdi->min_ratio = 0;
260 bdi->max_ratio = 100; 561 bdi->max_ratio = 100;
261 bdi->max_prop_frac = PROP_FRAC_BASE; 562 bdi->max_prop_frac = PROP_FRAC_BASE;
563 spin_lock_init(&bdi->wb_lock);
262 INIT_LIST_HEAD(&bdi->bdi_list); 564 INIT_LIST_HEAD(&bdi->bdi_list);
263 INIT_LIST_HEAD(&bdi->b_io); 565 INIT_LIST_HEAD(&bdi->wb_list);
264 INIT_LIST_HEAD(&bdi->b_dirty); 566 INIT_LIST_HEAD(&bdi->work_list);
265 INIT_LIST_HEAD(&bdi->b_more_io); 567
568 bdi_wb_init(&bdi->wb, bdi);
569
570 /*
571 * Just one thread support for now, hard code mask and count
572 */
573 bdi->wb_mask = 1;
574 bdi->wb_cnt = 1;
266 575
267 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { 576 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
268 err = percpu_counter_init(&bdi->bdi_stat[i], 0); 577 err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -277,8 +586,6 @@ int bdi_init(struct backing_dev_info *bdi)
277err: 586err:
278 while (i--) 587 while (i--)
279 percpu_counter_destroy(&bdi->bdi_stat[i]); 588 percpu_counter_destroy(&bdi->bdi_stat[i]);
280
281 bdi_remove_from_list(bdi);
282 } 589 }
283 590
284 return err; 591 return err;
@@ -289,9 +596,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
289{ 596{
290 int i; 597 int i;
291 598
292 WARN_ON(!list_empty(&bdi->b_dirty)); 599 WARN_ON(bdi_has_dirty_io(bdi));
293 WARN_ON(!list_empty(&bdi->b_io));
294 WARN_ON(!list_empty(&bdi->b_more_io));
295 600
296 bdi_unregister(bdi); 601 bdi_unregister(bdi);
297 602
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f8341b6019bf..25e7770309b8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
36#include <linux/pagevec.h> 36#include <linux/pagevec.h>
37 37
38/* 38/*
39 * The maximum number of pages to writeout in a single bdflush/kupdate
40 * operation. We do this so we don't hold I_SYNC against an inode for
41 * enormous amounts of time, which would block a userspace task which has
42 * been forced to throttle against that inode. Also, the code reevaluates
43 * the dirty each time it has written this many pages.
44 */
45#define MAX_WRITEBACK_PAGES 1024
46
47/*
48 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited 39 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
49 * will look to see if it needs to force writeback or throttling. 40 * will look to see if it needs to force writeback or throttling.
50 */ 41 */
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode);
117/* End of sysctl-exported parameters */ 108/* End of sysctl-exported parameters */
118 109
119 110
120static void background_writeout(unsigned long _min_pages);
121
122/* 111/*
123 * Scale the writeback cache size proportional to the relative writeout speeds. 112 * Scale the writeback cache size proportional to the relative writeout speeds.
124 * 113 *
@@ -326,7 +315,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
326{ 315{
327 int ret = 0; 316 int ret = 0;
328 317
329 mutex_lock(&bdi_lock); 318 spin_lock(&bdi_lock);
330 if (min_ratio > bdi->max_ratio) { 319 if (min_ratio > bdi->max_ratio) {
331 ret = -EINVAL; 320 ret = -EINVAL;
332 } else { 321 } else {
@@ -338,7 +327,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
338 ret = -EINVAL; 327 ret = -EINVAL;
339 } 328 }
340 } 329 }
341 mutex_unlock(&bdi_lock); 330 spin_unlock(&bdi_lock);
342 331
343 return ret; 332 return ret;
344} 333}
@@ -350,14 +339,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
350 if (max_ratio > 100) 339 if (max_ratio > 100)
351 return -EINVAL; 340 return -EINVAL;
352 341
353 mutex_lock(&bdi_lock); 342 spin_lock(&bdi_lock);
354 if (bdi->min_ratio > max_ratio) { 343 if (bdi->min_ratio > max_ratio) {
355 ret = -EINVAL; 344 ret = -EINVAL;
356 } else { 345 } else {
357 bdi->max_ratio = max_ratio; 346 bdi->max_ratio = max_ratio;
358 bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; 347 bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
359 } 348 }
360 mutex_unlock(&bdi_lock); 349 spin_unlock(&bdi_lock);
361 350
362 return ret; 351 return ret;
363} 352}
@@ -543,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping)
543 * up. 532 * up.
544 */ 533 */
545 if (bdi_nr_reclaimable > bdi_thresh) { 534 if (bdi_nr_reclaimable > bdi_thresh) {
546 writeback_inodes(&wbc); 535 writeback_inodes_wbc(&wbc);
547 pages_written += write_chunk - wbc.nr_to_write; 536 pages_written += write_chunk - wbc.nr_to_write;
548 get_dirty_limits(&background_thresh, &dirty_thresh, 537 get_dirty_limits(&background_thresh, &dirty_thresh,
549 &bdi_thresh, bdi); 538 &bdi_thresh, bdi);
@@ -572,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping)
572 if (pages_written >= write_chunk) 561 if (pages_written >= write_chunk)
573 break; /* We've done our duty */ 562 break; /* We've done our duty */
574 563
575 congestion_wait(BLK_RW_ASYNC, HZ/10); 564 schedule_timeout(1);
576 } 565 }
577 566
578 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && 567 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -591,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping)
591 * background_thresh, to keep the amount of dirty memory low. 580 * background_thresh, to keep the amount of dirty memory low.
592 */ 581 */
593 if ((laptop_mode && pages_written) || 582 if ((laptop_mode && pages_written) ||
594 (!laptop_mode && (global_page_state(NR_FILE_DIRTY) 583 (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
595 + global_page_state(NR_UNSTABLE_NFS) 584 + global_page_state(NR_UNSTABLE_NFS))
596 > background_thresh))) 585 > background_thresh))) {
597 pdflush_operation(background_writeout, 0); 586 struct writeback_control wbc = {
587 .bdi = bdi,
588 .sync_mode = WB_SYNC_NONE,
589 .nr_to_write = nr_writeback,
590 };
591
592
593 bdi_start_writeback(&wbc);
594 }
598} 595}
599 596
600void set_page_dirty_balance(struct page *page, int page_mkwrite) 597void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -678,153 +675,35 @@ void throttle_vm_writeout(gfp_t gfp_mask)
678 } 675 }
679} 676}
680 677
681/*
682 * writeback at least _min_pages, and keep writing until the amount of dirty
683 * memory is less than the background threshold, or until we're all clean.
684 */
685static void background_writeout(unsigned long _min_pages)
686{
687 long min_pages = _min_pages;
688 struct writeback_control wbc = {
689 .bdi = NULL,
690 .sync_mode = WB_SYNC_NONE,
691 .older_than_this = NULL,
692 .nr_to_write = 0,
693 .nonblocking = 1,
694 .range_cyclic = 1,
695 };
696
697 for ( ; ; ) {
698 unsigned long background_thresh;
699 unsigned long dirty_thresh;
700
701 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
702 if (global_page_state(NR_FILE_DIRTY) +
703 global_page_state(NR_UNSTABLE_NFS) < background_thresh
704 && min_pages <= 0)
705 break;
706 wbc.more_io = 0;
707 wbc.encountered_congestion = 0;
708 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
709 wbc.pages_skipped = 0;
710 writeback_inodes(&wbc);
711 min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
712 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
713 /* Wrote less than expected */
714 if (wbc.encountered_congestion || wbc.more_io)
715 congestion_wait(BLK_RW_ASYNC, HZ/10);
716 else
717 break;
718 }
719 }
720}
721
722/*
723 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
724 * the whole world. Returns 0 if a pdflush thread was dispatched. Returns
725 * -1 if all pdflush threads were busy.
726 */
727int wakeup_pdflush(long nr_pages)
728{
729 if (nr_pages == 0)
730 nr_pages = global_page_state(NR_FILE_DIRTY) +
731 global_page_state(NR_UNSTABLE_NFS);
732 return pdflush_operation(background_writeout, nr_pages);
733}
734
735static void wb_timer_fn(unsigned long unused);
736static void laptop_timer_fn(unsigned long unused); 678static void laptop_timer_fn(unsigned long unused);
737 679
738static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
739static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); 680static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
740 681
741/* 682/*
742 * Periodic writeback of "old" data.
743 *
744 * Define "old": the first time one of an inode's pages is dirtied, we mark the
745 * dirtying-time in the inode's address_space. So this periodic writeback code
746 * just walks the superblock inode list, writing back any inodes which are
747 * older than a specific point in time.
748 *
749 * Try to run once per dirty_writeback_interval. But if a writeback event
750 * takes longer than a dirty_writeback_interval interval, then leave a
751 * one-second gap.
752 *
753 * older_than_this takes precedence over nr_to_write. So we'll only write back
754 * all dirty pages if they are all attached to "old" mappings.
755 */
756static void wb_kupdate(unsigned long arg)
757{
758 unsigned long oldest_jif;
759 unsigned long start_jif;
760 unsigned long next_jif;
761 long nr_to_write;
762 struct writeback_control wbc = {
763 .bdi = NULL,
764 .sync_mode = WB_SYNC_NONE,
765 .older_than_this = &oldest_jif,
766 .nr_to_write = 0,
767 .nonblocking = 1,
768 .for_kupdate = 1,
769 .range_cyclic = 1,
770 };
771
772 sync_supers();
773
774 oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
775 start_jif = jiffies;
776 next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
777 nr_to_write = global_page_state(NR_FILE_DIRTY) +
778 global_page_state(NR_UNSTABLE_NFS) +
779 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
780 while (nr_to_write > 0) {
781 wbc.more_io = 0;
782 wbc.encountered_congestion = 0;
783 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
784 writeback_inodes(&wbc);
785 if (wbc.nr_to_write > 0) {
786 if (wbc.encountered_congestion || wbc.more_io)
787 congestion_wait(BLK_RW_ASYNC, HZ/10);
788 else
789 break; /* All the old data is written */
790 }
791 nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
792 }
793 if (time_before(next_jif, jiffies + HZ))
794 next_jif = jiffies + HZ;
795 if (dirty_writeback_interval)
796 mod_timer(&wb_timer, next_jif);
797}
798
799/*
800 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs 683 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
801 */ 684 */
802int dirty_writeback_centisecs_handler(ctl_table *table, int write, 685int dirty_writeback_centisecs_handler(ctl_table *table, int write,
803 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 686 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
804{ 687{
805 proc_dointvec(table, write, file, buffer, length, ppos); 688 proc_dointvec(table, write, file, buffer, length, ppos);
806 if (dirty_writeback_interval)
807 mod_timer(&wb_timer, jiffies +
808 msecs_to_jiffies(dirty_writeback_interval * 10));
809 else
810 del_timer(&wb_timer);
811 return 0; 689 return 0;
812} 690}
813 691
814static void wb_timer_fn(unsigned long unused) 692static void do_laptop_sync(struct work_struct *work)
815{
816 if (pdflush_operation(wb_kupdate, 0) < 0)
817 mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
818}
819
820static void laptop_flush(unsigned long unused)
821{ 693{
822 sys_sync(); 694 wakeup_flusher_threads(0);
695 kfree(work);
823} 696}
824 697
825static void laptop_timer_fn(unsigned long unused) 698static void laptop_timer_fn(unsigned long unused)
826{ 699{
827 pdflush_operation(laptop_flush, 0); 700 struct work_struct *work;
701
702 work = kmalloc(sizeof(*work), GFP_ATOMIC);
703 if (work) {
704 INIT_WORK(work, do_laptop_sync);
705 schedule_work(work);
706 }
828} 707}
829 708
830/* 709/*
@@ -907,8 +786,6 @@ void __init page_writeback_init(void)
907{ 786{
908 int shift; 787 int shift;
909 788
910 mod_timer(&wb_timer,
911 jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
912 writeback_set_ratelimit(); 789 writeback_set_ratelimit();
913 register_cpu_notifier(&ratelimit_nb); 790 register_cpu_notifier(&ratelimit_nb);
914 791
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 94e86dd6954c..ba8228e0a806 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1720,7 +1720,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1720 */ 1720 */
1721 if (total_scanned > sc->swap_cluster_max + 1721 if (total_scanned > sc->swap_cluster_max +
1722 sc->swap_cluster_max / 2) { 1722 sc->swap_cluster_max / 2) {
1723 wakeup_pdflush(laptop_mode ? 0 : total_scanned); 1723 wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
1724 sc->may_writepage = 1; 1724 sc->may_writepage = 1;
1725 } 1725 }
1726 1726