Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--  mm/page-writeback.c  182
1 files changed, 28 insertions, 154 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 997186c0b519..dd73d29c15a8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
 #include <linux/pagevec.h>
 
 /*
- * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation. We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode. Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES	1024
-
-/*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
  * will look to see if it needs to force writeback or throttling.
  */
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode);
 /* End of sysctl-exported parameters */
 
 
-static void background_writeout(unsigned long _min_pages);
-
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
  *
@@ -320,15 +309,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
 /*
  *
  */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	int ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	spin_lock(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -340,27 +327,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	spin_unlock(&bdi_lock);
 
 	return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-	unsigned long flags;
 	int ret = 0;
 
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	spin_lock(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	spin_unlock(&bdi_lock);
 
 	return ret;
 }
@@ -546,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes(&wbc);
+			writeback_inodes_wbc(&wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
@@ -575,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 		if (pages_written >= write_chunk)
 			break;		/* We've done our duty */
 
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
+		schedule_timeout(1);
 	}
 
 	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -594,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping)
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if ((laptop_mode && pages_written) ||
-	    (!laptop_mode && (global_page_state(NR_FILE_DIRTY)
-			       + global_page_state(NR_UNSTABLE_NFS)
-			       > background_thresh)))
-		pdflush_operation(background_writeout, 0);
+	    (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+				+ global_page_state(NR_UNSTABLE_NFS))
+				> background_thresh))) {
+		struct writeback_control wbc = {
+			.bdi		= bdi,
+			.sync_mode	= WB_SYNC_NONE,
+			.nr_to_write	= nr_writeback,
+		};
+
+
+		bdi_start_writeback(&wbc);
+	}
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -682,153 +676,35 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 	}
 }
 
-/*
- * writeback at least _min_pages, and keep writing until the amount of dirty
- * memory is less than the background threshold, or until we're all clean.
- */
-static void background_writeout(unsigned long _min_pages)
-{
-	long min_pages = _min_pages;
-	struct writeback_control wbc = {
-		.bdi		= NULL,
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = NULL,
-		.nr_to_write	= 0,
-		.nonblocking	= 1,
-		.range_cyclic	= 1,
-	};
-
-	for ( ; ; ) {
-		unsigned long background_thresh;
-		unsigned long dirty_thresh;
-
-		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-		if (global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) < background_thresh
-				&& min_pages <= 0)
-			break;
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		wbc.pages_skipped = 0;
-		writeback_inodes(&wbc);
-		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			/* Wrote less than expected */
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-			else
-				break;
-		}
-	}
-}
-
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world. Returns 0 if a pdflush thread was dispatched. Returns
- * -1 if all pdflush threads were busy.
- */
-int wakeup_pdflush(long nr_pages)
-{
-	if (nr_pages == 0)
-		nr_pages = global_page_state(NR_FILE_DIRTY) +
-				global_page_state(NR_UNSTABLE_NFS);
-	return pdflush_operation(background_writeout, nr_pages);
-}
-
-static void wb_timer_fn(unsigned long unused);
 static void laptop_timer_fn(unsigned long unused);
 
-static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
 static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
- * Periodic writeback of "old" data.
- *
- * Define "old": the first time one of an inode's pages is dirtied, we mark the
- * dirtying-time in the inode's address_space. So this periodic writeback code
- * just walks the superblock inode list, writing back any inodes which are
- * older than a specific point in time.
- *
- * Try to run once per dirty_writeback_interval. But if a writeback event
- * takes longer than a dirty_writeback_interval interval, then leave a
- * one-second gap.
- *
- * older_than_this takes precedence over nr_to_write. So we'll only write back
- * all dirty pages if they are all attached to "old" mappings.
- */
-static void wb_kupdate(unsigned long arg)
-{
-	unsigned long oldest_jif;
-	unsigned long start_jif;
-	unsigned long next_jif;
-	long nr_to_write;
-	struct writeback_control wbc = {
-		.bdi		= NULL,
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = &oldest_jif,
-		.nr_to_write	= 0,
-		.nonblocking	= 1,
-		.for_kupdate	= 1,
-		.range_cyclic	= 1,
-	};
-
-	sync_supers();
-
-	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-	start_jif = jiffies;
-	next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
-	nr_to_write = global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-	while (nr_to_write > 0) {
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		writeback_inodes(&wbc);
-		if (wbc.nr_to_write > 0) {
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-			else
-				break;	/* All the old data is written */
-		}
-		nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-	}
-	if (time_before(next_jif, jiffies + HZ))
-		next_jif = jiffies + HZ;
-	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, next_jif);
-}
-
-/*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
 	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, jiffies +
-			msecs_to_jiffies(dirty_writeback_interval * 10));
-	else
-		del_timer(&wb_timer);
 	return 0;
 }
 
-static void wb_timer_fn(unsigned long unused)
-{
-	if (pdflush_operation(wb_kupdate, 0) < 0)
-		mod_timer(&wb_timer, jiffies + HZ);	/* delay 1 second */
-}
-
-static void laptop_flush(unsigned long unused)
+static void do_laptop_sync(struct work_struct *work)
 {
-	sys_sync();
+	wakeup_flusher_threads(0);
+	kfree(work);
 }
 
 static void laptop_timer_fn(unsigned long unused)
 {
-	pdflush_operation(laptop_flush, 0);
+	struct work_struct *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (work) {
+		INIT_WORK(work, do_laptop_sync);
+		schedule_work(work);
+	}
 }
 
 /*
@@ -911,8 +787,6 @@ void __init page_writeback_init(void)
 {
 	int shift;
 
-	mod_timer(&wb_timer,
-		  jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 