Diffstat (limited to 'mm/page-writeback.c')
 -rw-r--r--  mm/page-writeback.c | 182
 1 file changed, 28 insertions(+), 154 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627ebcd313..25e7770309b8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
 #include <linux/pagevec.h>
 
 /*
- * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES     1024
-
-/*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
  * will look to see if it needs to force writeback or throttling.
  */
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode);
 /* End of sysctl-exported parameters */
 
 
-static void background_writeout(unsigned long _min_pages);
-
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
  *
@@ -320,15 +309,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
 /*
  *
  */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
        int ret = 0;
-       unsigned long flags;
 
-       spin_lock_irqsave(&bdi_lock, flags);
+       spin_lock(&bdi_lock);
        if (min_ratio > bdi->max_ratio) {
                ret = -EINVAL;
        } else {
@@ -340,27 +327,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
                        ret = -EINVAL;
                }
        }
-       spin_unlock_irqrestore(&bdi_lock, flags);
+       spin_unlock(&bdi_lock);
 
        return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-       unsigned long flags;
        int ret = 0;
 
        if (max_ratio > 100)
                return -EINVAL;
 
-       spin_lock_irqsave(&bdi_lock, flags);
+       spin_lock(&bdi_lock);
        if (bdi->min_ratio > max_ratio) {
                ret = -EINVAL;
        } else {
                bdi->max_ratio = max_ratio;
                bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
        }
-       spin_unlock_irqrestore(&bdi_lock, flags);
+       spin_unlock(&bdi_lock);
 
        return ret;
 }
@@ -546,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping)
                 * up.
                 */
                if (bdi_nr_reclaimable > bdi_thresh) {
-                       writeback_inodes(&wbc);
+                       writeback_inodes_wbc(&wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
@@ -575,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping)
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
-               congestion_wait(BLK_RW_ASYNC, HZ/10);
+               schedule_timeout(1);
        }
 
        if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -594,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping)
         * background_thresh, to keep the amount of dirty memory low.
         */
        if ((laptop_mode && pages_written) ||
-           (!laptop_mode && (global_page_state(NR_FILE_DIRTY)
-                           + global_page_state(NR_UNSTABLE_NFS)
-                           > background_thresh)))
-               pdflush_operation(background_writeout, 0);
+           (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+                           + global_page_state(NR_UNSTABLE_NFS))
+                           > background_thresh))) {
+               struct writeback_control wbc = {
+                       .bdi            = bdi,
+                       .sync_mode      = WB_SYNC_NONE,
+                       .nr_to_write    = nr_writeback,
+               };
+
+
+               bdi_start_writeback(&wbc);
+       }
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -681,153 +675,35 @@ void throttle_vm_writeout(gfp_t gfp_mask)
        }
 }
 
-/*
- * writeback at least _min_pages, and keep writing until the amount of dirty
- * memory is less than the background threshold, or until we're all clean.
- */
-static void background_writeout(unsigned long _min_pages)
-{
-       long min_pages = _min_pages;
-       struct writeback_control wbc = {
-               .bdi            = NULL,
-               .sync_mode      = WB_SYNC_NONE,
-               .older_than_this = NULL,
-               .nr_to_write    = 0,
-               .nonblocking    = 1,
-               .range_cyclic   = 1,
-       };
-
-       for ( ; ; ) {
-               unsigned long background_thresh;
-               unsigned long dirty_thresh;
-
-               get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-               if (global_page_state(NR_FILE_DIRTY) +
-                       global_page_state(NR_UNSTABLE_NFS) < background_thresh
-                               && min_pages <= 0)
-                       break;
-               wbc.more_io = 0;
-               wbc.encountered_congestion = 0;
-               wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-               wbc.pages_skipped = 0;
-               writeback_inodes(&wbc);
-               min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-               if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-                       /* Wrote less than expected */
-                       if (wbc.encountered_congestion || wbc.more_io)
-                               congestion_wait(BLK_RW_ASYNC, HZ/10);
-                       else
-                               break;
-               }
-       }
-}
-
-/*
- * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
- * the whole world.  Returns 0 if a pdflush thread was dispatched.  Returns
- * -1 if all pdflush threads were busy.
- */
-int wakeup_pdflush(long nr_pages)
-{
-       if (nr_pages == 0)
-               nr_pages = global_page_state(NR_FILE_DIRTY) +
-                               global_page_state(NR_UNSTABLE_NFS);
-       return pdflush_operation(background_writeout, nr_pages);
-}
-
-static void wb_timer_fn(unsigned long unused);
 static void laptop_timer_fn(unsigned long unused);
 
-static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
 static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
- * Periodic writeback of "old" data.
- *
- * Define "old": the first time one of an inode's pages is dirtied, we mark the
- * dirtying-time in the inode's address_space.  So this periodic writeback code
- * just walks the superblock inode list, writing back any inodes which are
- * older than a specific point in time.
- *
- * Try to run once per dirty_writeback_interval.  But if a writeback event
- * takes longer than a dirty_writeback_interval interval, then leave a
- * one-second gap.
- *
- * older_than_this takes precedence over nr_to_write.  So we'll only write back
- * all dirty pages if they are all attached to "old" mappings.
- */
-static void wb_kupdate(unsigned long arg)
-{
-       unsigned long oldest_jif;
-       unsigned long start_jif;
-       unsigned long next_jif;
-       long nr_to_write;
-       struct writeback_control wbc = {
-               .bdi            = NULL,
-               .sync_mode      = WB_SYNC_NONE,
-               .older_than_this = &oldest_jif,
-               .nr_to_write    = 0,
-               .nonblocking    = 1,
-               .for_kupdate    = 1,
-               .range_cyclic   = 1,
-       };
-
-       sync_supers();
-
-       oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-       start_jif = jiffies;
-       next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
-       nr_to_write = global_page_state(NR_FILE_DIRTY) +
-                       global_page_state(NR_UNSTABLE_NFS) +
-                       (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-       while (nr_to_write > 0) {
-               wbc.more_io = 0;
-               wbc.encountered_congestion = 0;
-               wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-               writeback_inodes(&wbc);
-               if (wbc.nr_to_write > 0) {
-                       if (wbc.encountered_congestion || wbc.more_io)
-                               congestion_wait(BLK_RW_ASYNC, HZ/10);
-                       else
-                               break;  /* All the old data is written */
-               }
-               nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-       }
-       if (time_before(next_jif, jiffies + HZ))
-               next_jif = jiffies + HZ;
-       if (dirty_writeback_interval)
-               mod_timer(&wb_timer, next_jif);
-}
-
-/*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, file, buffer, length, ppos);
-       if (dirty_writeback_interval)
-               mod_timer(&wb_timer, jiffies +
-                       msecs_to_jiffies(dirty_writeback_interval * 10));
-       else
-               del_timer(&wb_timer);
        return 0;
 }
 
-static void wb_timer_fn(unsigned long unused)
-{
-       if (pdflush_operation(wb_kupdate, 0) < 0)
-               mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
-}
-
-static void laptop_flush(unsigned long unused)
+static void do_laptop_sync(struct work_struct *work)
 {
-       sys_sync();
+       wakeup_flusher_threads(0);
+       kfree(work);
 }
 
 static void laptop_timer_fn(unsigned long unused)
 {
-       pdflush_operation(laptop_flush, 0);
+       struct work_struct *work;
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (work) {
+               INIT_WORK(work, do_laptop_sync);
+               schedule_work(work);
+       }
 }
 
 /*
@@ -910,8 +786,6 @@ void __init page_writeback_init(void)
 {
        int shift;
 
-       mod_timer(&wb_timer,
-                 jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
        writeback_set_ratelimit();
        register_cpu_notifier(&ratelimit_nb);
 
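
The laptop-mode hunk above replaces the pdflush callout with a deferred work item: laptop_timer_fn() runs from a timer, i.e. atomic context, so rather than flushing there it allocates a work_struct with GFP_ATOMIC, queues it with schedule_work(), and lets the handler (do_laptop_sync) call wakeup_flusher_threads() and then free the work item. The snippet below is only a minimal sketch of that self-freeing one-shot work pattern, not code from this patch; the demo_* names are made up.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static void demo_work_fn(struct work_struct *work)
{
        /* Runs in process context: safe to sleep, take mutexes, do I/O. */
        pr_info("deferred work running\n");

        /* The work item was allocated by the atomic-side caller; free it here. */
        kfree(work);
}

/* Called from atomic context, e.g. a timer callback. */
static void demo_kick_from_atomic(void)
{
        struct work_struct *work;

        /* GFP_ATOMIC because we must not sleep here. */
        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (!work)
                return;         /* best effort: drop the request on allocation failure */

        INIT_WORK(work, demo_work_fn);
        schedule_work(work);    /* queued on the shared kernel workqueue */
}
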
