Diffstat (limited to 'mm/page-writeback.c')

 mm/page-writeback.c | 146
 1 file changed, 78 insertions(+), 68 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8ccf6f1b1473..488b7088557c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -23,6 +23,7 @@
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/mpage.h>
+#include <linux/rmap.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
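
[Note: the new <linux/rmap.h> include is there for page_mkclean(), which the dirty-accounting hunks at the bottom of this patch start calling; nothing else in this file touches the reverse-map API. A sketch of the declaration it provides (signature as introduced by the shared-dirty-tracking series; treat the details as an assumption):

	/* Write-protect every PTE that maps the page, so the next store
	 * faults and re-dirties it; returns nonzero if any PTE was
	 * actually cleaned. */
	int page_mkclean(struct page *page);
]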
@@ -45,7 +46,6 @@
  */
 static long ratelimit_pages = 32;
 
-static long total_pages;	/* The total number of pages in the machine. */
 static int dirty_exceeded __cacheline_aligned_in_smp;	/* Dirty mem may be over limit */
 
 /*
@@ -99,22 +99,6 @@ EXPORT_SYMBOL(laptop_mode);
 
 static void background_writeout(unsigned long _min_pages);
 
-struct writeback_state
-{
-	unsigned long nr_dirty;
-	unsigned long nr_unstable;
-	unsigned long nr_mapped;
-	unsigned long nr_writeback;
-};
-
-static void get_writeback_state(struct writeback_state *wbs)
-{
-	wbs->nr_dirty = read_page_state(nr_dirty);
-	wbs->nr_unstable = read_page_state(nr_unstable);
-	wbs->nr_mapped = read_page_state(nr_mapped);
-	wbs->nr_writeback = read_page_state(nr_writeback);
-}
-
 /*
  * Work out the current dirty-memory clamping and background writeout
  * thresholds.
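
[Note: the deleted get_writeback_state() sampled the old per-CPU page_state accounting in one shot; every former wbs.nr_* read below becomes a direct global_page_state() call against the zoned VM counters. A minimal sketch of that read path, assuming the vm_stat array from the zoned-counter series (the real definition lives in include/linux/vmstat.h):

	static inline unsigned long global_page_state(enum zone_stat_item item)
	{
		long x = atomic_long_read(&vm_stat[item]);
	#ifdef CONFIG_SMP
		/* Per-CPU deltas are folded in lazily, so the global sum
		 * can transiently dip below zero; clamp instead of
		 * returning garbage. */
		if (x < 0)
			x = 0;
	#endif
		return x;
	}
]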
@@ -133,19 +117,17 @@ static void get_writeback_state(struct writeback_state *wbs)
  * clamping level.
  */
 static void
-get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
+get_dirty_limits(long *pbackground, long *pdirty,
 		struct address_space *mapping)
 {
 	int background_ratio;		/* Percentages */
 	int dirty_ratio;
 	int unmapped_ratio;
 	long background;
 	long dirty;
-	unsigned long available_memory = total_pages;
+	unsigned long available_memory = vm_total_pages;
 	struct task_struct *tsk;
 
-	get_writeback_state(wbs);
-
 #ifdef CONFIG_HIGHMEM
 	/*
 	 * If this mapping can only allocate from low memory,
@@ -156,7 +138,9 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
 #endif
 
 
-	unmapped_ratio = 100 - (wbs->nr_mapped * 100) / total_pages;
+	unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
+				global_page_state(NR_ANON_PAGES)) * 100) /
+					vm_total_pages;
 
 	dirty_ratio = vm_dirty_ratio;
 	if (dirty_ratio > unmapped_ratio / 2)
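
[Note: the single nr_mapped counter is gone; NR_FILE_MAPPED and NR_ANON_PAGES are its two halves under the zoned counters, so the new expression sums them. Worked through with invented numbers to show the clamp in the two context lines above:

	/* vm_total_pages = 1,000,000; mapped file + anon pages = 800,000:
	 *
	 *	unmapped_ratio = 100 - (800,000 * 100) / 1,000,000 = 20
	 *
	 * A vm_dirty_ratio of 40 is then clamped to
	 * unmapped_ratio / 2 = 10, so heavily-mapped workloads are
	 * throttled at a lower dirty threshold.
	 */
]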
@@ -189,7 +173,6 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
  */
 static void balance_dirty_pages(struct address_space *mapping)
 {
-	struct writeback_state wbs;
 	long nr_reclaimable;
 	long background_thresh;
 	long dirty_thresh;
@@ -207,11 +190,12 @@ static void balance_dirty_pages(struct address_space *mapping)
 			.range_cyclic	= 1,
 		};
 
-		get_dirty_limits(&wbs, &background_thresh,
-					&dirty_thresh, mapping);
-		nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
-		if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh)
-			break;
+		get_dirty_limits(&background_thresh, &dirty_thresh, mapping);
+		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+					global_page_state(NR_UNSTABLE_NFS);
+		if (nr_reclaimable + global_page_state(NR_WRITEBACK) <=
+			dirty_thresh)
+				break;
 
 		if (!dirty_exceeded)
 			dirty_exceeded = 1;
@@ -224,11 +208,14 @@ static void balance_dirty_pages(struct address_space *mapping)
 		 */
 		if (nr_reclaimable) {
 			writeback_inodes(&wbc);
-			get_dirty_limits(&wbs, &background_thresh,
-					&dirty_thresh, mapping);
-			nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
-			if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh)
-				break;
+			get_dirty_limits(&background_thresh,
+					&dirty_thresh, mapping);
+			nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+					global_page_state(NR_UNSTABLE_NFS);
+			if (nr_reclaimable +
+				global_page_state(NR_WRITEBACK)
+					<= dirty_thresh)
+						break;
 			pages_written += write_chunk - wbc.nr_to_write;
 			if (pages_written >= write_chunk)
 				break;		/* We've done our duty */
@@ -236,8 +223,9 @@ static void balance_dirty_pages(struct address_space *mapping)
 		blk_congestion_wait(WRITE, HZ/10);
 	}
 
-	if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded)
-		dirty_exceeded = 0;
+	if (nr_reclaimable + global_page_state(NR_WRITEBACK)
+		<= dirty_thresh && dirty_exceeded)
+			dirty_exceeded = 0;
 
 	if (writeback_in_progress(bdi))
 		return;		/* pdflush is already working this queue */
@@ -255,6 +243,16 @@ static void balance_dirty_pages(struct address_space *mapping)
 		pdflush_operation(background_writeout, 0);
 }
 
+void set_page_dirty_balance(struct page *page)
+{
+	if (set_page_dirty(page)) {
+		struct address_space *mapping = page_mapping(page);
+
+		if (mapping)
+			balance_dirty_pages_ratelimited(mapping);
+	}
+}
+
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping:	address_space which was dirtied
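
[Note: set_page_dirty_balance() packages the common fault-path sequence of dirtying a page and then throttling the dirtier. A hypothetical call site, only a sketch (the real callers are wired up outside this file, e.g. in the write-fault path):

	/* after making a shared file-backed PTE writable: */
	if (dirty_page) {
		set_page_dirty_balance(dirty_page);	/* may sleep */
		put_page(dirty_page);
	}
]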
@@ -299,12 +297,11 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
 void throttle_vm_writeout(void)
 {
-	struct writeback_state wbs;
 	long background_thresh;
 	long dirty_thresh;
 
        for ( ; ; ) {
-		get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
+		get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
 
 		/*
 		 * Boost the allowable dirty threshold a bit for page
@@ -312,8 +309,9 @@ void throttle_vm_writeout(void)
 		 */
 		dirty_thresh += dirty_thresh / 10;      /* wheeee... */
 
-		if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh)
-			break;
+		if (global_page_state(NR_UNSTABLE_NFS) +
+			global_page_state(NR_WRITEBACK) <= dirty_thresh)
+				break;
 		blk_congestion_wait(WRITE, HZ/10);
 	}
 }
@@ -336,12 +334,12 @@ static void background_writeout(unsigned long _min_pages)
 	};
 
 	for ( ; ; ) {
-		struct writeback_state wbs;
 		long background_thresh;
 		long dirty_thresh;
 
-		get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
-		if (wbs.nr_dirty + wbs.nr_unstable < background_thresh
+		get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
+		if (global_page_state(NR_FILE_DIRTY) +
+			global_page_state(NR_UNSTABLE_NFS) < background_thresh
 				&& min_pages <= 0)
 			break;
 		wbc.encountered_congestion = 0;
@@ -365,12 +363,9 @@ static void background_writeout(unsigned long _min_pages)
  */
 int wakeup_pdflush(long nr_pages)
 {
-	if (nr_pages == 0) {
-		struct writeback_state wbs;
-
-		get_writeback_state(&wbs);
-		nr_pages = wbs.nr_dirty + wbs.nr_unstable;
-	}
+	if (nr_pages == 0)
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS);
 	return pdflush_operation(background_writeout, nr_pages);
 }
 
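
[Note: the semantics of a zero argument are unchanged — write back everything currently dirty plus unstable NFS pages — only the source of the count moved. Callers stay as simple as:

	wakeup_pdflush(0);	/* flush all dirty + unstable pages */
]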
@@ -401,7 +396,6 @@ static void wb_kupdate(unsigned long arg)
 	unsigned long start_jif;
 	unsigned long next_jif;
 	long nr_to_write;
-	struct writeback_state wbs;
 	struct writeback_control wbc = {
 		.bdi		= NULL,
 		.sync_mode	= WB_SYNC_NONE,
@@ -414,11 +408,11 @@ static void wb_kupdate(unsigned long arg)
 
 	sync_supers();
 
-	get_writeback_state(&wbs);
 	oldest_jif = jiffies - dirty_expire_interval;
 	start_jif = jiffies;
 	next_jif = start_jif + dirty_writeback_interval;
-	nr_to_write = wbs.nr_dirty + wbs.nr_unstable +
+	nr_to_write = global_page_state(NR_FILE_DIRTY) +
+			global_page_state(NR_UNSTABLE_NFS) +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 	while (nr_to_write > 0) {
 		wbc.encountered_congestion = 0;
@@ -507,23 +501,23 @@ void laptop_sync_completion(void)
 * will write six megabyte chunks, max.
 */
 
-static void set_ratelimit(void)
+void writeback_set_ratelimit(void)
 {
-	ratelimit_pages = total_pages / (num_online_cpus() * 32);
+	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
 		ratelimit_pages = 16;
 	if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
 		ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
-static int
+static int __cpuinit
 ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
 {
-	set_ratelimit();
+	writeback_set_ratelimit();
 	return 0;
 }
 
-static struct notifier_block ratelimit_nb = {
+static struct notifier_block __cpuinitdata ratelimit_nb = {
 	.notifier_call	= ratelimit_handler,
 	.next		= NULL,
 };
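
[Note: dropping the static and renaming set_ratelimit() to writeback_set_ratelimit() lets code outside this file recompute the ratelimit when vm_total_pages changes (memory hotplug is the likely caller, inferred from the new linkage rather than shown in this diff); the __cpuinit/__cpuinitdata annotations let the notifier be discarded when CPU hotplug is compiled out. The arithmetic, worked for a hypothetical box:

	/* vm_total_pages = 262,144 (1 GiB of 4 KiB pages), 4 CPUs online:
	 *
	 *	ratelimit_pages = 262,144 / (4 * 32) = 2,048
	 *
	 * 2,048 pages * 4 KiB = 8 MiB exceeds the 4 MiB cap, so the
	 * final value is (4096 * 1024) / 4096 = 1,024 pages.
	 */
]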
@@ -538,9 +532,7 @@ void __init page_writeback_init(void)
 	long buffer_pages = nr_free_buffer_pages();
 	long correction;
 
-	total_pages = nr_free_pagecache_pages();
-
-	correction = (100 * 4 * buffer_pages) / total_pages;
+	correction = (100 * 4 * buffer_pages) / vm_total_pages;
 
 	if (correction < 100) {
 		dirty_background_ratio *= correction;
@@ -554,7 +546,7 @@
 		vm_dirty_ratio = 1;
 	}
 	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
-	set_ratelimit();
+	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 }
 
@@ -566,7 +558,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		return 0;
 	wbc->for_writepages = 1;
 	if (mapping->a_ops->writepages)
-		ret =  mapping->a_ops->writepages(mapping, wbc);
+		ret = mapping->a_ops->writepages(mapping, wbc);
 	else
 		ret = generic_writepages(mapping, wbc);
 	wbc->for_writepages = 0;
@@ -640,7 +632,8 @@ int __set_page_dirty_nobuffers(struct page *page)
 		if (mapping2) { /* Race with truncate? */
 			BUG_ON(mapping2 != mapping);
 			if (mapping_cap_account_dirty(mapping))
-				inc_page_state(nr_dirty);
+				__inc_zone_page_state(page,
+								NR_FILE_DIRTY);
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
@@ -705,7 +698,7 @@ int set_page_dirty_lock(struct page *page)
 {
 	int ret;
 
-	lock_page(page);
+	lock_page_nosync(page);
 	ret = set_page_dirty(page);
 	unlock_page(page);
 	return ret;
@@ -728,8 +721,14 @@ int test_clear_page_dirty(struct page *page)
 					page_index(page),
 					PAGECACHE_TAG_DIRTY);
 			write_unlock_irqrestore(&mapping->tree_lock, flags);
-			if (mapping_cap_account_dirty(mapping))
-				dec_page_state(nr_dirty);
+			/*
+			 * We can continue to use `mapping' here because the
+			 * page is locked, which pins the address_space
+			 */
+			if (mapping_cap_account_dirty(mapping)) {
+				page_mkclean(page);
+				dec_zone_page_state(page, NR_FILE_DIRTY);
+			}
 			return 1;
 		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
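
[Note: page_mkclean() is what makes the counter decrement safe for shared mappings — once every PTE mapping the page is write-protected, no process can re-dirty the page behind the accounting's back. The ordering the hunk above relies on, as a sketch:

	/*
	 * 1. clear the radix-tree dirty tag under tree_lock
	 * 2. page_mkclean(page)  - write-protect every mapping PTE
	 * 3. dec_zone_page_state(page, NR_FILE_DIRTY)
	 *
	 * Any store after step 2 faults, and the fault path re-dirties
	 * the page through the normal set_page_dirty() machinery.
	 */
]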
@@ -759,8 +758,10 @@ int clear_page_dirty_for_io(struct page *page)
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
-			if (mapping_cap_account_dirty(mapping))
-				dec_page_state(nr_dirty);
+			if (mapping_cap_account_dirty(mapping)) {
+				page_mkclean(page);
+				dec_zone_page_state(page, NR_FILE_DIRTY);
+			}
 			return 1;
 		}
 		return 0;
@@ -818,6 +819,15 @@ int test_set_page_writeback(struct page *page)
 EXPORT_SYMBOL(test_set_page_writeback);
 
 /*
+ * Wakes up tasks that are being throttled due to writeback congestion
+ */
+void writeback_congestion_end(void)
+{
+	blk_congestion_end(WRITE);
+}
+EXPORT_SYMBOL(writeback_congestion_end);
+
+/*
  * Return true if any of the pages in the mapping are marged with the
  * passed tag.
  */
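
[Note: writeback_congestion_end() gives filesystems that throttle their own writers a way to wake tasks sleeping in blk_congestion_wait(WRITE, ...) without reaching into the block layer directly; NFS is the expected user, though that wiring is outside this diff. A hypothetical usage sketch (my_sb_writeback and the threshold are invented names):

	/* in a writeback-completion path, once congestion clears: */
	if (atomic_dec_return(&my_sb_writeback) < MY_WRITEBACK_LOW)
		writeback_congestion_end();
]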
