Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--  mm/page-writeback.c | 160
1 file changed, 147 insertions(+), 13 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 555752907dc3..c0d4ce144dec 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -30,6 +30,8 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
+#include <linux/buffer_head.h>
+#include <linux/pagevec.h>
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -46,7 +48,6 @@
  */
 static long ratelimit_pages = 32;
 
-static long total_pages;	/* The total number of pages in the machine. */
 static int dirty_exceeded __cacheline_aligned_in_smp;	/* Dirty mem may be over limit */
 
 /*
@@ -126,7 +127,7 @@ get_dirty_limits(long *pbackground, long *pdirty,
 	int unmapped_ratio;
 	long background;
 	long dirty;
-	unsigned long available_memory = total_pages;
+	unsigned long available_memory = vm_total_pages;
 	struct task_struct *tsk;
 
 #ifdef CONFIG_HIGHMEM
@@ -141,7 +142,7 @@ get_dirty_limits(long *pbackground, long *pdirty,
 
 	unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
 				global_page_state(NR_ANON_PAGES)) * 100) /
-					total_pages;
+					vm_total_pages;
 
 	dirty_ratio = vm_dirty_ratio;
 	if (dirty_ratio > unmapped_ratio / 2)
@@ -502,9 +503,9 @@ void laptop_sync_completion(void)
  * will write six megabyte chunks, max.
  */
 
-static void set_ratelimit(void)
+void writeback_set_ratelimit(void)
 {
-	ratelimit_pages = total_pages / (num_online_cpus() * 32);
+	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
 		ratelimit_pages = 16;
 	if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
@@ -514,7 +515,7 @@ static void set_ratelimit(void)
 static int __cpuinit
 ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
 {
-	set_ratelimit();
+	writeback_set_ratelimit();
 	return 0;
 }
 
@@ -533,9 +534,7 @@ void __init page_writeback_init(void)
 	long buffer_pages = nr_free_buffer_pages();
 	long correction;
 
-	total_pages = nr_free_pagecache_pages();
-
-	correction = (100 * 4 * buffer_pages) / total_pages;
+	correction = (100 * 4 * buffer_pages) / vm_total_pages;
 
 	if (correction < 100) {
 		dirty_background_ratio *= correction;
@@ -549,10 +548,143 @@ void __init page_writeback_init(void)
 		vm_dirty_ratio = 1;
 	}
 	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
-	set_ratelimit();
+	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 }
 
+/**
+ * generic_writepages - walk the list of dirty pages of the given
+ * address space and writepage() all of them.
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * If a page is already under I/O, generic_writepages() skips it, even
+ * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them. If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ *
+ * Derived from mpage_writepages() - if you fix this you should check that
+ * also!
+ */
+int generic_writepages(struct address_space *mapping,
+		       struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	int ret = 0;
+	int done = 0;
+	int (*writepage)(struct page *page, struct writeback_control *wbc);
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	int scanned = 0;
+	int range_whole = 0;
+
+	if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		wbc->encountered_congestion = 1;
+		return 0;
+	}
+
+	writepage = mapping->a_ops->writepage;
+
+	/* deal with chardevs and other special file */
+	if (!writepage)
+		return 0;
+
+	pagevec_init(&pvec, 0);
+	if (wbc->range_cyclic) {
+		index = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		scanned = 1;
+	}
+retry:
+	while (!done && (index <= end) &&
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			PAGECACHE_TAG_DIRTY,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+		unsigned i;
+
+		scanned = 1;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point we hold neither mapping->tree_lock nor
+			 * lock on the page itself: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or even
+			 * swizzled back from swapper_space to tmpfs file
+			 * mapping
+			 */
+			lock_page(page);
+
+			if (unlikely(page->mapping != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!wbc->range_cyclic && page->index > end) {
+				done = 1;
+				unlock_page(page);
+				continue;
+			}
+
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+
+			if (PageWriteback(page) ||
+			    !clear_page_dirty_for_io(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			ret = (*writepage)(page, wbc);
+			if (ret) {
+				if (ret == -ENOSPC)
+					set_bit(AS_ENOSPC, &mapping->flags);
+				else
+					set_bit(AS_EIO, &mapping->flags);
+			}
+
+			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+				unlock_page(page);
+			if (ret || (--(wbc->nr_to_write) <= 0))
+				done = 1;
+			if (wbc->nonblocking && bdi_write_congested(bdi)) {
+				wbc->encountered_congestion = 1;
+				done = 1;
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+	return ret;
+}
+
+EXPORT_SYMBOL(generic_writepages);
+
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	int ret;
@@ -675,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
 
 	if (likely(mapping)) {
 		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
-		if (spd)
-			return (*spd)(page);
-		return __set_page_dirty_buffers(page);
+#ifdef CONFIG_BLOCK
+		if (!spd)
+			spd = __set_page_dirty_buffers;
+#endif
+		return (*spd)(page);
 	}
 	if (!PageDirty(page)) {
 		if (!TestSetPageDirty(page))
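
Note on usage: generic_writepages() is exported (see EXPORT_SYMBOL above) as a
library implementation of the writepages() address_space_operation, so a
filesystem with no special writeback clustering can simply delegate to it and
let the helper drive its own writepage() method. A minimal sketch of that
wiring, assuming a hypothetical "examplefs" whose examplefs_writepage() is
defined elsewhere:

	/* Hypothetical example, not part of this patch: delegate ->writepages
	 * to the exported helper, which walks the dirty pages of the mapping
	 * and calls ->writepage on each of them.
	 */
	static int examplefs_writepages(struct address_space *mapping,
					struct writeback_control *wbc)
	{
		return generic_writepages(mapping, wbc);
	}

	static const struct address_space_operations examplefs_aops = {
		.writepage	= examplefs_writepage,	/* assumed to exist */
		.writepages	= examplefs_writepages,
	};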