diff options
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 70 |
1 files changed, 53 insertions, 17 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 37498ef61548..df8202ebc7b8 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -805,6 +805,41 @@ void __init page_writeback_init(void) | |||
805 | } | 805 | } |
806 | 806 | ||
807 | /** | 807 | /** |
808 | * tag_pages_for_writeback - tag pages to be written by write_cache_pages | ||
809 | * @mapping: address space structure to write | ||
810 | * @start: starting page index | ||
811 | * @end: ending page index (inclusive) | ||
812 | * | ||
813 | * This function scans the page range from @start to @end (inclusive) and tags | ||
814 | * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is | ||
815 | * that write_cache_pages (or whoever calls this function) will then use | ||
816 | * TOWRITE tag to identify pages eligible for writeback. This mechanism is | ||
817 | * used to avoid livelocking of writeback by a process steadily creating new | ||
818 | * dirty pages in the file (thus it is important for this function to be quick | ||
819 | * so that it can tag pages faster than a dirtying process can create them). | ||
820 | */ | ||
821 | /* | ||
822 | * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency. | ||
823 | */ | ||
824 | #define WRITEBACK_TAG_BATCH 4096 | ||
825 | void tag_pages_for_writeback(struct address_space *mapping, | ||
826 | pgoff_t start, pgoff_t end) | ||
827 | { | ||
828 | unsigned long tagged; | ||
829 | |||
830 | do { | ||
831 | spin_lock_irq(&mapping->tree_lock); | ||
832 | tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree, | ||
833 | &start, end, WRITEBACK_TAG_BATCH, | ||
834 | PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); | ||
835 | spin_unlock_irq(&mapping->tree_lock); | ||
836 | WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH); | ||
837 | cond_resched(); | ||
838 | } while (tagged >= WRITEBACK_TAG_BATCH); | ||
839 | } | ||
840 | EXPORT_SYMBOL(tag_pages_for_writeback); | ||
841 | |||
842 | /** | ||
808 | * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. | 843 | * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. |
809 | * @mapping: address space structure to write | 844 | * @mapping: address space structure to write |
810 | * @wbc: subtract the number of written pages from *@wbc->nr_to_write | 845 | * @wbc: subtract the number of written pages from *@wbc->nr_to_write |
@@ -818,6 +853,13 @@ void __init page_writeback_init(void) | |||
818 | * the call was made get new I/O started against them. If wbc->sync_mode is | 853 | * the call was made get new I/O started against them. If wbc->sync_mode is |
819 | * WB_SYNC_ALL then we were called for data integrity and we must wait for | 854 | * WB_SYNC_ALL then we were called for data integrity and we must wait for |
820 | * existing IO to complete. | 855 | * existing IO to complete. |
856 | * | ||
857 | * To avoid livelocks (when other processes dirty new pages), we first tag | ||
858 | * pages which should be written back with TOWRITE tag and only then start | ||
859 | * writing them. For data-integrity sync we have to be careful so that we do | ||
860 | * not miss any pages (e.g., because some other process has cleared the TOWRITE | ||
861 | * tag we set). The rule we follow is that TOWRITE tag can be cleared only | ||
862 | * by the process clearing the DIRTY tag (and submitting the page for IO). | ||
821 | */ | 863 | */ |
822 | int write_cache_pages(struct address_space *mapping, | 864 | int write_cache_pages(struct address_space *mapping, |
823 | struct writeback_control *wbc, writepage_t writepage, | 865 | struct writeback_control *wbc, writepage_t writepage, |
@@ -833,6 +875,7 @@ int write_cache_pages(struct address_space *mapping, | |||
833 | pgoff_t done_index; | 875 | pgoff_t done_index; |
834 | int cycled; | 876 | int cycled; |
835 | int range_whole = 0; | 877 | int range_whole = 0; |
878 | int tag; | ||
836 | 879 | ||
837 | pagevec_init(&pvec, 0); | 880 | pagevec_init(&pvec, 0); |
838 | if (wbc->range_cyclic) { | 881 | if (wbc->range_cyclic) { |
@@ -849,29 +892,19 @@ int write_cache_pages(struct address_space *mapping, | |||
849 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 892 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
850 | range_whole = 1; | 893 | range_whole = 1; |
851 | cycled = 1; /* ignore range_cyclic tests */ | 894 | cycled = 1; /* ignore range_cyclic tests */ |
852 | |||
853 | /* | ||
854 | * If this is a data integrity sync, cap the writeback to the | ||
855 | * current end of file. Any extension to the file that occurs | ||
856 | * after this is a new write and we don't need to write those | ||
857 | * pages out to fulfil our data integrity requirements. If we | ||
858 | * try to write them out, we can get stuck in this scan until | ||
859 | * the concurrent writer stops adding dirty pages and extending | ||
860 | * EOF. | ||
861 | */ | ||
862 | if (wbc->sync_mode == WB_SYNC_ALL && | ||
863 | wbc->range_end == LLONG_MAX) { | ||
864 | end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT; | ||
865 | } | ||
866 | } | 895 | } |
867 | 896 | if (wbc->sync_mode == WB_SYNC_ALL) | |
897 | tag = PAGECACHE_TAG_TOWRITE; | ||
898 | else | ||
899 | tag = PAGECACHE_TAG_DIRTY; | ||
868 | retry: | 900 | retry: |
901 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
902 | tag_pages_for_writeback(mapping, index, end); | ||
869 | done_index = index; | 903 | done_index = index; |
870 | while (!done && (index <= end)) { | 904 | while (!done && (index <= end)) { |
871 | int i; | 905 | int i; |
872 | 906 | ||
873 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 907 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
874 | PAGECACHE_TAG_DIRTY, | ||
875 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 908 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
876 | if (nr_pages == 0) | 909 | if (nr_pages == 0) |
877 | break; | 910 | break; |
@@ -1327,6 +1360,9 @@ int test_set_page_writeback(struct page *page) | |||
1327 | radix_tree_tag_clear(&mapping->page_tree, | 1360 | radix_tree_tag_clear(&mapping->page_tree, |
1328 | page_index(page), | 1361 | page_index(page), |
1329 | PAGECACHE_TAG_DIRTY); | 1362 | PAGECACHE_TAG_DIRTY); |
1363 | radix_tree_tag_clear(&mapping->page_tree, | ||
1364 | page_index(page), | ||
1365 | PAGECACHE_TAG_TOWRITE); | ||
1330 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 1366 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
1331 | } else { | 1367 | } else { |
1332 | ret = TestSetPageWriteback(page); | 1368 | ret = TestSetPageWriteback(page); |