Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--	mm/page-writeback.c	70
1 file changed, 53 insertions(+), 17 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 37498ef61548..df8202ebc7b8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -805,6 +805,41 @@ void __init page_writeback_init(void)
 }
 
 /**
+ * tag_pages_for_writeback - tag pages to be written by write_cache_pages
+ * @mapping: address space structure to write
+ * @start: starting page index
+ * @end: ending page index (inclusive)
+ *
+ * This function scans the page range from @start to @end (inclusive) and tags
+ * all pages that have the DIRTY tag set with the special TOWRITE tag. The idea
+ * is that write_cache_pages (or whoever calls this function) will then use the
+ * TOWRITE tag to identify pages eligible for writeback. This mechanism is
+ * used to avoid livelocking of writeback by a process steadily creating new
+ * dirty pages in the file (thus it is important for this function to be quick
+ * so that it can tag pages faster than a dirtying process can create them).
+ */
+/*
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
+ */
+#define WRITEBACK_TAG_BATCH 4096
+void tag_pages_for_writeback(struct address_space *mapping,
+			     pgoff_t start, pgoff_t end)
+{
+	unsigned long tagged;
+
+	do {
+		spin_lock_irq(&mapping->tree_lock);
+		tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
+				&start, end, WRITEBACK_TAG_BATCH,
+				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+		spin_unlock_irq(&mapping->tree_lock);
+		WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
+		cond_resched();
+	} while (tagged >= WRITEBACK_TAG_BATCH);
+}
+EXPORT_SYMBOL(tag_pages_for_writeback);
+
+/**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
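
The batch loop in tag_pages_for_writeback above exists purely to bound how long
mapping->tree_lock is held: each acquisition tags at most WRITEBACK_TAG_BATCH
pages, then drops the lock and reschedules before resuming. Below is a minimal
userspace sketch of that pattern, not the kernel code: a flat array of flag
bytes stands in for the radix tree, a pthread mutex for tree_lock, and
sched_yield() for cond_resched(). Every name in it (page_flags, tag_one_batch,
and so on) is illustrative, not kernel API.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NPAGES      65536
#define TAG_BATCH   4096		/* plays the role of WRITEBACK_TAG_BATCH */
#define TAG_DIRTY   0x1
#define TAG_TOWRITE 0x2

static unsigned char page_flags[NPAGES];	/* stand-in for the radix tree */
static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Tag up to TAG_BATCH DIRTY pages in [*start, end] with TOWRITE, advancing
 * *start past the last page examined so the next batch resumes where this
 * one stopped (the contract radix_tree_range_tag_if_tagged provides).
 */
static unsigned long tag_one_batch(unsigned long *start, unsigned long end)
{
	unsigned long tagged = 0;

	while (*start <= end && tagged < TAG_BATCH) {
		if (page_flags[*start] & TAG_DIRTY) {
			page_flags[*start] |= TAG_TOWRITE;
			tagged++;
		}
		(*start)++;
	}
	return tagged;
}

static void tag_pages_for_writeback_sim(unsigned long start, unsigned long end)
{
	unsigned long tagged;

	do {
		pthread_mutex_lock(&tree_lock);	/* bounded hold time per batch */
		tagged = tag_one_batch(&start, end);
		pthread_mutex_unlock(&tree_lock);
		sched_yield();			/* the cond_resched() slot */
	} while (tagged >= TAG_BATCH);
}

int main(void)
{
	unsigned long i, n = 0;

	for (i = 0; i < NPAGES; i += 2)
		page_flags[i] = TAG_DIRTY;	/* dirty every other page */
	tag_pages_for_writeback_sim(0, NPAGES - 1);
	for (i = 0; i < NPAGES; i++)
		n += !!(page_flags[i] & TAG_TOWRITE);
	printf("tagged %lu of %u pages\n", n, NPAGES);	/* expect 32768 */
	return 0;
}

Compile with cc -pthread sketch.c. The loop terminates even when a batch stops
mid-range, because the cursor resumes from where the previous batch left off.
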
@@ -818,6 +853,13 @@ void __init page_writeback_init(void)
 * the call was made get new I/O started against them. If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
+ *
+ * To avoid livelocks (when another process dirties new pages), we first tag
+ * pages which should be written back with the TOWRITE tag and only then start
+ * writing them. For data-integrity sync we have to be careful not to miss
+ * pages (e.g., because some other process has cleared a TOWRITE tag we set).
+ * The rule we follow is that the TOWRITE tag can be cleared only by the
+ * process clearing the DIRTY tag (and submitting the page for IO).
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
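
The rule stated in the comment above is the heart of the livelock fix:
redirtying a page sets DIRTY but never TOWRITE, so once a sync pass has taken
its TOWRITE snapshot, that set can only shrink, and a WB_SYNC_ALL walk over it
must terminate. A hedged, standalone sketch of that property follows, using the
same illustrative flag model as before, with a dirtier thread racing the sync
pass; none of these names are kernel API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NPAGES      4096
#define TAG_DIRTY   0x1
#define TAG_TOWRITE 0x2

static _Atomic unsigned char page_flags[NPAGES];
static atomic_int stop;

/* Redirtying sets DIRTY only; it never (re)sets TOWRITE. */
static void *dirtier(void *arg)
{
	unsigned long i = 0;

	(void)arg;
	while (!atomic_load(&stop))
		atomic_fetch_or(&page_flags[i++ % NPAGES], TAG_DIRTY);
	return NULL;
}

int main(void)
{
	pthread_t t;
	unsigned long i, written = 0;

	for (i = 0; i < NPAGES; i++)
		page_flags[i] = TAG_DIRTY;
	/* Snapshot the work: tag everything that is dirty right now. */
	for (i = 0; i < NPAGES; i++)
		atomic_fetch_or(&page_flags[i], TAG_TOWRITE);

	pthread_create(&t, NULL, dirtier, NULL);

	/*
	 * Sync pass: visit TOWRITE pages only, clearing DIRTY and TOWRITE
	 * together as "writeback" of each page starts -- the only legal way
	 * to clear TOWRITE under the rule above.
	 */
	for (i = 0; i < NPAGES; i++) {
		if (atomic_load(&page_flags[i]) & TAG_TOWRITE) {
			atomic_fetch_and(&page_flags[i],
					 (unsigned char)~(TAG_DIRTY | TAG_TOWRITE));
			written++;
		}
	}
	atomic_store(&stop, 1);
	pthread_join(t, NULL);
	printf("sync pass wrote %lu pages despite concurrent dirtying\n",
	       written);
	return 0;
}

The dirtier can create unbounded new DIRTY work while the pass runs, but it
cannot grow the TOWRITE set, which is exactly why the pass visits at most
NPAGES pages instead of chasing the writer forever.
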
@@ -833,6 +875,7 @@ int write_cache_pages(struct address_space *mapping,
 	pgoff_t done_index;
 	int cycled;
 	int range_whole = 0;
+	int tag;
 
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
@@ -849,29 +892,19 @@ int write_cache_pages(struct address_space *mapping,
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
 		cycled = 1; /* ignore range_cyclic tests */
-
-		/*
-		 * If this is a data integrity sync, cap the writeback to the
-		 * current end of file. Any extension to the file that occurs
-		 * after this is a new write and we don't need to write those
-		 * pages out to fulfil our data integrity requirements. If we
-		 * try to write them out, we can get stuck in this scan until
-		 * the concurrent writer stops adding dirty pages and extending
-		 * EOF.
-		 */
-		if (wbc->sync_mode == WB_SYNC_ALL &&
-		    wbc->range_end == LLONG_MAX) {
-			end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT;
-		}
 	}
-
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
 retry:
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
 	done_index = index;
 	while (!done && (index <= end)) {
 		int i;
 
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			      PAGECACHE_TAG_DIRTY,
-			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
 			break;
@@ -1327,6 +1360,9 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+			radix_tree_tag_clear(&mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_TOWRITE);
 			spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		} else {
 			ret = TestSetPageWriteback(page);
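
This last hunk is the enforcement point for the clearing rule: the moment a
page transitions to writeback, test_set_page_writeback drops DIRTY and TOWRITE
together under tree_lock. Reusing the page_flags/tree_lock stand-ins from the
first sketch, the shape of that invariant is just the following (illustrative,
not kernel code):

/*
 * A page entering writeback drops both tags in one critical section, so a
 * TOWRITE tag never survives the IO submission that consumes it.
 */
static void start_writeback_on_page(unsigned long idx)
{
	pthread_mutex_lock(&tree_lock);
	page_flags[idx] &= ~(TAG_DIRTY | TAG_TOWRITE);	/* always as a pair */
	pthread_mutex_unlock(&tree_lock);
}

Clearing only DIRTY here would leave a stale TOWRITE behind for a later
WB_SYNC_ALL pass to chew on; clearing TOWRITE anywhere else would let a
data-integrity sync miss a page it had promised to write.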