aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Sandeen <sandeen@redhat.com> 2010-10-27 21:30:13 -0400
committer Theodore Ts'o <tytso@mit.edu> 2010-10-27 21:30:13 -0400
commit 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 (patch)
tree ed7e1999a2349811e3cae0f0bdd53a93aea413d7
parent bbd08344e3df8c7c1d7aa04bc0c8c9367806e12d (diff)
ext4: implement writeback livelock avoidance using page tagging
This is analogous to Jan Kara's commit f446daaea9d4a420d16c606f755f3689dcb2d0ce ("mm: implement writeback livelock avoidance using page tagging"), but since we forked write_cache_pages, we need to reimplement it there (and in ext4_da_writepages, since range_cyclic handling was moved there).

If you start a large buffered IO to a file, and then issue an fsync after it, you'll find that fsync does not complete until the other IO stops.

If you continue re-dirtying the file (say, putting dd with conv=notrunc in a loop), when fsync finally completes (after all IO is done), it reports via tracing that it has written many more pages than the file contains; in other words, it has synced and re-synced pages in the file multiple times.

This then leads to problems with our writeback_index update, since it advances it by pages written, and essentially sets writeback_index off the end of the file...

With the following patch, we only sync as much as was dirty at the time of the sync.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/inode.c 18
-rw-r--r-- include/linux/writeback.h 2
2 files changed, 17 insertions, 3 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6671fcbb5293..c9ea95ba5fde 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2809,16 +2809,21 @@ static int write_cache_pages_da(struct address_space *mapping,
2809 pgoff_t index; 2809 pgoff_t index;
2810 pgoff_t end; /* Inclusive */ 2810 pgoff_t end; /* Inclusive */
2811 long nr_to_write = wbc->nr_to_write; 2811 long nr_to_write = wbc->nr_to_write;
2812 int tag;
2812 2813
2813 pagevec_init(&pvec, 0); 2814 pagevec_init(&pvec, 0);
2814 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2815 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2815 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2816 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2816 2817
2818 if (wbc->sync_mode == WB_SYNC_ALL)
2819 tag = PAGECACHE_TAG_TOWRITE;
2820 else
2821 tag = PAGECACHE_TAG_DIRTY;
2822
2817 while (!done && (index <= end)) { 2823 while (!done && (index <= end)) {
2818 int i; 2824 int i;
2819 2825
2820 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2826 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2821 PAGECACHE_TAG_DIRTY,
2822 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2827 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2823 if (nr_pages == 0) 2828 if (nr_pages == 0)
2824 break; 2829 break;
@@ -2923,6 +2928,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2923 long desired_nr_to_write, nr_to_writebump = 0; 2928 long desired_nr_to_write, nr_to_writebump = 0;
2924 loff_t range_start = wbc->range_start; 2929 loff_t range_start = wbc->range_start;
2925 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2930 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2931 pgoff_t end;
2926 2932
2927 trace_ext4_da_writepages(inode, wbc); 2933 trace_ext4_da_writepages(inode, wbc);
2928 2934
@@ -2958,8 +2964,11 @@ static int ext4_da_writepages(struct address_space *mapping,
2958 wbc->range_start = index << PAGE_CACHE_SHIFT; 2964 wbc->range_start = index << PAGE_CACHE_SHIFT;
2959 wbc->range_end = LLONG_MAX; 2965 wbc->range_end = LLONG_MAX;
2960 wbc->range_cyclic = 0; 2966 wbc->range_cyclic = 0;
2961 } else 2967 end = -1;
2968 } else {
2962 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2969 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2970 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2971 }
2963 2972
2964 /* 2973 /*
2965 * This works around two forms of stupidity. The first is in 2974 * This works around two forms of stupidity. The first is in
@@ -3000,6 +3009,9 @@ static int ext4_da_writepages(struct address_space *mapping,
3000 pages_skipped = wbc->pages_skipped; 3009 pages_skipped = wbc->pages_skipped;
3001 3010
3002retry: 3011retry:
3012 if (wbc->sync_mode == WB_SYNC_ALL)
3013 tag_pages_for_writeback(mapping, index, end);
3014
3003 while (!ret && wbc->nr_to_write > 0) { 3015 while (!ret && wbc->nr_to_write > 0) {
3004 3016
3005 /* 3017 /*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 72a5d647a5f2..3d132bfb4f3d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
143 143
144int generic_writepages(struct address_space *mapping, 144int generic_writepages(struct address_space *mapping,
145 struct writeback_control *wbc); 145 struct writeback_control *wbc);
146void tag_pages_for_writeback(struct address_space *mapping,
147 pgoff_t start, pgoff_t end);
146int write_cache_pages(struct address_space *mapping, 148int write_cache_pages(struct address_space *mapping,
147 struct writeback_control *wbc, writepage_t writepage, 149 struct writeback_control *wbc, writepage_t writepage,
148 void *data); 150 void *data);