diff options
author | Eric Sandeen <sandeen@redhat.com> | 2010-10-27 21:30:13 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 21:30:13 -0400 |
commit | 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 (patch) | |
tree | ed7e1999a2349811e3cae0f0bdd53a93aea413d7 | |
parent | bbd08344e3df8c7c1d7aa04bc0c8c9367806e12d (diff) |
ext4: implement writeback livelock avoidance using page tagging
This is analogous to Jan Kara's commit,
f446daaea9d4a420d16c606f755f3689dcb2d0ce
mm: implement writeback livelock avoidance using page tagging
but since we forked write_cache_pages, we need to reimplement
it there (and in ext4_da_writepages, since range_cyclic handling
was moved there).
If you start a large buffered IO to a file, and then issue an
fsync on that file, you'll find that the fsync does not complete
until the other IO stops.
If you continue re-dirtying the file (say, putting dd
with conv=notrunc in a loop), when fsync finally completes
(after all IO is done), it reports via tracing that
it has written many more pages than the file contains;
in other words it has synced and re-synced pages in
the file multiple times.
This then leads to problems with our writeback_index
update, since the update advances the index by the number
of pages written, and essentially sets writeback_index
off the end of the file.
With the following patch, we only sync as much as was
dirty at the time of the sync.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/inode.c | 18 | ||||
-rw-r--r-- | include/linux/writeback.h | 2 |
2 files changed, 17 insertions, 3 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6671fcbb5293..c9ea95ba5fde 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2809,16 +2809,21 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2809 | pgoff_t index; | 2809 | pgoff_t index; |
2810 | pgoff_t end; /* Inclusive */ | 2810 | pgoff_t end; /* Inclusive */ |
2811 | long nr_to_write = wbc->nr_to_write; | 2811 | long nr_to_write = wbc->nr_to_write; |
2812 | int tag; | ||
2812 | 2813 | ||
2813 | pagevec_init(&pvec, 0); | 2814 | pagevec_init(&pvec, 0); |
2814 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2815 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2815 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2816 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2816 | 2817 | ||
2818 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2819 | tag = PAGECACHE_TAG_TOWRITE; | ||
2820 | else | ||
2821 | tag = PAGECACHE_TAG_DIRTY; | ||
2822 | |||
2817 | while (!done && (index <= end)) { | 2823 | while (!done && (index <= end)) { |
2818 | int i; | 2824 | int i; |
2819 | 2825 | ||
2820 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2826 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2821 | PAGECACHE_TAG_DIRTY, | ||
2822 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2827 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2823 | if (nr_pages == 0) | 2828 | if (nr_pages == 0) |
2824 | break; | 2829 | break; |
@@ -2923,6 +2928,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2923 | long desired_nr_to_write, nr_to_writebump = 0; | 2928 | long desired_nr_to_write, nr_to_writebump = 0; |
2924 | loff_t range_start = wbc->range_start; | 2929 | loff_t range_start = wbc->range_start; |
2925 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2930 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2931 | pgoff_t end; | ||
2926 | 2932 | ||
2927 | trace_ext4_da_writepages(inode, wbc); | 2933 | trace_ext4_da_writepages(inode, wbc); |
2928 | 2934 | ||
@@ -2958,8 +2964,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2958 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2964 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2959 | wbc->range_end = LLONG_MAX; | 2965 | wbc->range_end = LLONG_MAX; |
2960 | wbc->range_cyclic = 0; | 2966 | wbc->range_cyclic = 0; |
2961 | } else | 2967 | end = -1; |
2968 | } else { | ||
2962 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2969 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2970 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2971 | } | ||
2963 | 2972 | ||
2964 | /* | 2973 | /* |
2965 | * This works around two forms of stupidity. The first is in | 2974 | * This works around two forms of stupidity. The first is in |
@@ -3000,6 +3009,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3000 | pages_skipped = wbc->pages_skipped; | 3009 | pages_skipped = wbc->pages_skipped; |
3001 | 3010 | ||
3002 | retry: | 3011 | retry: |
3012 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3013 | tag_pages_for_writeback(mapping, index, end); | ||
3014 | |||
3003 | while (!ret && wbc->nr_to_write > 0) { | 3015 | while (!ret && wbc->nr_to_write > 0) { |
3004 | 3016 | ||
3005 | /* | 3017 | /* |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..3d132bfb4f3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, | |||
143 | 143 | ||
144 | int generic_writepages(struct address_space *mapping, | 144 | int generic_writepages(struct address_space *mapping, |
145 | struct writeback_control *wbc); | 145 | struct writeback_control *wbc); |
146 | void tag_pages_for_writeback(struct address_space *mapping, | ||
147 | pgoff_t start, pgoff_t end); | ||
146 | int write_cache_pages(struct address_space *mapping, | 148 | int write_cache_pages(struct address_space *mapping, |
147 | struct writeback_control *wbc, writepage_t writepage, | 149 | struct writeback_control *wbc, writepage_t writepage, |
148 | void *data); | 150 | void *data); |