diff options
author | Theodore Ts'o <tytso@mit.edu> | 2010-05-16 18:00:00 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-05-16 18:00:00 -0400 |
commit | 8e48dcfbd7c0892b4cfd064d682cc4c95a29df32 (patch) | |
tree | 653f12cc4eea9606c80142752612ad28f1ccdb45 /fs/ext4/inode.c | |
parent | 39a4bade8c1826b658316d66ee81c09b0a4d7d42 (diff) |
ext4: Use our own write_cache_pages()
Make a copy of write_cache_pages() for the benefit of
ext4_da_writepages(). This allows us to simplify the code some, and
will allow us to further customize the code in future patches.
There are some nasty hacks in write_cache_pages(), which Linus has
(correctly) characterized as vile. I've just copied it into
write_cache_pages_da(), without trying to clean those bits up lest I
break something in ext4's delalloc implementation, which is a bit
fragile right now. This will allow Dave Chinner to clean up
write_cache_pages() in mm/page-writeback.c, without worrying about
breaking ext4. Eventually write_cache_pages_da() will go away when I
rewrite ext4's delayed allocation and create a general
ext4_writepages() which is used for all of ext4's writeback. Until
then, this is the lowest-risk way to clean up the core
write_cache_pages() function.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 141 |
1 files changed, 119 insertions, 22 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6aa0442811d2..830336d3911b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2426,17 +2426,6 @@ static int __mpage_da_writepage(struct page *page, | |||
2426 | struct buffer_head *bh, *head; | 2426 | struct buffer_head *bh, *head; |
2427 | sector_t logical; | 2427 | sector_t logical; |
2428 | 2428 | ||
2429 | if (mpd->io_done) { | ||
2430 | /* | ||
2431 | * Rest of the page in the page_vec | ||
2432 | * redirty them and skip them. We will | ||
2433 | * try to write them again after | ||
2434 | * starting a new transaction | ||
2435 | */ | ||
2436 | redirty_page_for_writepage(wbc, page); | ||
2437 | unlock_page(page); | ||
2438 | return MPAGE_DA_EXTENT_TAIL; | ||
2439 | } | ||
2440 | /* | 2429 | /* |
2441 | * Can we merge this page to current extent? | 2430 | * Can we merge this page to current extent? |
2442 | */ | 2431 | */ |
@@ -2831,6 +2820,124 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2831 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2820 | return ext4_chunk_trans_blocks(inode, max_blocks); |
2832 | } | 2821 | } |
2833 | 2822 | ||
2823 | /* | ||
2824 | * write_cache_pages_da - walk the list of dirty pages of the given | ||
2825 | * address space and call the callback function (which usually writes | ||
2826 | * the pages). | ||
2827 | * | ||
2828 | * This is a forked version of write_cache_pages(). Differences: | ||
2829 | * Range cyclic is ignored. | ||
2830 | * no_nrwrite_index_update is always presumed true | ||
2831 | */ | ||
2832 | static int write_cache_pages_da(struct address_space *mapping, | ||
2833 | struct writeback_control *wbc, | ||
2834 | struct mpage_da_data *mpd) | ||
2835 | { | ||
2836 | int ret = 0; | ||
2837 | int done = 0; | ||
2838 | struct pagevec pvec; | ||
2839 | int nr_pages; | ||
2840 | pgoff_t index; | ||
2841 | pgoff_t end; /* Inclusive */ | ||
2842 | long nr_to_write = wbc->nr_to_write; | ||
2843 | |||
2844 | pagevec_init(&pvec, 0); | ||
2845 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2846 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2847 | |||
2848 | while (!done && (index <= end)) { | ||
2849 | int i; | ||
2850 | |||
2851 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
2852 | PAGECACHE_TAG_DIRTY, | ||
2853 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
2854 | if (nr_pages == 0) | ||
2855 | break; | ||
2856 | |||
2857 | for (i = 0; i < nr_pages; i++) { | ||
2858 | struct page *page = pvec.pages[i]; | ||
2859 | |||
2860 | /* | ||
2861 | * At this point, the page may be truncated or | ||
2862 | * invalidated (changing page->mapping to NULL), or | ||
2863 | * even swizzled back from swapper_space to tmpfs file | ||
2864 | * mapping. However, page->index will not change | ||
2865 | * because we have a reference on the page. | ||
2866 | */ | ||
2867 | if (page->index > end) { | ||
2868 | done = 1; | ||
2869 | break; | ||
2870 | } | ||
2871 | |||
2872 | lock_page(page); | ||
2873 | |||
2874 | /* | ||
2875 | * Page truncated or invalidated. We can freely skip it | ||
2876 | * then, even for data integrity operations: the page | ||
2877 | * has disappeared concurrently, so there could be no | ||
2878 | * real expectation of this data integrity operation | ||
2879 | * even if there is now a new, dirty page at the same | ||
2880 | * pagecache address. | ||
2881 | */ | ||
2882 | if (unlikely(page->mapping != mapping)) { | ||
2883 | continue_unlock: | ||
2884 | unlock_page(page); | ||
2885 | continue; | ||
2886 | } | ||
2887 | |||
2888 | if (!PageDirty(page)) { | ||
2889 | /* someone wrote it for us */ | ||
2890 | goto continue_unlock; | ||
2891 | } | ||
2892 | |||
2893 | if (PageWriteback(page)) { | ||
2894 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2895 | wait_on_page_writeback(page); | ||
2896 | else | ||
2897 | goto continue_unlock; | ||
2898 | } | ||
2899 | |||
2900 | BUG_ON(PageWriteback(page)); | ||
2901 | if (!clear_page_dirty_for_io(page)) | ||
2902 | goto continue_unlock; | ||
2903 | |||
2904 | ret = __mpage_da_writepage(page, wbc, mpd); | ||
2905 | if (unlikely(ret)) { | ||
2906 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
2907 | unlock_page(page); | ||
2908 | ret = 0; | ||
2909 | } else { | ||
2910 | done = 1; | ||
2911 | break; | ||
2912 | } | ||
2913 | } | ||
2914 | |||
2915 | if (nr_to_write > 0) { | ||
2916 | nr_to_write--; | ||
2917 | if (nr_to_write == 0 && | ||
2918 | wbc->sync_mode == WB_SYNC_NONE) { | ||
2919 | /* | ||
2920 | * We stop writing back only if we are | ||
2921 | * not doing integrity sync. In case of | ||
2922 | * integrity sync we have to keep going | ||
2923 | * because someone may be concurrently | ||
2924 | * dirtying pages, and we might have | ||
2925 | * synced a lot of newly appeared dirty | ||
2926 | * pages, but have not synced all of the | ||
2927 | * old dirty pages. | ||
2928 | */ | ||
2929 | done = 1; | ||
2930 | break; | ||
2931 | } | ||
2932 | } | ||
2933 | } | ||
2934 | pagevec_release(&pvec); | ||
2935 | cond_resched(); | ||
2936 | } | ||
2937 | return ret; | ||
2938 | } | ||
2939 | |||
2940 | |||
2834 | static int ext4_da_writepages(struct address_space *mapping, | 2941 | static int ext4_da_writepages(struct address_space *mapping, |
2835 | struct writeback_control *wbc) | 2942 | struct writeback_control *wbc) |
2836 | { | 2943 | { |
@@ -2839,7 +2946,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2839 | handle_t *handle = NULL; | 2946 | handle_t *handle = NULL; |
2840 | struct mpage_da_data mpd; | 2947 | struct mpage_da_data mpd; |
2841 | struct inode *inode = mapping->host; | 2948 | struct inode *inode = mapping->host; |
2842 | int no_nrwrite_index_update; | ||
2843 | int pages_written = 0; | 2949 | int pages_written = 0; |
2844 | long pages_skipped; | 2950 | long pages_skipped; |
2845 | unsigned int max_pages; | 2951 | unsigned int max_pages; |
@@ -2919,12 +3025,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2919 | mpd.wbc = wbc; | 3025 | mpd.wbc = wbc; |
2920 | mpd.inode = mapping->host; | 3026 | mpd.inode = mapping->host; |
2921 | 3027 | ||
2922 | /* | ||
2923 | * we don't want write_cache_pages to update | ||
2924 | * nr_to_write and writeback_index | ||
2925 | */ | ||
2926 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
2927 | wbc->no_nrwrite_index_update = 1; | ||
2928 | pages_skipped = wbc->pages_skipped; | 3028 | pages_skipped = wbc->pages_skipped; |
2929 | 3029 | ||
2930 | retry: | 3030 | retry: |
@@ -2966,8 +3066,7 @@ retry: | |||
2966 | mpd.io_done = 0; | 3066 | mpd.io_done = 0; |
2967 | mpd.pages_written = 0; | 3067 | mpd.pages_written = 0; |
2968 | mpd.retval = 0; | 3068 | mpd.retval = 0; |
2969 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | 3069 | ret = write_cache_pages_da(mapping, wbc, &mpd); |
2970 | &mpd); | ||
2971 | /* | 3070 | /* |
2972 | * If we have a contiguous extent of pages and we | 3071 | * If we have a contiguous extent of pages and we |
2973 | * haven't done the I/O yet, map the blocks and submit | 3072 | * haven't done the I/O yet, map the blocks and submit |
@@ -3033,8 +3132,6 @@ retry: | |||
3033 | mapping->writeback_index = index; | 3132 | mapping->writeback_index = index; |
3034 | 3133 | ||
3035 | out_writepages: | 3134 | out_writepages: |
3036 | if (!no_nrwrite_index_update) | ||
3037 | wbc->no_nrwrite_index_update = 0; | ||
3038 | wbc->nr_to_write -= nr_to_writebump; | 3135 | wbc->nr_to_write -= nr_to_writebump; |
3039 | wbc->range_start = range_start; | 3136 | wbc->range_start = range_start; |
3040 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3137 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |