diff options
| author | Theodore Ts'o <tytso@mit.edu> | 2010-05-16 18:00:00 -0400 | 
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2010-05-16 18:00:00 -0400 | 
| commit | 8e48dcfbd7c0892b4cfd064d682cc4c95a29df32 (patch) | |
| tree | 653f12cc4eea9606c80142752612ad28f1ccdb45 | |
| parent | 39a4bade8c1826b658316d66ee81c09b0a4d7d42 (diff) | |
ext4: Use our own write_cache_pages()
Make a copy of write_cache_pages() for the benefit of
ext4_da_writepages().  This allows us to simplify the code some, and
will allow us to further customize the code in future patches.
There are some nasty hacks in write_cache_pages(), which Linus has
(correctly) characterized as vile.  I've just copied it into
write_cache_pages_da(), without trying to clean those bits up lest I
break something in ext4's delalloc implementation, which is a bit
fragile right now.  This will allow Dave Chinner to clean up
write_cache_pages() in mm/page-writeback.c, without worrying about
breaking ext4.  Eventually write_cache_pages_da() will go away when I
rewrite ext4's delayed allocation and create a general
ext4_writepages() which is used for all of ext4's writeback.  Until
then, this is the lowest risk way to clean up the core
write_cache_pages() function.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dave Chinner <david@fromorbit.com>
| -rw-r--r-- | fs/ext4/inode.c | 141 | 
1 files changed, 119 insertions, 22 deletions
| diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6aa0442811d2..830336d3911b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -2426,17 +2426,6 @@ static int __mpage_da_writepage(struct page *page, | |||
| 2426 | struct buffer_head *bh, *head; | 2426 | struct buffer_head *bh, *head; | 
| 2427 | sector_t logical; | 2427 | sector_t logical; | 
| 2428 | 2428 | ||
| 2429 | if (mpd->io_done) { | ||
| 2430 | /* | ||
| 2431 | * Rest of the page in the page_vec | ||
| 2432 | * redirty them and skip them. We will | ||
| 2433 | * try to write them again after | ||
| 2434 | * starting a new transaction | ||
| 2435 | */ | ||
| 2436 | redirty_page_for_writepage(wbc, page); | ||
| 2437 | unlock_page(page); | ||
| 2438 | return MPAGE_DA_EXTENT_TAIL; | ||
| 2439 | } | ||
| 2440 | /* | 2429 | /* | 
| 2441 | * Can we merge this page to current extent? | 2430 | * Can we merge this page to current extent? | 
| 2442 | */ | 2431 | */ | 
| @@ -2831,6 +2820,124 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
| 2831 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2820 | return ext4_chunk_trans_blocks(inode, max_blocks); | 
| 2832 | } | 2821 | } | 
| 2833 | 2822 | ||
| 2823 | /* | ||
| 2824 | * write_cache_pages_da - walk the list of dirty pages of the given | ||
| 2825 | * address space and call the callback function (which usually writes | ||
| 2826 | * the pages). | ||
| 2827 | * | ||
| 2828 | * This is a forked version of write_cache_pages(). Differences: | ||
| 2829 | * Range cyclic is ignored. | ||
| 2830 | * no_nrwrite_index_update is always presumed true | ||
| 2831 | */ | ||
| 2832 | static int write_cache_pages_da(struct address_space *mapping, | ||
| 2833 | struct writeback_control *wbc, | ||
| 2834 | struct mpage_da_data *mpd) | ||
| 2835 | { | ||
| 2836 | int ret = 0; | ||
| 2837 | int done = 0; | ||
| 2838 | struct pagevec pvec; | ||
| 2839 | int nr_pages; | ||
| 2840 | pgoff_t index; | ||
| 2841 | pgoff_t end; /* Inclusive */ | ||
| 2842 | long nr_to_write = wbc->nr_to_write; | ||
| 2843 | |||
| 2844 | pagevec_init(&pvec, 0); | ||
| 2845 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
| 2846 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
| 2847 | |||
| 2848 | while (!done && (index <= end)) { | ||
| 2849 | int i; | ||
| 2850 | |||
| 2851 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 2852 | PAGECACHE_TAG_DIRTY, | ||
| 2853 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
| 2854 | if (nr_pages == 0) | ||
| 2855 | break; | ||
| 2856 | |||
| 2857 | for (i = 0; i < nr_pages; i++) { | ||
| 2858 | struct page *page = pvec.pages[i]; | ||
| 2859 | |||
| 2860 | /* | ||
| 2861 | * At this point, the page may be truncated or | ||
| 2862 | * invalidated (changing page->mapping to NULL), or | ||
| 2863 | * even swizzled back from swapper_space to tmpfs file | ||
| 2864 | * mapping. However, page->index will not change | ||
| 2865 | * because we have a reference on the page. | ||
| 2866 | */ | ||
| 2867 | if (page->index > end) { | ||
| 2868 | done = 1; | ||
| 2869 | break; | ||
| 2870 | } | ||
| 2871 | |||
| 2872 | lock_page(page); | ||
| 2873 | |||
| 2874 | /* | ||
| 2875 | * Page truncated or invalidated. We can freely skip it | ||
| 2876 | * then, even for data integrity operations: the page | ||
| 2877 | * has disappeared concurrently, so there could be no | ||
| 2878 | * real expectation of this data integrity operation | ||
| 2879 | * even if there is now a new, dirty page at the same | ||
| 2880 | * pagecache address. | ||
| 2881 | */ | ||
| 2882 | if (unlikely(page->mapping != mapping)) { | ||
| 2883 | continue_unlock: | ||
| 2884 | unlock_page(page); | ||
| 2885 | continue; | ||
| 2886 | } | ||
| 2887 | |||
| 2888 | if (!PageDirty(page)) { | ||
| 2889 | /* someone wrote it for us */ | ||
| 2890 | goto continue_unlock; | ||
| 2891 | } | ||
| 2892 | |||
| 2893 | if (PageWriteback(page)) { | ||
| 2894 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
| 2895 | wait_on_page_writeback(page); | ||
| 2896 | else | ||
| 2897 | goto continue_unlock; | ||
| 2898 | } | ||
| 2899 | |||
| 2900 | BUG_ON(PageWriteback(page)); | ||
| 2901 | if (!clear_page_dirty_for_io(page)) | ||
| 2902 | goto continue_unlock; | ||
| 2903 | |||
| 2904 | ret = __mpage_da_writepage(page, wbc, mpd); | ||
| 2905 | if (unlikely(ret)) { | ||
| 2906 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
| 2907 | unlock_page(page); | ||
| 2908 | ret = 0; | ||
| 2909 | } else { | ||
| 2910 | done = 1; | ||
| 2911 | break; | ||
| 2912 | } | ||
| 2913 | } | ||
| 2914 | |||
| 2915 | if (nr_to_write > 0) { | ||
| 2916 | nr_to_write--; | ||
| 2917 | if (nr_to_write == 0 && | ||
| 2918 | wbc->sync_mode == WB_SYNC_NONE) { | ||
| 2919 | /* | ||
| 2920 | * We stop writing back only if we are | ||
| 2921 | * not doing integrity sync. In case of | ||
| 2922 | * integrity sync we have to keep going | ||
| 2923 | * because someone may be concurrently | ||
| 2924 | * dirtying pages, and we might have | ||
| 2925 | * synced a lot of newly appeared dirty | ||
| 2926 | * pages, but have not synced all of the | ||
| 2927 | * old dirty pages. | ||
| 2928 | */ | ||
| 2929 | done = 1; | ||
| 2930 | break; | ||
| 2931 | } | ||
| 2932 | } | ||
| 2933 | } | ||
| 2934 | pagevec_release(&pvec); | ||
| 2935 | cond_resched(); | ||
| 2936 | } | ||
| 2937 | return ret; | ||
| 2938 | } | ||
| 2939 | |||
| 2940 | |||
| 2834 | static int ext4_da_writepages(struct address_space *mapping, | 2941 | static int ext4_da_writepages(struct address_space *mapping, | 
| 2835 | struct writeback_control *wbc) | 2942 | struct writeback_control *wbc) | 
| 2836 | { | 2943 | { | 
| @@ -2839,7 +2946,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2839 | handle_t *handle = NULL; | 2946 | handle_t *handle = NULL; | 
| 2840 | struct mpage_da_data mpd; | 2947 | struct mpage_da_data mpd; | 
| 2841 | struct inode *inode = mapping->host; | 2948 | struct inode *inode = mapping->host; | 
| 2842 | int no_nrwrite_index_update; | ||
| 2843 | int pages_written = 0; | 2949 | int pages_written = 0; | 
| 2844 | long pages_skipped; | 2950 | long pages_skipped; | 
| 2845 | unsigned int max_pages; | 2951 | unsigned int max_pages; | 
| @@ -2919,12 +3025,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2919 | mpd.wbc = wbc; | 3025 | mpd.wbc = wbc; | 
| 2920 | mpd.inode = mapping->host; | 3026 | mpd.inode = mapping->host; | 
| 2921 | 3027 | ||
| 2922 | /* | ||
| 2923 | * we don't want write_cache_pages to update | ||
| 2924 | * nr_to_write and writeback_index | ||
| 2925 | */ | ||
| 2926 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
| 2927 | wbc->no_nrwrite_index_update = 1; | ||
| 2928 | pages_skipped = wbc->pages_skipped; | 3028 | pages_skipped = wbc->pages_skipped; | 
| 2929 | 3029 | ||
| 2930 | retry: | 3030 | retry: | 
| @@ -2966,8 +3066,7 @@ retry: | |||
| 2966 | mpd.io_done = 0; | 3066 | mpd.io_done = 0; | 
| 2967 | mpd.pages_written = 0; | 3067 | mpd.pages_written = 0; | 
| 2968 | mpd.retval = 0; | 3068 | mpd.retval = 0; | 
| 2969 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | 3069 | ret = write_cache_pages_da(mapping, wbc, &mpd); | 
| 2970 | &mpd); | ||
| 2971 | /* | 3070 | /* | 
| 2972 | * If we have a contiguous extent of pages and we | 3071 | * If we have a contiguous extent of pages and we | 
| 2973 | * haven't done the I/O yet, map the blocks and submit | 3072 | * haven't done the I/O yet, map the blocks and submit | 
| @@ -3033,8 +3132,6 @@ retry: | |||
| 3033 | mapping->writeback_index = index; | 3132 | mapping->writeback_index = index; | 
| 3034 | 3133 | ||
| 3035 | out_writepages: | 3134 | out_writepages: | 
| 3036 | if (!no_nrwrite_index_update) | ||
| 3037 | wbc->no_nrwrite_index_update = 0; | ||
| 3038 | wbc->nr_to_write -= nr_to_writebump; | 3135 | wbc->nr_to_write -= nr_to_writebump; | 
| 3039 | wbc->range_start = range_start; | 3136 | wbc->range_start = range_start; | 
| 3040 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3137 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 
