author		Theodore Ts'o <tytso@mit.edu>	2010-05-16 18:00:00 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2010-05-16 18:00:00 -0400
commit		8e48dcfbd7c0892b4cfd064d682cc4c95a29df32 (patch)
tree		653f12cc4eea9606c80142752612ad28f1ccdb45 /fs/ext4
parent		39a4bade8c1826b658316d66ee81c09b0a4d7d42 (diff)
ext4: Use our own write_cache_pages()
Make a copy of write_cache_pages() for the benefit of
ext4_da_writepages(). This allows us to simplify the code some, and
will allow us to further customize the code in future patches.
There are some nasty hacks in write_cache_pages(), which Linus has
(correctly) characterized as vile. I've just copied it into
write_cache_pages_da(), without trying to clean those bits up lest I
break something in ext4's delalloc implementation, which is a bit
fragile right now. This will allow Dave Chinner to clean up
write_cache_pages() in mm/page-writeback.c, without worrying about
breaking ext4. Eventually write_cache_pages_da() will go away when I
rewrite ext4's delayed allocation and create a general
ext4_writepages() which is used for all of ext4's writeback. Until
then, this is the lowest risk way to clean up the core
write_cache_pages() function.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/ext4')
-rw-r--r--	fs/ext4/inode.c	141
1 files changed, 119 insertions, 22 deletions
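At its core, the change to ext4_da_writepages() below swaps the call into the generic walker for a call into the new ext4-private copy, which has __mpage_da_writepage() hard-wired as its callback; a condensed excerpt of the hunks that follow (not standalone code):

	/* before: generic walker in mm/page-writeback.c, ext4's callback passed in */
	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);

	/* after: ext4's forked copy of the walker, callback built in */
	ret = write_cache_pages_da(mapping, wbc, &mpd);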
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6aa0442811d2..830336d3911b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2426,17 +2426,6 @@ static int __mpage_da_writepage(struct page *page,
 	struct buffer_head *bh, *head;
 	sector_t logical;
 
-	if (mpd->io_done) {
-		/*
-		 * Rest of the page in the page_vec
-		 * redirty then and skip then. We will
-		 * try to write them again after
-		 * starting a new transaction
-		 */
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return MPAGE_DA_EXTENT_TAIL;
-	}
 	/*
 	 * Can we merge this page to current extent?
 	 */
@@ -2831,6 +2820,124 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 	return ext4_chunk_trans_blocks(inode, max_blocks);
 }
 
+/*
+ * write_cache_pages_da - walk the list of dirty pages of the given
+ * address space and call the callback function (which usually writes
+ * the pages).
+ *
+ * This is a forked version of write_cache_pages().  Differences:
+ *	Range cyclic is ignored.
+ *	no_nrwrite_index_update is always presumed true
+ */
+static int write_cache_pages_da(struct address_space *mapping,
+				struct writeback_control *wbc,
+				struct mpage_da_data *mpd)
+{
+	int ret = 0;
+	int done = 0;
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	long nr_to_write = wbc->nr_to_write;
+
+	pagevec_init(&pvec, 0);
+	index = wbc->range_start >> PAGE_CACHE_SHIFT;
+	end = wbc->range_end >> PAGE_CACHE_SHIFT;
+
+	while (!done && (index <= end)) {
+		int i;
+
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			      PAGECACHE_TAG_DIRTY,
+			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
+			 * mapping. However, page->index will not change
+			 * because we have a reference on the page.
+			 */
+			if (page->index > end) {
+				done = 1;
+				break;
+			}
+
+			lock_page(page);
+
+			/*
+			 * Page truncated or invalidated. We can freely skip it
+			 * then, even for data integrity operations: the page
+			 * has disappeared concurrently, so there could be no
+			 * real expectation of this data interity operation
+			 * even if there is now a new, dirty page at the same
+			 * pagecache address.
+			 */
+			if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+				unlock_page(page);
+				continue;
+			}
+
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
+			}
+
+			if (PageWriteback(page)) {
+				if (wbc->sync_mode != WB_SYNC_NONE)
+					wait_on_page_writeback(page);
+				else
+					goto continue_unlock;
+			}
+
+			BUG_ON(PageWriteback(page));
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
+
+			ret = __mpage_da_writepage(page, wbc, mpd);
+			if (unlikely(ret)) {
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+				} else {
+					done = 1;
+					break;
+				}
+			}
+
+			if (nr_to_write > 0) {
+				nr_to_write--;
+				if (nr_to_write == 0 &&
+				    wbc->sync_mode == WB_SYNC_NONE) {
+					/*
+					 * We stop writing back only if we are
+					 * not doing integrity sync. In case of
+					 * integrity sync we have to keep going
+					 * because someone may be concurrently
+					 * dirtying pages, and we might have
+					 * synced a lot of newly appeared dirty
+					 * pages, but have not synced all of the
+					 * old dirty pages.
+					 */
+					done = 1;
+					break;
+				}
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	return ret;
+}
+
+
 static int ext4_da_writepages(struct address_space *mapping,
 			       struct writeback_control *wbc)
 {
@@ -2839,7 +2946,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 	handle_t *handle = NULL;
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
-	int no_nrwrite_index_update;
 	int pages_written = 0;
 	long pages_skipped;
 	unsigned int max_pages;
@@ -2919,12 +3025,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 	mpd.wbc = wbc;
 	mpd.inode = mapping->host;
 
-	/*
-	 * we don't want write_cache_pages to update
-	 * nr_to_write and writeback_index
-	 */
-	no_nrwrite_index_update = wbc->no_nrwrite_index_update;
-	wbc->no_nrwrite_index_update = 1;
 	pages_skipped = wbc->pages_skipped;
 
 retry:
@@ -2966,8 +3066,7 @@ retry:
 		mpd.io_done = 0;
 		mpd.pages_written = 0;
 		mpd.retval = 0;
-		ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
-					&mpd);
+		ret = write_cache_pages_da(mapping, wbc, &mpd);
 		/*
 		 * If we have a contiguous extent of pages and we
 		 * haven't done the I/O yet, map the blocks and submit
@@ -3033,8 +3132,6 @@ retry:
 		mapping->writeback_index = index;
 
 out_writepages:
-	if (!no_nrwrite_index_update)
-		wbc->no_nrwrite_index_update = 0;
 	wbc->nr_to_write -= nr_to_writebump;
 	wbc->range_start = range_start;
 	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);