author	Ming Lei <ming.lei@canonical.com>	2013-10-17 18:56:16 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2013-10-17 18:56:16 -0400
commit	aeac589a74b91c4c07458272767e089810fbd23d
tree	eb6b12341423a3520399d80a167ac82f0183bb50
parent	7534e854b930a021dedf9e16396ced5e70e1aba3
ext4: fix performance regression in ext4_writepages
Commit 4e7ea81db5 ("ext4: restructure writeback path") introduces another
performance regression for random writes:

- one more page may be added to the ext4 extent in
  mpage_prepare_extent_to_map() and submitted for I/O, so nr_to_write can
  become -1 before 'done' is set

- worse, dirty pages may still be retrieved from the page cache after
  nr_to_write becomes negative, so lots of small chunks can be submitted
  to the block device while page writeback is catching up with the write
  path, and performance is hurt

On one ARM A15 board with a SATA 3.0 SSD (CPU: 1.5GHz dual core, RAM: 2GB,
SATA controller: 3.0Gbps), this patch improves the result of the following
test from 157MB/sec to 174MB/sec (>10%):

	dd if=/dev/zero of=./z.img bs=8K count=512K

The above test is essentially the block-write prototype used by the
bonnie++ utility.

This patch makes sure that no more pages than nr_to_write can be added to
the extent for mapping, so that nr_to_write won't become negative.

Cc: linux-ext4@vger.kernel.org
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
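A minimal, self-contained sketch of the budget accounting this patch introduces (the variable left mirrors the new local in mpage_prepare_extent_to_map(); the page count, loop, and printf below are illustrative stand-ins, not kernel code): the budget check runs before a page is added and left is decremented once per added page, so nr_to_write can no longer go negative under WB_SYNC_NONE.

	#include <stdio.h>

	int main(void)
	{
		long nr_to_write = 4;	/* writeback budget handed in via wbc */
		long left = nr_to_write;
		int dirty_pages = 10;	/* pretend the file has 10 dirty pages */
		int added = 0;

		for (int i = 0; i < dirty_pages; i++) {
			/* WB_SYNC_NONE case: stop before exceeding the budget */
			if (left <= 0)
				break;
			added++;	/* stands in for adding the page to the extent */
			left--;
		}

		printf("added %d pages, budget left %ld\n", added, left);
		return 0;
	}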
 fs/ext4/inode.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e7e5b3d8f002..94aac67b55c9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2298,6 +2298,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 	struct address_space *mapping = mpd->inode->i_mapping;
 	struct pagevec pvec;
 	unsigned int nr_pages;
+	long left = mpd->wbc->nr_to_write;
 	pgoff_t index = mpd->first_page;
 	pgoff_t end = mpd->last_page;
 	int tag;
@@ -2333,6 +2334,17 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 			if (page->index > end)
 				goto out;
 
+			/*
+			 * Accumulated enough dirty pages? This doesn't apply
+			 * to WB_SYNC_ALL mode. For integrity sync we have to
+			 * keep going because someone may be concurrently
+			 * dirtying pages, and we might have synced a lot of
+			 * newly appeared dirty pages, but have not synced all
+			 * of the old dirty pages.
+			 */
+			if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
+				goto out;
+
 			/* If we can't merge this page, we are done. */
 			if (mpd->map.m_len > 0 && mpd->next_page != page->index)
 				goto out;
@@ -2367,19 +2379,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 			if (err <= 0)
 				goto out;
 			err = 0;
-
-			/*
-			 * Accumulated enough dirty pages? This doesn't apply
-			 * to WB_SYNC_ALL mode. For integrity sync we have to
-			 * keep going because someone may be concurrently
-			 * dirtying pages, and we might have synced a lot of
-			 * newly appeared dirty pages, but have not synced all
-			 * of the old dirty pages.
-			 */
-			if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
-			    mpd->next_page - mpd->first_page >=
-			    mpd->wbc->nr_to_write)
-				goto out;
+			left--;
 		}
 		pagevec_release(&pvec);
 		cond_resched();