about summary refs log tree commit diff stats
path: root/fs/ext4
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2013-08-17 10:02:33 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-08-17 10:02:33 -0400
commit5f1132b2ba8c873f25982cf45917e8455fb6c962 (patch)
tree91aeeb9c2268a57f9138dc571260b5b3692508ba /fs/ext4
parent09930042a2e94cf8ee79d22943915612c1e4ba51 (diff)
ext4: fix ext4_writepages() in presence of truncate
Inode size can arbitrarily change while writeback is in progress. When ext4_writepages() has prepared a long extent for mapping and truncate then reduces i_size, mpage_map_and_submit_buffers() will always map just one buffer in a page instead of all of them due to lblk < blocks check. So we end up not using all blocks we've allocated (thus leaking them) and also delalloc accounting goes wrong manifesting as a warning like: ext4_da_release_space:1333: ext4_da_release_space: ino 12, to_free 1 with only 0 reserved data blocks Note that the problem can happen only when blocksize < pagesize because otherwise we have only a single buffer in the page. Fix the problem by removing the size check from the mapping loop. We have an extent allocated so we have to use it all before checking for i_size. We also rename add_page_bufs_to_extent() to mpage_process_page_bufs() and make that function submit the page for IO if all buffers (up to EOF) in it are mapped. Reported-by: Dave Jones <davej@redhat.com> Reported-by: Zheng Liu <gnehzuil.liu@gmail.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/inode.c107
1 file changed, 66 insertions, 41 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 787497d536b6..19fa2e076275 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1890,6 +1890,26 @@ static int ext4_writepage(struct page *page,
1890 return ret; 1890 return ret;
1891} 1891}
1892 1892
1893static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1894{
1895 int len;
1896 loff_t size = i_size_read(mpd->inode);
1897 int err;
1898
1899 BUG_ON(page->index != mpd->first_page);
1900 if (page->index == size >> PAGE_CACHE_SHIFT)
1901 len = size & ~PAGE_CACHE_MASK;
1902 else
1903 len = PAGE_CACHE_SIZE;
1904 clear_page_dirty_for_io(page);
1905 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1906 if (!err)
1907 mpd->wbc->nr_to_write--;
1908 mpd->first_page++;
1909
1910 return err;
1911}
1912
1893#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) 1913#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
1894 1914
1895/* 1915/*
@@ -1948,12 +1968,29 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
1948 return false; 1968 return false;
1949} 1969}
1950 1970
1951static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, 1971/*
1952 struct buffer_head *head, 1972 * mpage_process_page_bufs - submit page buffers for IO or add them to extent
1953 struct buffer_head *bh, 1973 *
1954 ext4_lblk_t lblk) 1974 * @mpd - extent of blocks for mapping
1975 * @head - the first buffer in the page
1976 * @bh - buffer we should start processing from
1977 * @lblk - logical number of the block in the file corresponding to @bh
1978 *
1979 * Walk through page buffers from @bh upto @head (exclusive) and either submit
1980 * the page for IO if all buffers in this page were mapped and there's no
1981 * accumulated extent of buffers to map or add buffers in the page to the
1982 * extent of buffers to map. The function returns 1 if the caller can continue
1983 * by processing the next page, 0 if it should stop adding buffers to the
1984 * extent to map because we cannot extend it anymore. It can also return value
1985 * < 0 in case of error during IO submission.
1986 */
1987static int mpage_process_page_bufs(struct mpage_da_data *mpd,
1988 struct buffer_head *head,
1989 struct buffer_head *bh,
1990 ext4_lblk_t lblk)
1955{ 1991{
1956 struct inode *inode = mpd->inode; 1992 struct inode *inode = mpd->inode;
1993 int err;
1957 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) 1994 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
1958 >> inode->i_blkbits; 1995 >> inode->i_blkbits;
1959 1996
@@ -1963,32 +2000,18 @@ static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
1963 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) { 2000 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
1964 /* Found extent to map? */ 2001 /* Found extent to map? */
1965 if (mpd->map.m_len) 2002 if (mpd->map.m_len)
1966 return false; 2003 return 0;
1967 /* Everything mapped so far and we hit EOF */ 2004 /* Everything mapped so far and we hit EOF */
1968 return true; 2005 break;
1969 } 2006 }
1970 } while (lblk++, (bh = bh->b_this_page) != head); 2007 } while (lblk++, (bh = bh->b_this_page) != head);
1971 return true; 2008 /* So far everything mapped? Submit the page for IO. */
1972} 2009 if (mpd->map.m_len == 0) {
1973 2010 err = mpage_submit_page(mpd, head->b_page);
1974static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) 2011 if (err < 0)
1975{ 2012 return err;
1976 int len; 2013 }
1977 loff_t size = i_size_read(mpd->inode); 2014 return lblk < blocks;
1978 int err;
1979
1980 BUG_ON(page->index != mpd->first_page);
1981 if (page->index == size >> PAGE_CACHE_SHIFT)
1982 len = size & ~PAGE_CACHE_MASK;
1983 else
1984 len = PAGE_CACHE_SIZE;
1985 clear_page_dirty_for_io(page);
1986 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1987 if (!err)
1988 mpd->wbc->nr_to_write--;
1989 mpd->first_page++;
1990
1991 return err;
1992} 2015}
1993 2016
1994/* 2017/*
@@ -2012,8 +2035,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2012 struct inode *inode = mpd->inode; 2035 struct inode *inode = mpd->inode;
2013 struct buffer_head *head, *bh; 2036 struct buffer_head *head, *bh;
2014 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; 2037 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
2015 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
2016 >> inode->i_blkbits;
2017 pgoff_t start, end; 2038 pgoff_t start, end;
2018 ext4_lblk_t lblk; 2039 ext4_lblk_t lblk;
2019 sector_t pblock; 2040 sector_t pblock;
@@ -2048,18 +2069,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2048 */ 2069 */
2049 mpd->map.m_len = 0; 2070 mpd->map.m_len = 0;
2050 mpd->map.m_flags = 0; 2071 mpd->map.m_flags = 0;
2051 add_page_bufs_to_extent(mpd, head, bh, 2072 /*
2052 lblk); 2073 * FIXME: If dioread_nolock supports
2074 * blocksize < pagesize, we need to make
2075 * sure we add size mapped so far to
2076 * io_end->size as the following call
2077 * can submit the page for IO.
2078 */
2079 err = mpage_process_page_bufs(mpd, head,
2080 bh, lblk);
2053 pagevec_release(&pvec); 2081 pagevec_release(&pvec);
2054 return 0; 2082 if (err > 0)
2083 err = 0;
2084 return err;
2055 } 2085 }
2056 if (buffer_delay(bh)) { 2086 if (buffer_delay(bh)) {
2057 clear_buffer_delay(bh); 2087 clear_buffer_delay(bh);
2058 bh->b_blocknr = pblock++; 2088 bh->b_blocknr = pblock++;
2059 } 2089 }
2060 clear_buffer_unwritten(bh); 2090 clear_buffer_unwritten(bh);
2061 } while (++lblk < blocks && 2091 } while (lblk++, (bh = bh->b_this_page) != head);
2062 (bh = bh->b_this_page) != head);
2063 2092
2064 /* 2093 /*
2065 * FIXME: This is going to break if dioread_nolock 2094 * FIXME: This is going to break if dioread_nolock
@@ -2328,14 +2357,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
2328 lblk = ((ext4_lblk_t)page->index) << 2357 lblk = ((ext4_lblk_t)page->index) <<
2329 (PAGE_CACHE_SHIFT - blkbits); 2358 (PAGE_CACHE_SHIFT - blkbits);
2330 head = page_buffers(page); 2359 head = page_buffers(page);
2331 if (!add_page_bufs_to_extent(mpd, head, head, lblk)) 2360 err = mpage_process_page_bufs(mpd, head, head, lblk);
2361 if (err <= 0)
2332 goto out; 2362 goto out;
2333 /* So far everything mapped? Submit the page for IO. */ 2363 err = 0;
2334 if (mpd->map.m_len == 0) {
2335 err = mpage_submit_page(mpd, page);
2336 if (err < 0)
2337 goto out;
2338 }
2339 2364
2340 /* 2365 /*
2341 * Accumulated enough dirty pages? This doesn't apply 2366 * Accumulated enough dirty pages? This doesn't apply