xfs: don't map ranges that span EOF for direct IO

Al Viro tracked down the problem that has caused generic/263 to fail on XFS since the test was introduced. It is caused by xfs_get_blocks() mapping a single extent that spans EOF without marking it as buffer_new(), so that the direct IO code does not zero the tail of the block at the new EOF. This is a long-standing bug that has been around for many, many years. Because xfs_get_blocks() starts the map before EOF, it can't set buffer_new(), because that causes the direct IO code to also zero unaligned sectors at the head of the IO. This would overwrite valid data with zeros, and hence we cannot validly return a single extent that spans EOF to direct IO. Fix this by detecting a mapping that spans EOF and truncating it down to EOF. This results in the direct IO code doing the right thing for unaligned data blocks before EOF, and then returning to get another mapping for the region beyond EOF, which XFS treats correctly by setting buffer_new() on it. This makes direct IO behave correctly w.r.t. tail block zeroing beyond EOF, and fsx is happy about that. Again, thanks to Al Viro for finding what I couldn't. [ dchinner: Fix for __divdi3 build error: Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com> Tested-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: Mark Tinguely <tinguely@sgi.com> Reviewed-by: Eric Sandeen <sandeen@redhat.com> ] Signed-off-by: Dave Chinner <dchinner@redhat.com> Tested-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
author: Dave Chinner <dchinner@redhat.com> 2014-04-16 18:15:19 -0400
committer: Dave Chinner <david@fromorbit.com> 2014-04-16 18:15:19 -0400
commit: 0e1f789d0dc38db79dfc4ddfd9cf541a8c198b7a (patch)
tree: b3c91cb7430f7e5e66eb1546c0897da1cb528c82 /fs
parent: 897b73b6a2ee5d3c06648b601beb1724f7fbd678 (diff)
1 files changed, 14 insertions, 0 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e0a793113ea9..0479c32c5eb1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
        /*
         * If this is O_DIRECT or the mpage code calling tell them how large
         * the mapping is, so that we can avoid repeated get_blocks calls.
+         *
+         * If the mapping spans EOF, then we have to break the mapping up as the
+         * mapping for blocks beyond EOF must be marked new so that sub block
+         * regions can be correctly zeroed. We can't do this for mappings within
+         * EOF unless the mapping was just allocated or is unwritten, otherwise
+         * the callers would overwrite existing data with zeros. Hence we have
+         * to split the mapping into a range up to and including EOF, and a
+         * second mapping for beyond EOF.
         */
        if (direct || size > (1 << inode->i_blkbits)) {
                xfs_off_t               mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
                ASSERT(mapping_size > 0);
                if (mapping_size > size)
                        mapping_size = size;
+                if (offset < i_size_read(inode) &&
+                    offset + mapping_size >= i_size_read(inode)) {
+                        /* limit mapping to block that spans EOF */
+                        mapping_size = roundup_64(i_size_read(inode) - offset,
+                                                  1 << inode->i_blkbits);
+                }
                if (mapping_size > LONG_MAX)
                        mapping_size = LONG_MAX;
author	Dave Chinner <dchinner@redhat.com>	2014-04-16 18:15:19 -0400
committer	Dave Chinner <david@fromorbit.com>	2014-04-16 18:15:19 -0400
commit	0e1f789d0dc38db79dfc4ddfd9cf541a8c198b7a (patch)
tree	b3c91cb7430f7e5e66eb1546c0897da1cb528c82 /fs
parent	897b73b6a2ee5d3c06648b601beb1724f7fbd678 (diff)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e0a793113ea9..0479c32c5eb1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
1344	/*	1344	/*
1345	* If this is O_DIRECT or the mpage code calling tell them how large	1345	* If this is O_DIRECT or the mpage code calling tell them how large
1346	* the mapping is, so that we can avoid repeated get_blocks calls.	1346	* the mapping is, so that we can avoid repeated get_blocks calls.
		1347	*
		1348	* If the mapping spans EOF, then we have to break the mapping up as the
		1349	* mapping for blocks beyond EOF must be marked new so that sub block
		1350	* regions can be correctly zeroed. We can't do this for mappings within
		1351	* EOF unless the mapping was just allocated or is unwritten, otherwise
		1352	* the callers would overwrite existing data with zeros. Hence we have
		1353	* to split the mapping into a range up to and including EOF, and a
		1354	* second mapping for beyond EOF.
1347	*/	1355	*/
1348	if (direct \|\| size > (1 << inode->i_blkbits)) {	1356	if (direct \|\| size > (1 << inode->i_blkbits)) {
1349	xfs_off_t mapping_size;	1357	xfs_off_t mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
1354	ASSERT(mapping_size > 0);	1362	ASSERT(mapping_size > 0);
1355	if (mapping_size > size)	1363	if (mapping_size > size)
1356	mapping_size = size;	1364	mapping_size = size;
		1365	if (offset < i_size_read(inode) &&
		1366	offset + mapping_size >= i_size_read(inode)) {
		1367	/* limit mapping to block that spans EOF */
		1368	mapping_size = roundup_64(i_size_read(inode) - offset,
		1369	1 << inode->i_blkbits);
		1370	}
1357	if (mapping_size > LONG_MAX)	1371	if (mapping_size > LONG_MAX)
1358	mapping_size = LONG_MAX;	1372	mapping_size = LONG_MAX;
1359		1373