author     Dave Chinner <dchinner@redhat.com>      2010-11-29 23:14:39 -0500
committer  Alex Elder <aelder@sgi.com>             2010-12-01 08:40:19 -0500
commit     c726de4409a8d3a03877b1ef4342bfe8a15f5e5e (patch)
tree       f1212b8f61f6dcdb52206842e8436a45f878a9e9 /fs/xfs
parent     e8a7e48bb248a1196484d3f8afa53bded2b24e71 (diff)
xfs: fix failed write truncation handling.
Since the move to the new truncate sequence we call xfs_setattr to
truncate down excessively instantiated blocks. As shown by the test case
in kernel.org BZ #22452, that doesn't work too well. Because the internal
XFS inode size and the VFS inode i_size can disagree at this point, it
zeroes data that it shouldn't.
But a full-blown truncate seems like overkill here. We only instantiate
delayed allocations in the write path, and given that we never released
the iolock we can't have converted them to real allocations yet either.
The only nasty case is pre-existing preallocation, which we need to skip.
We already do this for page discard during writeback, so make the delayed
allocation block punching a generic function and call it from the failed
write path as well as xfs_aops_discard_page. The callers are
responsible for ensuring that partial blocks are not truncated away,
and that they hold the ilock.
Based on a fix originally from Christoph Hellwig. This version uses
filesystem blocks as the range unit.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
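
For readers skimming the diff, the calling pattern the new helper expects looks like this (a condensed sketch based on the xfs_vm_write_failed() hunk below; error reporting and the forced-shutdown check are trimmed): the caller truncates the page cache back, computes a whole-filesystem-block range beyond i_size, and holds the ilock across the punch.

	/*
	 * Sketch only, condensed from the xfs_vm_write_failed() hunk below:
	 * trim the page cache, then punch whole delalloc blocks beyond
	 * i_size with the inode lock held. Error reporting is omitted.
	 */
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fileoff_t		start_fsb, end_fsb;
	int			error;

	truncate_pagecache(inode, to, inode->i_size);

	/* only whole blocks beyond i_size may be punched */
	start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
	end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
	if (end_fsb > start_fsb) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
						      end_fsb - start_fsb);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}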
Diffstat (limited to 'fs/xfs')
 -rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c |  94
 -rw-r--r--  fs/xfs/xfs_bmap.c           |  76
 -rw-r--r--  fs/xfs/xfs_bmap.h           |   5
 3 files changed, 121 insertions, 54 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7d287afccde5..691f61223ed6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
-	ssize_t			len = 1 << inode->i_blkbits;
 
 	if (!xfs_is_delayed_page(page, IO_DELAY))
 		goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	bh = head = page_buffers(page);
 	do {
-		int		done;
-		xfs_fileoff_t	offset_fsb;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
 		int		error;
-		xfs_fsblock_t	firstblock;
-		xfs_bmap_free_t flist;
+		xfs_fileoff_t	start_fsb;
 
 		if (!buffer_delay(bh))
 			goto next_buffer;
 
-		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
-				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
-				&nimaps, NULL);
-
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
-			"page discard failed delalloc mapping lookup.");
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_buffer;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_buffer;
-		}
-		WARN_ON(imap.br_blockcount == 0);
-
-		/*
-		 * Note: while we initialise the firstblock/flist pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we need
-		 * don't cancel or finish them after the xfs_bunmapi() call.
-		 */
-		xfs_bmap_init(&flist, &firstblock);
-		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
-					&flist, &done);
-
-		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
 			break;
 		}
 next_buffer:
-		offset += len;
+		offset += 1 << inode->i_blkbits;
 
 	} while ((bh = bh->b_this_page) != head);
 
@@ -1505,11 +1460,42 @@ xfs_vm_write_failed(
 	struct inode		*inode = mapping->host;
 
 	if (to > inode->i_size) {
-		struct iattr	ia = {
-			.ia_valid	= ATTR_SIZE | ATTR_FORCE,
-			.ia_size	= inode->i_size,
-		};
-		xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+		/*
+		 * punch out the delalloc blocks we have already allocated. We
+		 * don't call xfs_setattr() to do this as we may be in the
+		 * middle of a multi-iovec write and so the vfs inode->i_size
+		 * will not match the xfs ip->i_size and so it will zero too
+		 * much. Hence we jus truncate the page cache to zero what is
+		 * necessary and punch the delalloc blocks directly.
+		 */
+		struct xfs_inode	*ip = XFS_I(inode);
+		xfs_fileoff_t		start_fsb;
+		xfs_fileoff_t		end_fsb;
+		int			error;
+
+		truncate_pagecache(inode, to, inode->i_size);
+
+		/*
+		 * Check if there are any blocks that are outside of i_size
+		 * that need to be trimmed back.
+		 */
+		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+		if (end_fsb <= start_fsb)
+			return;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+							end_fsb - start_fsb);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"xfs_vm_write_failed: unable to clean up ino %lld",
+						ip->i_ino);
+			}
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
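
To make the range computation in the xfs_vm_write_failed() hunk above concrete, here is a small standalone illustration (not part of the commit; the 4096-byte block size and the byte offsets are assumptions for the example). XFS_B_TO_FSB() rounds a byte count up to whole filesystem blocks, so adding one to the converted i_size keeps the punch clear of the block that may still hold valid data.

/*
 * Illustration only (not part of the commit): the byte-to-block range
 * arithmetic used in xfs_vm_write_failed() above, with an assumed
 * 4096-byte filesystem block size and example offsets. XFS_B_TO_FSB()
 * rounds a byte count up to whole filesystem blocks; it is modelled
 * here by b_to_fsb().
 */
#include <stdio.h>

#define BLKSZ	4096ULL			/* assumed filesystem block size */

/* round a byte count up to whole blocks, as XFS_B_TO_FSB() does */
static unsigned long long b_to_fsb(unsigned long long bytes)
{
	return (bytes + BLKSZ - 1) / BLKSZ;
}

int main(void)
{
	unsigned long long i_size = 10000;	/* current on-disk EOF */
	unsigned long long to = 20000;		/* end of the failed write */

	/* stay clear of the (possibly partial) block holding i_size */
	unsigned long long start_fsb = b_to_fsb(i_size) + 1;	/* 3 + 1 = 4 */
	unsigned long long end_fsb = b_to_fsb(to);		/* 5 */

	if (end_fsb <= start_fsb) {
		printf("nothing to punch\n");
		return 0;
	}

	/* punches end_fsb - start_fsb = 1 block: block 4, well past EOF */
	printf("punch %llu block(s) starting at block %llu\n",
	       end_fsb - start_fsb, start_fsb);
	return 0;
}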
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e32e13..08b179fa9e8f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6070,3 +6070,79 @@ xfs_bmap_disk_count_leaves(
 		*count += xfs_bmbt_disk_get_blockcount(frp);
 	}
 }
+
+/*
+ * dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will alays punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length)
+{
+	xfs_fileoff_t		remaining = length;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	do {
+		int		done;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t flist;
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi(NULL, ip, start_fsb, 1,
+				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
+				&nimaps, NULL);
+
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"Failed delalloc mapping lookup ino %lld fsb %lld.",
+						ip->i_ino, start_fsb);
+			}
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_block;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_block;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we need
+		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+					&flist, &done);
+		if (error)
+			break;
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+		start_fsb++;
+		remaining--;
+	} while(remaining > 0);
+
+	return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6ecdfc..3651191daea1 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
 	int			whichfork,
 	int			*count);
 
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length);
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */