[XFS] Fix transaction overrun during writeback.

Prevent transaction overrun in xfs_iomap_write_allocate() if we race with a truncate that overlaps the delalloc range we were planning to allocate. If we race, we may allocate into a hole and that requires block allocation. At this point in time we don't have a reservation for block allocation (apart from metadata blocks) and so allocating into a hole rather than a delalloc region results in overflowing the transaction block reservation. Fix it by only allowing a single extent to be allocated at a time. SGI-PV: 972757 SGI-Modid: xfs-linux-melb:xfs-kern:30005a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
author: David Chinner <dgc@sgi.com> 2007-11-23 00:29:11 -0500
committer: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> 2008-02-07 02:15:55 -0500
commit: e4143a1cf5973e3443c0650fc4c35292d3b7baa8 (patch)
tree: 320e3fcd98e8a17e250272c01c4518524ce255b0
parent: 786f486f8154b94b36182d2b53df3bf2b40d85e7 (diff)
1 files changed, 50 insertions, 25 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d16f40cdf5f6..637a24473f9b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -702,6 +702,9 @@ retry:
 * the originating callers request.
 *
 * Called without a lock on the inode.
+ *
+ * We no longer bother to look at the incoming map - all we have to
+ * guarantee is that whatever we allocate fills the required range.
 */
 int
 xfs_iomap_write_allocate(
@@ -717,9 +720,9 @@ xfs_iomap_write_allocate(
        xfs_fsblock_t   first_block;
        xfs_bmap_free_t free_list;
        xfs_filblks_t   count_fsb;
-        xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS];
+        xfs_bmbt_irec_t imap;
        xfs_trans_t     *tp;
-        int             i, nimaps, committed;
+        int             nimaps, committed;
        int             error = 0;
        int             nres;
@@ -766,13 +769,38 @@ xfs_iomap_write_allocate(
                        XFS_BMAP_INIT(&free_list, &first_block);
-                        nimaps = XFS_STRAT_WRITE_IMAPS;
                        /*
-                         * Ensure we don't go beyond eof - it is possible
+                         * it is possible that the extents have changed since
-                         * the extents changed since we did the read call,
+                         * we did the read call as we dropped the ilock for a
-                         * we dropped the ilock in the interim.
+                         * while. We have to be careful about truncates or hole
+                         * punchs here - we are not allowed to allocate
+                         * non-delalloc blocks here.
+                         *
+                         * The only protection against truncation is the pages
+                         * for the range we are being asked to convert are
+                         * locked and hence a truncate will block on them
+                         * first.
+                         *
+                         * As a result, if we go beyond the range we really
+                         * need and hit an delalloc extent boundary followed by
+                         * a hole while we have excess blocks in the map, we
+                         * will fill the hole incorrectly and overrun the
+                         * transaction reservation.
+                         *
+                         * Using a single map prevents this as we are forced to
+                         * check each map we look for overlap with the desired
+                         * range and abort as soon as we find it. Also, given
+                         * that we only return a single map, having one beyond
+                         * what we can return is probably a bit silly.
+                         *
+                         * We also need to check that we don't go beyond EOF;
+                         * this is a truncate optimisation as a truncate sets
+                         * the new file size before block on the pages we
+                         * currently have locked under writeback. Because they
+                         * are about to be tossed, we don't need to write them
+                         * back....
                         */
+                        nimaps = 1;
                        end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
                        xfs_bmap_last_offset(NULL, ip, &last_block,
                                XFS_DATA_FORK);
@@ -788,7 +816,7 @@ xfs_iomap_write_allocate(
                        /* Go get the actual blocks */
                        error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
                                        XFS_BMAPI_WRITE, &first_block, 1,
-                                        imap, &nimaps, &free_list, NULL);
+                                        &imap, &nimaps, &free_list, NULL);
                        if (error)
                                goto trans_cancel;
@@ -807,27 +835,24 @@ xfs_iomap_write_allocate(
                 * See if we were able to allocate an extent that
                 * covers at least part of the callers request
                 */
-                for (i = 0; i < nimaps; i++) {
+                if (unlikely(!imap.br_startblock &&
-                        if (unlikely(!imap[i].br_startblock &&
+                             XFS_IS_REALTIME_INODE(ip)))
-                                     !(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)))
+                        return xfs_cmn_err_fsblock_zero(ip, &imap);
-                                return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
+                if ((offset_fsb >= imap.br_startoff) &&
-                        if ((offset_fsb >= imap[i].br_startoff) &&
+                    (offset_fsb < (imap.br_startoff +
-                            (offset_fsb < (imap[i].br_startoff +
+                                   imap.br_blockcount))) {
-                                           imap[i].br_blockcount))) {
+                        *map = imap;
-                                *map = imap[i];
+                        *retmap = 1;
-                                *retmap = 1;
+                        XFS_STATS_INC(xs_xstrat_quick);
-                                XFS_STATS_INC(xs_xstrat_quick);
+                        return 0;
-                                return 0;
-                        }
-                        count_fsb -= imap[i].br_blockcount;
                }
-                /* So far we have not mapped the requested part of the
+                /*
+                 * So far we have not mapped the requested part of the
                 * file, just surrounding data, try again.
                 */
-                nimaps--;
+                count_fsb -= imap.br_blockcount;
-                map_start_fsb = imap[nimaps].br_startoff +
+                map_start_fsb = imap.br_startoff + imap.br_blockcount;
-                                imap[nimaps].br_blockcount;
        }
 trans_cancel:
author	David Chinner <dgc@sgi.com>	2007-11-23 00:29:11 -0500
committer	Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>	2008-02-07 02:15:55 -0500
commit	e4143a1cf5973e3443c0650fc4c35292d3b7baa8 (patch)
tree	320e3fcd98e8a17e250272c01c4518524ce255b0
parent	786f486f8154b94b36182d2b53df3bf2b40d85e7 (diff)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d16f40cdf5f6..637a24473f9b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c
@@ -702,6 +702,9 @@ retry:
702	* the originating callers request.	702	* the originating callers request.
703	*	703	*
704	* Called without a lock on the inode.	704	* Called without a lock on the inode.
		705	*
		706	* We no longer bother to look at the incoming map - all we have to
		707	* guarantee is that whatever we allocate fills the required range.
705	*/	708	*/
706	int	709	int
707	xfs_iomap_write_allocate(	710	xfs_iomap_write_allocate(
@@ -717,9 +720,9 @@ xfs_iomap_write_allocate(
717	xfs_fsblock_t first_block;	720	xfs_fsblock_t first_block;
718	xfs_bmap_free_t free_list;	721	xfs_bmap_free_t free_list;
719	xfs_filblks_t count_fsb;	722	xfs_filblks_t count_fsb;
720	xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS];	723	xfs_bmbt_irec_t imap;
721	xfs_trans_t *tp;	724	xfs_trans_t *tp;
722	int i, nimaps, committed;	725	int nimaps, committed;
723	int error = 0;	726	int error = 0;
724	int nres;	727	int nres;
725		728
@@ -766,13 +769,38 @@ xfs_iomap_write_allocate(
766		769
767	XFS_BMAP_INIT(&free_list, &first_block);	770	XFS_BMAP_INIT(&free_list, &first_block);
768		771
769	nimaps = XFS_STRAT_WRITE_IMAPS;
770	/*	772	/*
771	* Ensure we don't go beyond eof - it is possible	773	* it is possible that the extents have changed since
772	* the extents changed since we did the read call,	774	* we did the read call as we dropped the ilock for a
773	* we dropped the ilock in the interim.	775	* while. We have to be careful about truncates or hole
		776	* punchs here - we are not allowed to allocate
		777	* non-delalloc blocks here.
		778	*
		779	* The only protection against truncation is the pages
		780	* for the range we are being asked to convert are
		781	* locked and hence a truncate will block on them
		782	* first.
		783	*
		784	* As a result, if we go beyond the range we really
		785	* need and hit an delalloc extent boundary followed by
		786	* a hole while we have excess blocks in the map, we
		787	* will fill the hole incorrectly and overrun the
		788	* transaction reservation.
		789	*
		790	* Using a single map prevents this as we are forced to
		791	* check each map we look for overlap with the desired
		792	* range and abort as soon as we find it. Also, given
		793	* that we only return a single map, having one beyond
		794	* what we can return is probably a bit silly.
		795	*
		796	* We also need to check that we don't go beyond EOF;
		797	* this is a truncate optimisation as a truncate sets
		798	* the new file size before block on the pages we
		799	* currently have locked under writeback. Because they
		800	* are about to be tossed, we don't need to write them
		801	* back....
774	*/	802	*/
775		803	nimaps = 1;
776	end_fsb = XFS_B_TO_FSB(mp, ip->i_size);	804	end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
777	xfs_bmap_last_offset(NULL, ip, &last_block,	805	xfs_bmap_last_offset(NULL, ip, &last_block,
778	XFS_DATA_FORK);	806	XFS_DATA_FORK);
@@ -788,7 +816,7 @@ xfs_iomap_write_allocate(
788	/* Go get the actual blocks */	816	/* Go get the actual blocks */
789	error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,	817	error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
790	XFS_BMAPI_WRITE, &first_block, 1,	818	XFS_BMAPI_WRITE, &first_block, 1,
791	imap, &nimaps, &free_list, NULL);	819	&imap, &nimaps, &free_list, NULL);
792	if (error)	820	if (error)
793	goto trans_cancel;	821	goto trans_cancel;
794		822
@@ -807,27 +835,24 @@ xfs_iomap_write_allocate(
807	* See if we were able to allocate an extent that	835	* See if we were able to allocate an extent that
808	* covers at least part of the callers request	836	* covers at least part of the callers request
809	*/	837	*/
810	for (i = 0; i < nimaps; i++) {	838	if (unlikely(!imap.br_startblock &&
811	if (unlikely(!imap[i].br_startblock &&	839	XFS_IS_REALTIME_INODE(ip)))
812	!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)))	840	return xfs_cmn_err_fsblock_zero(ip, &imap);
813	return xfs_cmn_err_fsblock_zero(ip, &imap[i]);	841	if ((offset_fsb >= imap.br_startoff) &&
814	if ((offset_fsb >= imap[i].br_startoff) &&	842	(offset_fsb < (imap.br_startoff +
815	(offset_fsb < (imap[i].br_startoff +	843	imap.br_blockcount))) {
816	imap[i].br_blockcount))) {	844	*map = imap;
817	*map = imap[i];	845	*retmap = 1;
818	*retmap = 1;	846	XFS_STATS_INC(xs_xstrat_quick);
819	XFS_STATS_INC(xs_xstrat_quick);	847	return 0;
820	return 0;
821	}
822	count_fsb -= imap[i].br_blockcount;
823	}	848	}
824		849
825	/* So far we have not mapped the requested part of the	850	/*
		851	* So far we have not mapped the requested part of the
826	* file, just surrounding data, try again.	852	* file, just surrounding data, try again.
827	*/	853	*/
828	nimaps--;	854	count_fsb -= imap.br_blockcount;
829	map_start_fsb = imap[nimaps].br_startoff +	855	map_start_fsb = imap.br_startoff + imap.br_blockcount;
830	imap[nimaps].br_blockcount;
831	}	856	}
832		857
833	trans_cancel:	858	trans_cancel: