aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2007-11-23 00:29:11 -0500
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-02-07 02:15:55 -0500
commite4143a1cf5973e3443c0650fc4c35292d3b7baa8 (patch)
tree320e3fcd98e8a17e250272c01c4518524ce255b0
parent786f486f8154b94b36182d2b53df3bf2b40d85e7 (diff)
[XFS] Fix transaction overrun during writeback.
Prevent transaction overrun in xfs_iomap_write_allocate() if we race with a truncate that overlaps the delalloc range we were planning to allocate. If we race, we may allocate into a hole and that requires block allocation. At this point in time we don't have a reservation for block allocation (apart from metadata blocks) and so allocating into a hole rather than a delalloc region results in overflowing the transaction block reservation. Fix it by only allowing a single extent to be allocated at a time. SGI-PV: 972757 SGI-Modid: xfs-linux-melb:xfs-kern:30005a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r--fs/xfs/xfs_iomap.c75
1 files changed, 50 insertions, 25 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d16f40cdf5f6..637a24473f9b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -702,6 +702,9 @@ retry:
702 * the originating callers request. 702 * the originating callers request.
703 * 703 *
704 * Called without a lock on the inode. 704 * Called without a lock on the inode.
705 *
706 * We no longer bother to look at the incoming map - all we have to
707 * guarantee is that whatever we allocate fills the required range.
705 */ 708 */
706int 709int
707xfs_iomap_write_allocate( 710xfs_iomap_write_allocate(
@@ -717,9 +720,9 @@ xfs_iomap_write_allocate(
717 xfs_fsblock_t first_block; 720 xfs_fsblock_t first_block;
718 xfs_bmap_free_t free_list; 721 xfs_bmap_free_t free_list;
719 xfs_filblks_t count_fsb; 722 xfs_filblks_t count_fsb;
720 xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; 723 xfs_bmbt_irec_t imap;
721 xfs_trans_t *tp; 724 xfs_trans_t *tp;
722 int i, nimaps, committed; 725 int nimaps, committed;
723 int error = 0; 726 int error = 0;
724 int nres; 727 int nres;
725 728
@@ -766,13 +769,38 @@ xfs_iomap_write_allocate(
766 769
767 XFS_BMAP_INIT(&free_list, &first_block); 770 XFS_BMAP_INIT(&free_list, &first_block);
768 771
769 nimaps = XFS_STRAT_WRITE_IMAPS;
770 /* 772 /*
771 * Ensure we don't go beyond eof - it is possible 773 * it is possible that the extents have changed since
772 * the extents changed since we did the read call, 774 * we did the read call as we dropped the ilock for a
773 * we dropped the ilock in the interim. 775 * while. We have to be careful about truncates or hole
776 * punchs here - we are not allowed to allocate
777 * non-delalloc blocks here.
778 *
779 * The only protection against truncation is the pages
780 * for the range we are being asked to convert are
781 * locked and hence a truncate will block on them
782 * first.
783 *
784 * As a result, if we go beyond the range we really
785 * need and hit an delalloc extent boundary followed by
786 * a hole while we have excess blocks in the map, we
787 * will fill the hole incorrectly and overrun the
788 * transaction reservation.
789 *
790 * Using a single map prevents this as we are forced to
791 * check each map we look for overlap with the desired
792 * range and abort as soon as we find it. Also, given
793 * that we only return a single map, having one beyond
794 * what we can return is probably a bit silly.
795 *
796 * We also need to check that we don't go beyond EOF;
797 * this is a truncate optimisation as a truncate sets
798 * the new file size before block on the pages we
799 * currently have locked under writeback. Because they
800 * are about to be tossed, we don't need to write them
801 * back....
774 */ 802 */
775 803 nimaps = 1;
776 end_fsb = XFS_B_TO_FSB(mp, ip->i_size); 804 end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
777 xfs_bmap_last_offset(NULL, ip, &last_block, 805 xfs_bmap_last_offset(NULL, ip, &last_block,
778 XFS_DATA_FORK); 806 XFS_DATA_FORK);
@@ -788,7 +816,7 @@ xfs_iomap_write_allocate(
788 /* Go get the actual blocks */ 816 /* Go get the actual blocks */
789 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, 817 error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
790 XFS_BMAPI_WRITE, &first_block, 1, 818 XFS_BMAPI_WRITE, &first_block, 1,
791 imap, &nimaps, &free_list, NULL); 819 &imap, &nimaps, &free_list, NULL);
792 if (error) 820 if (error)
793 goto trans_cancel; 821 goto trans_cancel;
794 822
@@ -807,27 +835,24 @@ xfs_iomap_write_allocate(
807 * See if we were able to allocate an extent that 835 * See if we were able to allocate an extent that
808 * covers at least part of the callers request 836 * covers at least part of the callers request
809 */ 837 */
810 for (i = 0; i < nimaps; i++) { 838 if (unlikely(!imap.br_startblock &&
811 if (unlikely(!imap[i].br_startblock && 839 XFS_IS_REALTIME_INODE(ip)))
812 !(ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) 840 return xfs_cmn_err_fsblock_zero(ip, &imap);
813 return xfs_cmn_err_fsblock_zero(ip, &imap[i]); 841 if ((offset_fsb >= imap.br_startoff) &&
814 if ((offset_fsb >= imap[i].br_startoff) && 842 (offset_fsb < (imap.br_startoff +
815 (offset_fsb < (imap[i].br_startoff + 843 imap.br_blockcount))) {
816 imap[i].br_blockcount))) { 844 *map = imap;
817 *map = imap[i]; 845 *retmap = 1;
818 *retmap = 1; 846 XFS_STATS_INC(xs_xstrat_quick);
819 XFS_STATS_INC(xs_xstrat_quick); 847 return 0;
820 return 0;
821 }
822 count_fsb -= imap[i].br_blockcount;
823 } 848 }
824 849
825 /* So far we have not mapped the requested part of the 850 /*
851 * So far we have not mapped the requested part of the
826 * file, just surrounding data, try again. 852 * file, just surrounding data, try again.
827 */ 853 */
828 nimaps--; 854 count_fsb -= imap.br_blockcount;
829 map_start_fsb = imap[nimaps].br_startoff + 855 map_start_fsb = imap.br_startoff + imap.br_blockcount;
830 imap[nimaps].br_blockcount;
831 } 856 }
832 857
833trans_cancel: 858trans_cancel: