aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_iops.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2014-05-06 18:05:45 -0400
committerDave Chinner <david@fromorbit.com>2014-05-06 18:05:45 -0400
commit49abc3a8f84146f74daadbaa7cde7d34f2bb40a8 (patch)
treeaefab4081f7024f857b0e17be9851fe4b07da237 /fs/xfs/xfs_iops.c
parentb28fd7b5fe232d7643d7c0595938e998ceb58508 (diff)
xfs: truncate_setsize should be outside transactions
truncate_setsize() removes pages from the page cache, and hence requires page locks to be held. It is not valid to lock a page cache page inside a transaction context as we can hold page locks when we we reserve space for a transaction. If we do, then we expose an ABBA deadlock between log space reservation and page locks. That is, both the write path and writeback lock a page, then start a transaction for block allocation, which means they can block waiting for a log reservation with the page lock held. If we hold a log reservation and then do something that locks a page (e.g. truncate_setsize in xfs_setattr_size) then that page lock can block on the page locked and waiting for a log reservation. If the transaction that is waiting for the page lock is the only active transaction in the system that can free log space via a commit, then writeback will never make progress and so log space will never free up. This issue with xfs_setattr_size() was introduced back in 2010 by commit fa9b227 ("xfs: new truncate sequence") which moved the page cache truncate from outside the transaction context (what was xfs_itruncate_data()) to inside the transaction context as a call to truncate_setsize(). The reason truncate_setsize() was located where in this place was that we can't shouldn't change the file size until after we are in the transaction context and the operation will either succeed or shut down the filesystem on failure. However, block_truncate_page() already modifies the file contents before we enter the transaction context, so we can't really fulfill this guarantee in any way. Hence we may as well ensure that on success or failure, the in-memory inode and data is truncated away and that the application cleans up the mess appropriately. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_iops.c')
-rw-r--r--fs/xfs/xfs_iops.c20
1 files changed, 16 insertions, 4 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 89b07e43ca28..7ee5a9d56787 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -808,22 +808,34 @@ xfs_setattr_size(
808 */ 808 */
809 inode_dio_wait(inode); 809 inode_dio_wait(inode);
810 810
811 /*
812 * Do all the page cache truncate work outside the transaction context
813 * as the "lock" order is page lock->log space reservation. i.e.
814 * locking pages inside the transaction can ABBA deadlock with
815 * writeback. We have to do the VFS inode size update before we truncate
816 * the pagecache, however, to avoid racing with page faults beyond the
817 * new EOF they are not serialised against truncate operations except by
818 * page locks and size updates.
819 *
820 * Hence we are in a situation where a truncate can fail with ENOMEM
821 * from xfs_trans_reserve(), but having already truncated the in-memory
822 * version of the file (i.e. made user visible changes). There's not
823 * much we can do about this, except to hope that the caller sees ENOMEM
824 * and retries the truncate operation.
825 */
811 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 826 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
812 if (error) 827 if (error)
813 return error; 828 return error;
829 truncate_setsize(inode, newsize);
814 830
815 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 831 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
816 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 832 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
817 if (error) 833 if (error)
818 goto out_trans_cancel; 834 goto out_trans_cancel;
819 835
820 truncate_setsize(inode, newsize);
821
822 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 836 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
823 lock_flags |= XFS_ILOCK_EXCL; 837 lock_flags |= XFS_ILOCK_EXCL;
824
825 xfs_ilock(ip, XFS_ILOCK_EXCL); 838 xfs_ilock(ip, XFS_ILOCK_EXCL);
826
827 xfs_trans_ijoin(tp, ip, 0); 839 xfs_trans_ijoin(tp, ip, 0);
828 840
829 /* 841 /*