Diffstat (limited to 'fs/xfs/xfs_iops.c')

-rw-r--r--  fs/xfs/xfs_iops.c  63
 1 file changed, 20 insertions(+), 43 deletions(-)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 3ccc28e8d3a0..8b9e6887e315 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -771,6 +771,7 @@ xfs_setattr_size(
 		return error;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
 	ASSERT(S_ISREG(ip->i_d.di_mode));
 	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
 		ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
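
The assertion added here documents the new truncate-path locking contract: callers of xfs_setattr_size() must now hold both the IO lock and the mmap lock exclusively (see the xfs_vn_setattr hunk below). As a rough illustration of why taking the mmap lock exclusively locks out page faults, here is a minimal userspace model, with pthread rwlocks standing in for the inode's rwsem-style locks; every name in it is a hypothetical stand-in, not XFS code:

	#include <pthread.h>

	struct inode_model {
		pthread_rwlock_t	io_lock;	/* stands in for XFS_IOLOCK */
		pthread_rwlock_t	mmap_lock;	/* stands in for XFS_MMAPLOCK */
	};

	/*
	 * Truncate path: both locks taken exclusive, in the same order
	 * the assertions above check them.
	 */
	static void model_setattr_size(struct inode_model *ip)
	{
		pthread_rwlock_wrlock(&ip->io_lock);
		pthread_rwlock_wrlock(&ip->mmap_lock);
		/* ... truncate page cache, manipulate extents ... */
		pthread_rwlock_unlock(&ip->mmap_lock);
		pthread_rwlock_unlock(&ip->io_lock);
	}

	/*
	 * Fault path: mmap lock shared, so faults run concurrently with
	 * each other but never overlap a truncate holding it exclusive.
	 */
	static void model_filemap_fault(struct inode_model *ip)
	{
		pthread_rwlock_rdlock(&ip->mmap_lock);
		/* ... instantiate page cache pages and PTEs ... */
		pthread_rwlock_unlock(&ip->mmap_lock);
	}

	int main(void)
	{
		struct inode_model inode;

		pthread_rwlock_init(&inode.io_lock, NULL);
		pthread_rwlock_init(&inode.mmap_lock, NULL);
		model_filemap_fault(&inode);	/* may run alongside other faults */
		model_setattr_size(&inode);	/* excludes all faults while held */
		return 0;
	}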
@@ -834,55 +835,27 @@ xfs_setattr_size(
 	inode_dio_wait(inode);
 
 	/*
-	 * Do all the page cache truncate work outside the transaction context
-	 * as the "lock" order is page lock->log space reservation. i.e.
-	 * locking pages inside the transaction can ABBA deadlock with
-	 * writeback. We have to do the VFS inode size update before we truncate
-	 * the pagecache, however, to avoid racing with page faults beyond the
-	 * new EOF they are not serialised against truncate operations except by
-	 * page locks and size updates.
+	 * We've already locked out new page faults, so now we can safely remove
+	 * pages from the page cache knowing they won't get refaulted until we
+	 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
+	 * complete. The truncate_setsize() call also cleans partial EOF page
+	 * PTEs on extending truncates and hence ensures sub-page block size
+	 * filesystems are correctly handled, too.
 	 *
-	 * Hence we are in a situation where a truncate can fail with ENOMEM
-	 * from xfs_trans_reserve(), but having already truncated the in-memory
-	 * version of the file (i.e. made user visible changes). There's not
-	 * much we can do about this, except to hope that the caller sees ENOMEM
-	 * and retries the truncate operation.
+	 * We have to do all the page cache truncate work outside the
+	 * transaction context as the "lock" order is page lock->log space
+	 * reservation as defined by extent allocation in the writeback path.
+	 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+	 * having already truncated the in-memory version of the file (i.e. made
+	 * user visible changes). There's not much we can do about this, except
+	 * to hope that the caller sees ENOMEM and retries the truncate
+	 * operation.
 	 */
 	error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
 	if (error)
 		return error;
 	truncate_setsize(inode, newsize);
 
-	/*
-	 * The "we can't serialise against page faults" pain gets worse.
-	 *
-	 * If the file is mapped then we have to clean the page at the old EOF
-	 * when extending the file. Extending the file can expose changes the
-	 * underlying page mapping (e.g. from beyond EOF to a hole or
-	 * unwritten), and so on the next attempt to write to that page we need
-	 * to remap it for write. i.e. we need .page_mkwrite() to be called.
-	 * Hence we need to clean the page to clean the pte and so a new write
-	 * fault will be triggered appropriately.
-	 *
-	 * If we do it before we change the inode size, then we can race with a
-	 * page fault that maps the page with exactly the same problem. If we do
-	 * it after we change the file size, then a new page fault can come in
-	 * and allocate space before we've run the rest of the truncate
-	 * transaction. That's kinda grotesque, but it's better than have data
-	 * over a hole, and so that's the lesser evil that has been chosen here.
-	 *
-	 * The real solution, however, is to have some mechanism for locking out
-	 * page faults while a truncate is in progress.
-	 */
-	if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
-		error = filemap_write_and_wait_range(
-				VFS_I(ip)->i_mapping,
-				round_down(oldsize, PAGE_CACHE_SIZE),
-				round_up(oldsize, PAGE_CACHE_SIZE) - 1);
-		if (error)
-			return error;
-	}
-
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)
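
The rewritten comment leans on page faults having been "locked out", but the fault-side half of that serialisation is not in this file. Elsewhere in the same patch series, the .fault handler in fs/xfs/xfs_file.c is wrapped so that every page fault takes the mmap lock in shared mode, roughly as follows (reproduced approximately, not part of this diff):

	/*
	 * Fault-side counterpart: faults take the mmap lock shared, so
	 * they cannot overlap a truncate holding XFS_MMAPLOCK_EXCL.
	 */
	STATIC int
	xfs_filemap_fault(
		struct vm_area_struct	*vma,
		struct vm_fault		*vmf)
	{
		struct xfs_inode	*ip = XFS_I(file_inode(vma->vm_file));
		int			error;

		xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
		error = filemap_fault(vma, vmf);
		xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);

		return error;
	}

With faults holding the lock shared and truncates holding it exclusive, the refault races described in the deleted comment above cannot happen, which is what makes the filemap_write_and_wait_range() workaround removable.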
@@ -981,8 +954,12 @@ xfs_vn_setattr(
 
 		xfs_ilock(ip, iolock);
 		error = xfs_break_layouts(dentry->d_inode, &iolock);
-		if (!error)
+		if (!error) {
+			xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+			iolock |= XFS_MMAPLOCK_EXCL;
+
 			error = xfs_setattr_size(ip, iattr);
+		}
 		xfs_iunlock(ip, iolock);
 	} else {
 		error = xfs_setattr_nonsize(ip, iattr, 0);
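
One detail worth noting in this hunk: rather than adding a second xfs_iunlock() call, the newly taken XFS_MMAPLOCK_EXCL is OR'd into the iolock flags word, so the single existing xfs_iunlock(ip, iolock) drops both locks. Since xfs_break_layouts() is passed &iolock and may change it, the set of held locks can already vary by unlock time, and accumulating them in one variable keeps the unlock path correct for whatever was actually taken. A tiny standalone model of the pattern, where the constants and helpers are illustrative stand-ins rather than the real XFS definitions:

	#include <stdio.h>

	#define MODEL_IOLOCK_EXCL	(1 << 0)
	#define MODEL_MMAPLOCK_EXCL	(1 << 1)

	static void model_ilock(unsigned int flags)
	{
		printf("lock   0x%x\n", flags);
	}

	static void model_iunlock(unsigned int flags)
	{
		printf("unlock 0x%x\n", flags);
	}

	int main(void)
	{
		unsigned int iolock = MODEL_IOLOCK_EXCL;

		model_ilock(iolock);			/* IO lock first */
		model_ilock(MODEL_MMAPLOCK_EXCL);	/* then the mmap lock */
		iolock |= MODEL_MMAPLOCK_EXCL;		/* record it for unlock */

		/* ... xfs_setattr_size() would run here with both held ... */

		model_iunlock(iolock);			/* drops both in one call */
		return 0;
	}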