author	Dave Chinner <dchinner@redhat.com>	2015-02-23 05:46:58 -0500
committer	Dave Chinner <david@fromorbit.com>	2015-02-23 05:46:58 -0500
commit	0f9160b444e4de33b65dfcd3b901358a3129461a (patch)
tree	60b861d303238aa3ec033992a66804bc4bfaa6db /fs/xfs
parent	e8e9ad42c1f1e1bfbe0e8c32c8cac02e9ebfb7ef (diff)
xfs: xfs_setattr_size no longer races with page faults
Now that truncate locks out new page faults, we no longer need to do special writeback hacks in truncate to work around potential races between page faults, page cache truncation and file size updates to ensure we get write page faults for extending truncates on sub-page block size filesystems. Hence we can remove the code in xfs_setattr_size() that handles this and update the comments around the code that handles page cache truncation and size updates to reflect the new reality.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
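For context, the page fault exclusion the commit message relies on comes from the parent commit, which introduces an XFS mmap lock (referred to as XFS_MMAP_EXCL in the new comment below) that the setattr path takes exclusively around size changes while the fault handlers take it shared. The following is only a minimal caller-side sketch, assuming the flag names XFS_MMAPLOCK_EXCL/XFS_MMAPLOCK_SHARED from that series and simplified from the real xfs_vn_setattr(); it is not the verbatim kernel code.

/* Sketch only: how the truncate path serialises against page faults. */
STATIC int
xfs_vn_setattr(
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
	int			error;

	if (iattr->ia_valid & ATTR_SIZE) {
		/*
		 * Block new page faults for the duration of the size
		 * change; the .fault/.page_mkwrite handlers take this
		 * lock in shared mode.
		 */
		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
		error = xfs_setattr_size(ip, iattr);
		xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	} else {
		error = xfs_setattr_nonsize(ip, iattr);
	}
	return error;
}

With the fault paths holding the lock shared, a fault either completes before xfs_setattr_size() starts or waits until the lock is dropped after the extent manipulations are done, which is why the filemap_write_and_wait_range() workaround removed in the diff below is no longer needed.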
Diffstat (limited to 'fs/xfs')
-rw-r--r--	fs/xfs/xfs_iops.c	56
1 file changed, 14 insertions, 42 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 7f59ad34b5c5..5c0c27c6fb75 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -838,55 +838,27 @@ xfs_setattr_size(
 	inode_dio_wait(inode);
 
 	/*
-	 * Do all the page cache truncate work outside the transaction context
-	 * as the "lock" order is page lock->log space reservation. i.e.
-	 * locking pages inside the transaction can ABBA deadlock with
-	 * writeback. We have to do the VFS inode size update before we truncate
-	 * the pagecache, however, to avoid racing with page faults beyond the
-	 * new EOF they are not serialised against truncate operations except by
-	 * page locks and size updates.
+	 * We've already locked out new page faults, so now we can safely remove
+	 * pages from the page cache knowing they won't get refaulted until we
+	 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
+	 * complete. The truncate_setsize() call also cleans partial EOF page
+	 * PTEs on extending truncates and hence ensures sub-page block size
+	 * filesystems are correctly handled, too.
 	 *
-	 * Hence we are in a situation where a truncate can fail with ENOMEM
-	 * from xfs_trans_reserve(), but having already truncated the in-memory
-	 * version of the file (i.e. made user visible changes). There's not
-	 * much we can do about this, except to hope that the caller sees ENOMEM
-	 * and retries the truncate operation.
+	 * We have to do all the page cache truncate work outside the
+	 * transaction context as the "lock" order is page lock->log space
+	 * reservation as defined by extent allocation in the writeback path.
+	 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+	 * having already truncated the in-memory version of the file (i.e. made
+	 * user visible changes). There's not much we can do about this, except
+	 * to hope that the caller sees ENOMEM and retries the truncate
+	 * operation.
 	 */
 	error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
 	if (error)
 		return error;
 	truncate_setsize(inode, newsize);
 
-	/*
-	 * The "we can't serialise against page faults" pain gets worse.
-	 *
-	 * If the file is mapped then we have to clean the page at the old EOF
-	 * when extending the file. Extending the file can expose changes the
-	 * underlying page mapping (e.g. from beyond EOF to a hole or
-	 * unwritten), and so on the next attempt to write to that page we need
-	 * to remap it for write. i.e. we need .page_mkwrite() to be called.
-	 * Hence we need to clean the page to clean the pte and so a new write
-	 * fault will be triggered appropriately.
-	 *
-	 * If we do it before we change the inode size, then we can race with a
-	 * page fault that maps the page with exactly the same problem. If we do
-	 * it after we change the file size, then a new page fault can come in
-	 * and allocate space before we've run the rest of the truncate
-	 * transaction. That's kinda grotesque, but it's better than have data
-	 * over a hole, and so that's the lesser evil that has been chosen here.
-	 *
-	 * The real solution, however, is to have some mechanism for locking out
-	 * page faults while a truncate is in progress.
-	 */
-	if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
-		error = filemap_write_and_wait_range(
-						VFS_I(ip)->i_mapping,
-				round_down(oldsize, PAGE_CACHE_SIZE),
-				round_up(oldsize, PAGE_CACHE_SIZE) - 1);
-		if (error)
-			return error;
-	}
-
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)