diff options
Diffstat (limited to 'fs/xfs/xfs_iops.c')
-rw-r--r-- | fs/xfs/xfs_iops.c | 63 |
1 files changed, 20 insertions, 43 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 3ccc28e8d3a0..8b9e6887e315 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -771,6 +771,7 @@ xfs_setattr_size( | |||
771 | return error; | 771 | return error; |
772 | 772 | ||
773 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 773 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
774 | ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); | ||
774 | ASSERT(S_ISREG(ip->i_d.di_mode)); | 775 | ASSERT(S_ISREG(ip->i_d.di_mode)); |
775 | ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| | 776 | ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| |
776 | ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); | 777 | ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); |
@@ -834,55 +835,27 @@ xfs_setattr_size( | |||
834 | inode_dio_wait(inode); | 835 | inode_dio_wait(inode); |
835 | 836 | ||
836 | /* | 837 | /* |
837 | * Do all the page cache truncate work outside the transaction context | 838 | * We've already locked out new page faults, so now we can safely remove |
838 | * as the "lock" order is page lock->log space reservation. i.e. | 839 | * pages from the page cache knowing they won't get refaulted until we |
840 | locking pages inside the transaction can ABBA deadlock with | 840 | drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are |
840 | * writeback. We have to do the VFS inode size update before we truncate | 841 | * complete. The truncate_setsize() call also cleans partial EOF page |
841 | * the pagecache, however, to avoid racing with page faults beyond the | 842 | * PTEs on extending truncates and hence ensures sub-page block size |
842 | * new EOF they are not serialised against truncate operations except by | 843 | * filesystems are correctly handled, too. |
843 | * page locks and size updates. | ||
844 | * | 844 | * |
845 | * Hence we are in a situation where a truncate can fail with ENOMEM | 845 | * We have to do all the page cache truncate work outside the |
846 | * from xfs_trans_reserve(), but having already truncated the in-memory | 846 | * transaction context as the "lock" order is page lock->log space |
847 | * version of the file (i.e. made user visible changes). There's not | 847 | * reservation as defined by extent allocation in the writeback path. |
848 | * much we can do about this, except to hope that the caller sees ENOMEM | 848 | * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but |
849 | * and retries the truncate operation. | 849 | * having already truncated the in-memory version of the file (i.e. made |
850 | * user visible changes). There's not much we can do about this, except | ||
851 | * to hope that the caller sees ENOMEM and retries the truncate | ||
852 | * operation. | ||
850 | */ | 853 | */ |
851 | error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); | 854 | error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); |
852 | if (error) | 855 | if (error) |
853 | return error; | 856 | return error; |
854 | truncate_setsize(inode, newsize); | 857 | truncate_setsize(inode, newsize); |
855 | 858 | ||
856 | /* | ||
857 | * The "we can't serialise against page faults" pain gets worse. | ||
858 | * | ||
859 | * If the file is mapped then we have to clean the page at the old EOF | ||
860 | * when extending the file. Extending the file can expose changes the | ||
861 | * underlying page mapping (e.g. from beyond EOF to a hole or | ||
862 | * unwritten), and so on the next attempt to write to that page we need | ||
863 | * to remap it for write. i.e. we need .page_mkwrite() to be called. | ||
864 | * Hence we need to clean the page to clean the pte and so a new write | ||
865 | * fault will be triggered appropriately. | ||
866 | * | ||
867 | * If we do it before we change the inode size, then we can race with a | ||
868 | * page fault that maps the page with exactly the same problem. If we do | ||
869 | * it after we change the file size, then a new page fault can come in | ||
870 | * and allocate space before we've run the rest of the truncate | ||
871 | * transaction. That's kinda grotesque, but it's better than have data | ||
872 | * over a hole, and so that's the lesser evil that has been chosen here. | ||
873 | * | ||
874 | * The real solution, however, is to have some mechanism for locking out | ||
875 | * page faults while a truncate is in progress. | ||
876 | */ | ||
877 | if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { | ||
878 | error = filemap_write_and_wait_range( | ||
879 | VFS_I(ip)->i_mapping, | ||
880 | round_down(oldsize, PAGE_CACHE_SIZE), | ||
881 | round_up(oldsize, PAGE_CACHE_SIZE) - 1); | ||
882 | if (error) | ||
883 | return error; | ||
884 | } | ||
885 | |||
886 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); | 859 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); |
887 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); | 860 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); |
888 | if (error) | 861 | if (error) |
@@ -981,8 +954,12 @@ xfs_vn_setattr( | |||
981 | 954 | ||
982 | xfs_ilock(ip, iolock); | 955 | xfs_ilock(ip, iolock); |
983 | error = xfs_break_layouts(dentry->d_inode, &iolock); | 956 | error = xfs_break_layouts(dentry->d_inode, &iolock); |
984 | if (!error) | 957 | if (!error) { |
958 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
959 | iolock |= XFS_MMAPLOCK_EXCL; | ||
960 | |||
985 | error = xfs_setattr_size(ip, iattr); | 961 | error = xfs_setattr_size(ip, iattr); |
962 | } | ||
986 | xfs_iunlock(ip, iolock); | 963 | xfs_iunlock(ip, iolock); |
987 | } else { | 964 | } else { |
988 | error = xfs_setattr_nonsize(ip, iattr, 0); | 965 | error = xfs_setattr_nonsize(ip, iattr, 0); |