aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_vnodeops.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-12-22 20:02:31 -0500
committerDave Chinner <david@fromorbit.com>2010-12-22 20:02:31 -0500
commit6e857567dbbfe14dd6cc3f7414671b047b1ff5c7 (patch)
treedde314cd6a22066e4cd7f3c498f89b00984838d7 /fs/xfs/xfs_vnodeops.c
parent055388a3188f56676c21e92962fc366ac8b5cb72 (diff)
xfs: don't truncate prealloc from frequently accessed inodes
A long standing problem for streaming writeѕ through the NFS server has been that the NFS server opens and closes file descriptors on an inode for every write. The result of this behaviour is that the ->release() function is called on every close and that results in XFS truncating speculative preallocation beyond the EOF. This has an adverse effect on file layout when multiple files are being written at the same time - they interleave their extents and can result in severe fragmentation. To avoid this problem, keep track of ->release calls made on a dirty inode. For most cases, an inode is only going to be opened once for writing and then closed again during it's lifetime in cache. Hence if there are multiple ->release calls when the inode is dirty, there is a good chance that the inode is being accessed by the NFS server. Hence set a flag the first time ->release is called while there are delalloc blocks still outstanding on the inode. If this flag is set when ->release is next called, then do no truncate away the speculative preallocation - leave it there so that subsequent writes do not need to reallocate the delalloc space. This will prevent interleaving of extents of different inodes written concurrently to the same AG. If we get this wrong, it is not a big deal as we truncate speculative allocation beyond EOF anyway in xfs_inactive() when the inode is thrown out of the cache. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r--fs/xfs/xfs_vnodeops.c61
1 files changed, 40 insertions, 21 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8e4a63c4151a..d8e6f8cd6f0c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -964,29 +964,48 @@ xfs_release(
964 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); 964 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
965 } 965 }
966 966
967 if (ip->i_d.di_nlink != 0) { 967 if (ip->i_d.di_nlink == 0)
968 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 968 return 0;
969 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
970 ip->i_delayed_blks > 0)) &&
971 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
972 (!(ip->i_d.di_flags &
973 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
974 969
975 /* 970 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
976 * If we can't get the iolock just skip truncating 971 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
977 * the blocks past EOF because we could deadlock 972 ip->i_delayed_blks > 0)) &&
978 * with the mmap_sem otherwise. We'll get another 973 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
979 * chance to drop them once the last reference to 974 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
980 * the inode is dropped, so we'll never leak blocks
981 * permanently.
982 */
983 error = xfs_free_eofblocks(mp, ip,
984 XFS_FREE_EOF_TRYLOCK);
985 if (error)
986 return error;
987 }
988 }
989 975
976 /*
977 * If we can't get the iolock just skip truncating the blocks
978 * past EOF because we could deadlock with the mmap_sem
979 * otherwise. We'll get another chance to drop them once the
980 * last reference to the inode is dropped, so we'll never leak
981 * blocks permanently.
982 *
983 * Further, check if the inode is being opened, written and
984 * closed frequently and we have delayed allocation blocks
985 * oustanding (e.g. streaming writes from the NFS server),
986 * truncating the blocks past EOF will cause fragmentation to
987 * occur.
988 *
989 * In this case don't do the truncation, either, but we have to
990 * be careful how we detect this case. Blocks beyond EOF show
991 * up as i_delayed_blks even when the inode is clean, so we
992 * need to truncate them away first before checking for a dirty
993 * release. Hence on the first dirty close we will still remove
994 * the speculative allocation, but after that we will leave it
995 * in place.
996 */
997 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
998 return 0;
999
1000 error = xfs_free_eofblocks(mp, ip,
1001 XFS_FREE_EOF_TRYLOCK);
1002 if (error)
1003 return error;
1004
1005 /* delalloc blocks after truncation means it really is dirty */
1006 if (ip->i_delayed_blks)
1007 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
1008 }
990 return 0; 1009 return 0;
991} 1010}
992 1011