From 848ce8f731aed0a2d4ab5884a4f6664af73d2dd0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Sep 2009 13:48:56 +0000 Subject: xfs: simplify inode teardown Currently the reclaim code for the case where we don't reclaim the final reclaim is overly complicated. We know that the inode is clean but instead of just directly reclaiming the clean inode we go through the whole process of marking the inode reclaimable just to directly reclaim it from the calling context. Besides being overly complicated, this introduces a race where iget could recycle an inode between being marked reclaimable and actually being reclaimed, leading to panics. This patch gets rid of the existing reclaim path, and replaces it with a simple call to xfs_ireclaim if the inode was clean. While we're at it we also use the slightly more lax xfs_inode_clean check we'd use later to determine if we need to flush the inode here. Finally get rid of the xfs_reclaim function and place the remaining small bits of reclaim code directly into xfs_fs_destroy_inode. 
Signed-off-by: Christoph Hellwig Reported-by: Patrick Schreurs Reported-by: Tommy van Leeuwen Tested-by: Patrick Schreurs Reviewed-by: Alex Elder Signed-off-by: Alex Elder --- fs/xfs/xfs_vnodeops.c | 40 ---------------------------------------- 1 file changed, 40 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b572f7e840e0..3fac146b3b7d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2456,46 +2456,6 @@ xfs_set_dmattrs( return error; } -int -xfs_reclaim( - xfs_inode_t *ip) -{ - - xfs_itrace_entry(ip); - - ASSERT(!VN_MAPPED(VFS_I(ip))); - - /* bad inode, get out here ASAP */ - if (is_bad_inode(VFS_I(ip))) { - xfs_ireclaim(ip); - return 0; - } - - xfs_ioend_wait(ip); - - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); - - /* - * If we have nothing to flush with this inode then complete the - * teardown now, otherwise break the link between the xfs inode and the - * linux inode and clean up the xfs inode later. This avoids flushing - * the inode to disk during the delete operation itself. - * - * When breaking the link, we need to set the XFS_IRECLAIMABLE flag - * first to ensure that xfs_iunpin() will never see an xfs inode - * that has a linux inode being reclaimed. Synchronisation is provided - * by the i_flags_lock. - */ - if (!ip->i_update_core && (ip->i_itemp == NULL)) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - xfs_iflags_set(ip, XFS_IRECLAIMABLE); - return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); - } - xfs_inode_set_reclaim_tag(ip); - return 0; -} - /* * xfs_alloc_file_space() * This routine allocates disk space for the given file. 
-- cgit v1.2.2 From c56c9631cbe88f08854a56ff9776c1f310916830 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Oct 2009 04:03:46 +0000 Subject: xfs: fix mmap_sem/iolock inversion in xfs_free_eofblocks When xfs_free_eofblocks is called from ->release the VM might already hold the mmap_sem, but in the write path we take the iolock before taking the mmap_sem in the generic write code. Switch xfs_free_eofblocks to only trylock the iolock if called from ->release and skip trimming the preallocated blocks in that case. We'll still free them later on the final iput. Signed-off-by: Christoph Hellwig Reviewed-by: Alex Elder Signed-off-by: Alex Elder --- fs/xfs/xfs_vnodeops.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 3fac146b3b7d..d98401470cf0 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -708,6 +708,11 @@ xfs_fsync( return error; } +/* + * Flags for xfs_free_eofblocks + */ +#define XFS_FREE_EOF_TRYLOCK (1<<0) + /* * This is called by xfs_inactive to free any blocks beyond eof * when the link count isn't zero and by xfs_dm_punch_hole() when @@ -726,7 +731,6 @@ xfs_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; - int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end @@ -768,14 +772,19 @@ xfs_free_eofblocks( * cache and we can't * do that within a transaction. 
*/ - if (use_iolock) + if (flags & XFS_FREE_EOF_TRYLOCK) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + xfs_trans_cancel(tp, 0); + return 0; + } + } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); + } error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, ip->i_size); if (error) { xfs_trans_cancel(tp, 0); - if (use_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -812,8 +821,7 @@ xfs_free_eofblocks( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) - : XFS_ILOCK_EXCL)); + xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); } return error; } @@ -1113,7 +1121,17 @@ xfs_release( (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + + /* + * If we can't get the iolock just skip truncating + * the blocks past EOF because we could deadlock + * with the mmap_sem otherwise. We'll get another + * chance to drop them once the last reference to + * the inode is dropped, so we'll never leak blocks + * permanently. + */ + error = xfs_free_eofblocks(mp, ip, + XFS_FREE_EOF_TRYLOCK); if (error) return error; } @@ -1184,7 +1202,7 @@ xfs_inactive( (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + error = xfs_free_eofblocks(mp, ip, 0); if (error) return VN_INACTIVE_CACHE; } -- cgit v1.2.2 From 6ad112bfb5af537e9e3103c807748bb4a99bbd9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2009 18:02:23 +0000 Subject: xfs: simplify xfs_buf_get / xfs_buf_read interfaces Currently the low-level buffer cache interfaces are highly confusing as we have a _flags variant of each that does actually respect the flags, and one without _flags which has a flags argument that gets ignored and overridden with a default set. 
Given that very few places use the default arguments get rid of the duplication and convert all callers to pass the flags explicitly. Also remove the now confusing _flags postfix. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_vnodeops.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index d98401470cf0..578f3f59b789 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -538,9 +538,8 @@ xfs_readlink_bmap( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), - XBF_LOCK | XBF_MAPPED | - XBF_DONT_BLOCK); + bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), + XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); error = XFS_BUF_GETERROR(bp); if (error) { xfs_ioerror_alert("xfs_readlink", -- cgit v1.2.2