aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6/xfs_sync.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-03-25 18:13:55 -0400
committerDave Chinner <david@fromorbit.com>2011-03-25 18:13:55 -0400
commit1bfd8d04190c615bb8d1d98188dead0c09702208 (patch)
treef2eb5d4e9b730b7ecef25bcc1cc26caa2b306339 /fs/xfs/linux-2.6/xfs_sync.c
parenta19fb380961f209a3a406443686647bcd01bb9a6 (diff)
xfs: introduce inode cluster buffer trylocks for xfs_iflush
There is an ABBA deadlock between synchronous inode flushing in xfs_reclaim_inode and xfs_icluster_free. xfs_icluster_free locks the buffer, then takes inode ilocks, whilst synchronous reclaim takes the ilock followed by the buffer lock in xfs_iflush(). To avoid this deadlock, separate the inode cluster buffer locking semantics from the synchronous inode flush semantics, allowing callers to attempt to lock the buffer but still issue synchronous IO if it can get the buffer. This requires xfs_iflush() calls that currently use non-blocking semantics to pass SYNC_TRYLOCK rather than 0 as the flags parameter. This allows xfs_reclaim_inode to avoid the deadlock on the buffer lock and detect the failure so that it can drop the inode ilock and restart the reclaim attempt on the inode. This allows xfs_ifree_cluster to obtain the inode lock, mark the inode stale and release it and hence defuse the deadlock situation. It also has the pleasant side effect of avoiding IO in xfs_reclaim_inode when it tries to next reclaim the inode as it is now marked stale. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c30
1 files changed, 27 insertions, 3 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6c10f1d2e3d3..594cd822d84d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -761,8 +761,10 @@ xfs_reclaim_inode(
761 struct xfs_perag *pag, 761 struct xfs_perag *pag,
762 int sync_mode) 762 int sync_mode)
763{ 763{
764 int error = 0; 764 int error;
765 765
766restart:
767 error = 0;
766 xfs_ilock(ip, XFS_ILOCK_EXCL); 768 xfs_ilock(ip, XFS_ILOCK_EXCL);
767 if (!xfs_iflock_nowait(ip)) { 769 if (!xfs_iflock_nowait(ip)) {
768 if (!(sync_mode & SYNC_WAIT)) 770 if (!(sync_mode & SYNC_WAIT))
@@ -788,9 +790,31 @@ xfs_reclaim_inode(
788 if (xfs_inode_clean(ip)) 790 if (xfs_inode_clean(ip))
789 goto reclaim; 791 goto reclaim;
790 792
791 /* Now we have an inode that needs flushing */ 793 /*
792 error = xfs_iflush(ip, sync_mode); 794 * Now we have an inode that needs flushing.
795 *
796 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
797 * reclaim as we can deadlock with inode cluster removal.
798 * xfs_ifree_cluster() can lock the inode buffer before it locks the
799 * ip->i_lock, and we are doing the exact opposite here. As a result,
800 * doing a blocking xfs_itobp() to get the cluster buffer will result
801 * in an ABBA deadlock with xfs_ifree_cluster().
802 *
803 * As xfs_ifree_cluser() must gather all inodes that are active in the
804 * cache to mark them stale, if we hit this case we don't actually want
805 * to do IO here - we want the inode marked stale so we can simply
806 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
807 * just unlock the inode, back off and try again. Hopefully the next
808 * pass through will see the stale flag set on the inode.
809 */
810 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
793 if (sync_mode & SYNC_WAIT) { 811 if (sync_mode & SYNC_WAIT) {
812 if (error == EAGAIN) {
813 xfs_iunlock(ip, XFS_ILOCK_EXCL);
814 /* backoff longer than in xfs_ifree_cluster */
815 delay(2);
816 goto restart;
817 }
794 xfs_iflock(ip); 818 xfs_iflock(ip);
795 goto reclaim; 819 goto reclaim;
796 } 820 }