aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2006-11-11 02:05:00 -0500
committerTim Shimmin <tes@sgi.com>2006-11-11 02:05:00 -0500
commit4c60658e0f4e253cf275f12b7c76bf128515a774 (patch)
tree72d591ce30b7bcc8e08b20aa325ac4360921f028
parent7a18c386078eaf17ae54595f66c0d64d9c1cb29c (diff)
[XFS] Prevent a deadlock when xfslogd unpins inodes.
The previous fixes for the use after free in xfs_iunpin left a nasty log deadlock when xfslogd unpinned the inode and dropped the last reference to the inode. the ->clear_inode() method can issue transactions, and if the log was full, the transaction could push on the log and get stuck trying to push the inode it was currently unpinning. To fix this, we provide xfs_iunpin a guarantee that it will always have a valid xfs_inode <-> linux inode link or a particular flag will be set on the inode. We then use log forces during lookup to ensure transactions are completed before we recycle the inode. This ensures that xfs_iunpin will never use the linux inode after it is being freed, and any lookup on an inode on the reclaim list will wait until it is safe to attach a new linux inode to the xfs inode. SGI-PV: 956832 SGI-Modid: xfs-linux-melb:xfs-kern:27359a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Shailendra Tripathi <stripathi@agami.com> Signed-off-by: Takenori Nagano <t-nagano@ah.jp.nec.com> Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r--fs/xfs/xfs_iget.c30
-rw-r--r--fs/xfs/xfs_inode.c47
-rw-r--r--fs/xfs/xfs_vnodeops.c21
3 files changed, 66 insertions, 32 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 4b0c1881d6d5..c1c89dac19cc 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -237,6 +237,36 @@ again:
237 237
238 goto again; 238 goto again;
239 } 239 }
240 ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
241
242 /*
243 * If lookup is racing with unlink, then we
244 * should return an error immediately so we
245 * don't remove it from the reclaim list and
246 * potentially leak the inode.
247 */
248 if ((ip->i_d.di_mode == 0) &&
249 !(flags & XFS_IGET_CREATE)) {
250 read_unlock(&ih->ih_lock);
251 return ENOENT;
252 }
253
254 /*
255 * There may be transactions sitting in the
256 * incore log buffers or being flushed to disk
257 * at this time. We can't clear the
258 * XFS_IRECLAIMABLE flag until these
259 * transactions have hit the disk, otherwise we
260 * will void the guarantee the flag provides
261 * xfs_iunpin()
262 */
263 if (xfs_ipincount(ip)) {
264 read_unlock(&ih->ih_lock);
265 xfs_log_force(mp, 0,
266 XFS_LOG_FORCE|XFS_LOG_SYNC);
267 XFS_STATS_INC(xs_ig_frecycle);
268 goto again;
269 }
240 270
241 vn_trace_exit(vp, "xfs_iget.alloc", 271 vn_trace_exit(vp, "xfs_iget.alloc",
242 (inst_t *)__return_address); 272 (inst_t *)__return_address);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 17d2a471751e..d72c80dbfbb1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2741,42 +2741,39 @@ xfs_iunpin(
2741{ 2741{
2742 ASSERT(atomic_read(&ip->i_pincount) > 0); 2742 ASSERT(atomic_read(&ip->i_pincount) > 0);
2743 2743
2744 if (atomic_dec_and_test(&ip->i_pincount)) { 2744 if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) {
2745
2745 /* 2746 /*
2746 * If the inode is currently being reclaimed, the 2747 * If the inode is currently being reclaimed, the link between
2747 * linux inode _and_ the xfs vnode may have been 2748 * the bhv_vnode and the xfs_inode will be broken after the
2748 * freed so we cannot reference either of them safely. 2749 * XFS_IRECLAIM* flag is set. Hence, if these flags are not
2749 * Hence we should not try to do anything to them 2750 * set, then we can move forward and mark the linux inode dirty
2750 * if the xfs inode is currently in the reclaim 2751 * knowing that it is still valid as it won't freed until after
2751 * path. 2752 * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
2753 * i_flags_lock is used to synchronise the setting of the
2754 * XFS_IRECLAIM* flags and the breaking of the link, and so we
2755 * can execute atomically w.r.t to reclaim by holding this lock
2756 * here.
2752 * 2757 *
2753 * However, we still need to issue the unpin wakeup 2758 * However, we still need to issue the unpin wakeup call as the
2754 * call as the inode reclaim may be blocked waiting for 2759 * inode reclaim may be blocked waiting for the inode to become
2755 * the inode to become unpinned. 2760 * unpinned.
2756 */ 2761 */
2757 struct inode *inode = NULL;
2758 2762
2759 spin_lock(&ip->i_flags_lock);
2760 if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { 2763 if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
2761 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2764 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
2765 struct inode *inode = NULL;
2766
2767 BUG_ON(vp == NULL);
2768 inode = vn_to_inode(vp);
2769 BUG_ON(inode->i_state & I_CLEAR);
2762 2770
2763 /* make sync come back and flush this inode */ 2771 /* make sync come back and flush this inode */
2764 if (vp) { 2772 if (!(inode->i_state & (I_NEW|I_FREEING)))
2765 inode = vn_to_inode(vp); 2773 mark_inode_dirty_sync(inode);
2766
2767 if (!(inode->i_state &
2768 (I_NEW|I_FREEING|I_CLEAR))) {
2769 inode = igrab(inode);
2770 if (inode)
2771 mark_inode_dirty_sync(inode);
2772 } else
2773 inode = NULL;
2774 }
2775 } 2774 }
2776 spin_unlock(&ip->i_flags_lock); 2775 spin_unlock(&ip->i_flags_lock);
2777 wake_up(&ip->i_ipin_wait); 2776 wake_up(&ip->i_ipin_wait);
2778 if (inode)
2779 iput(inode);
2780 } 2777 }
2781} 2778}
2782 2779
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4c5d73cbb901..bda774a04b8f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3827,11 +3827,16 @@ xfs_reclaim(
3827 */ 3827 */
3828 xfs_synchronize_atime(ip); 3828 xfs_synchronize_atime(ip);
3829 3829
3830 /* If we have nothing to flush with this inode then complete the 3830 /*
3831 * teardown now, otherwise break the link between the xfs inode 3831 * If we have nothing to flush with this inode then complete the
3832 * and the linux inode and clean up the xfs inode later. This 3832 * teardown now, otherwise break the link between the xfs inode and the
3833 * avoids flushing the inode to disk during the delete operation 3833 * linux inode and clean up the xfs inode later. This avoids flushing
3834 * itself. 3834 * the inode to disk during the delete operation itself.
3835 *
3836 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
3837 * first to ensure that xfs_iunpin() will never see an xfs inode
3838 * that has a linux inode being reclaimed. Synchronisation is provided
3839 * by the i_flags_lock.
3835 */ 3840 */
3836 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3841 if (!ip->i_update_core && (ip->i_itemp == NULL)) {
3837 xfs_ilock(ip, XFS_ILOCK_EXCL); 3842 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -3840,11 +3845,13 @@ xfs_reclaim(
3840 } else { 3845 } else {
3841 xfs_mount_t *mp = ip->i_mount; 3846 xfs_mount_t *mp = ip->i_mount;
3842 3847
3843 /* Protect sync from us */ 3848 /* Protect sync and unpin from us */
3844 XFS_MOUNT_ILOCK(mp); 3849 XFS_MOUNT_ILOCK(mp);
3850 spin_lock(&ip->i_flags_lock);
3851 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
3845 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); 3852 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
3853 spin_unlock(&ip->i_flags_lock);
3846 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 3854 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
3847 xfs_iflags_set(ip, XFS_IRECLAIMABLE);
3848 XFS_MOUNT_IUNLOCK(mp); 3855 XFS_MOUNT_IUNLOCK(mp);
3849 } 3856 }
3850 return 0; 3857 return 0;