diff options
author | David Chinner <dgc@sgi.com> | 2006-11-11 02:05:00 -0500 |
---|---|---|
committer | Tim Shimmin <tes@sgi.com> | 2006-11-11 02:05:00 -0500 |
commit | 4c60658e0f4e253cf275f12b7c76bf128515a774 (patch) | |
tree | 72d591ce30b7bcc8e08b20aa325ac4360921f028 | |
parent | 7a18c386078eaf17ae54595f66c0d64d9c1cb29c (diff) |
[XFS] Prevent a deadlock when xfslogd unpins inodes.
The previous fixes for the use after free in xfs_iunpin left a nasty log
deadlock when xfslogd unpinned the inode and dropped the last reference to
the inode. the ->clear_inode() method can issue transactions, and if the
log was full, the transaction could push on the log and get stuck trying
to push the inode it was currently unpinning.
To fix this, we provide xfs_iunpin a guarantee that it will always have a
valid xfs_inode <-> linux inode link or a particular flag will be set on
the inode. We then use log forces during lookup to ensure transactions are
completed before we recycle the inode. This ensures that xfs_iunpin will
never use the linux inode after it is being freed, and any lookup on an
inode on the reclaim list will wait until it is safe to attach a new linux
inode to the xfs inode.
SGI-PV: 956832
SGI-Modid: xfs-linux-melb:xfs-kern:27359a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Shailendra Tripathi <stripathi@agami.com>
Signed-off-by: Takenori Nagano <t-nagano@ah.jp.nec.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r-- | fs/xfs/xfs_iget.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 47 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 21 |
3 files changed, 66 insertions, 32 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 4b0c1881d6d5..c1c89dac19cc 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -237,6 +237,36 @@ again: | |||
237 | 237 | ||
238 | goto again; | 238 | goto again; |
239 | } | 239 | } |
240 | ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
241 | |||
242 | /* | ||
243 | * If lookup is racing with unlink, then we | ||
244 | * should return an error immediately so we | ||
245 | * don't remove it from the reclaim list and | ||
246 | * potentially leak the inode. | ||
247 | */ | ||
248 | if ((ip->i_d.di_mode == 0) && | ||
249 | !(flags & XFS_IGET_CREATE)) { | ||
250 | read_unlock(&ih->ih_lock); | ||
251 | return ENOENT; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * There may be transactions sitting in the | ||
256 | * incore log buffers or being flushed to disk | ||
257 | * at this time. We can't clear the | ||
258 | * XFS_IRECLAIMABLE flag until these | ||
259 | * transactions have hit the disk, otherwise we | ||
260 | * will void the guarantee the flag provides | ||
261 | * xfs_iunpin() | ||
262 | */ | ||
263 | if (xfs_ipincount(ip)) { | ||
264 | read_unlock(&ih->ih_lock); | ||
265 | xfs_log_force(mp, 0, | ||
266 | XFS_LOG_FORCE|XFS_LOG_SYNC); | ||
267 | XFS_STATS_INC(xs_ig_frecycle); | ||
268 | goto again; | ||
269 | } | ||
240 | 270 | ||
241 | vn_trace_exit(vp, "xfs_iget.alloc", | 271 | vn_trace_exit(vp, "xfs_iget.alloc", |
242 | (inst_t *)__return_address); | 272 | (inst_t *)__return_address); |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 17d2a471751e..d72c80dbfbb1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2741,42 +2741,39 @@ xfs_iunpin( | |||
2741 | { | 2741 | { |
2742 | ASSERT(atomic_read(&ip->i_pincount) > 0); | 2742 | ASSERT(atomic_read(&ip->i_pincount) > 0); |
2743 | 2743 | ||
2744 | if (atomic_dec_and_test(&ip->i_pincount)) { | 2744 | if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { |
2745 | |||
2745 | /* | 2746 | /* |
2746 | * If the inode is currently being reclaimed, the | 2747 | * If the inode is currently being reclaimed, the link between |
2747 | * linux inode _and_ the xfs vnode may have been | 2748 | * the bhv_vnode and the xfs_inode will be broken after the |
2748 | * freed so we cannot reference either of them safely. | 2749 | * XFS_IRECLAIM* flag is set. Hence, if these flags are not |
2749 | * Hence we should not try to do anything to them | 2750 | * set, then we can move forward and mark the linux inode dirty |
2750 | * if the xfs inode is currently in the reclaim | 2751 | * knowing that it is still valid as it won't freed until after |
2751 | * path. | 2752 | * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The |
2753 | * i_flags_lock is used to synchronise the setting of the | ||
2754 | * XFS_IRECLAIM* flags and the breaking of the link, and so we | ||
2755 | * can execute atomically w.r.t to reclaim by holding this lock | ||
2756 | * here. | ||
2752 | * | 2757 | * |
2753 | * However, we still need to issue the unpin wakeup | 2758 | * However, we still need to issue the unpin wakeup call as the |
2754 | * call as the inode reclaim may be blocked waiting for | 2759 | * inode reclaim may be blocked waiting for the inode to become |
2755 | * the inode to become unpinned. | 2760 | * unpinned. |
2756 | */ | 2761 | */ |
2757 | struct inode *inode = NULL; | ||
2758 | 2762 | ||
2759 | spin_lock(&ip->i_flags_lock); | ||
2760 | if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { | 2763 | if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { |
2761 | bhv_vnode_t *vp = XFS_ITOV_NULL(ip); | 2764 | bhv_vnode_t *vp = XFS_ITOV_NULL(ip); |
2765 | struct inode *inode = NULL; | ||
2766 | |||
2767 | BUG_ON(vp == NULL); | ||
2768 | inode = vn_to_inode(vp); | ||
2769 | BUG_ON(inode->i_state & I_CLEAR); | ||
2762 | 2770 | ||
2763 | /* make sync come back and flush this inode */ | 2771 | /* make sync come back and flush this inode */ |
2764 | if (vp) { | 2772 | if (!(inode->i_state & (I_NEW|I_FREEING))) |
2765 | inode = vn_to_inode(vp); | 2773 | mark_inode_dirty_sync(inode); |
2766 | |||
2767 | if (!(inode->i_state & | ||
2768 | (I_NEW|I_FREEING|I_CLEAR))) { | ||
2769 | inode = igrab(inode); | ||
2770 | if (inode) | ||
2771 | mark_inode_dirty_sync(inode); | ||
2772 | } else | ||
2773 | inode = NULL; | ||
2774 | } | ||
2775 | } | 2774 | } |
2776 | spin_unlock(&ip->i_flags_lock); | 2775 | spin_unlock(&ip->i_flags_lock); |
2777 | wake_up(&ip->i_ipin_wait); | 2776 | wake_up(&ip->i_ipin_wait); |
2778 | if (inode) | ||
2779 | iput(inode); | ||
2780 | } | 2777 | } |
2781 | } | 2778 | } |
2782 | 2779 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4c5d73cbb901..bda774a04b8f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -3827,11 +3827,16 @@ xfs_reclaim( | |||
3827 | */ | 3827 | */ |
3828 | xfs_synchronize_atime(ip); | 3828 | xfs_synchronize_atime(ip); |
3829 | 3829 | ||
3830 | /* If we have nothing to flush with this inode then complete the | 3830 | /* |
3831 | * teardown now, otherwise break the link between the xfs inode | 3831 | * If we have nothing to flush with this inode then complete the |
3832 | * and the linux inode and clean up the xfs inode later. This | 3832 | * teardown now, otherwise break the link between the xfs inode and the |
3833 | * avoids flushing the inode to disk during the delete operation | 3833 | * linux inode and clean up the xfs inode later. This avoids flushing |
3834 | * itself. | 3834 | * the inode to disk during the delete operation itself. |
3835 | * | ||
3836 | * When breaking the link, we need to set the XFS_IRECLAIMABLE flag | ||
3837 | * first to ensure that xfs_iunpin() will never see an xfs inode | ||
3838 | * that has a linux inode being reclaimed. Synchronisation is provided | ||
3839 | * by the i_flags_lock. | ||
3835 | */ | 3840 | */ |
3836 | if (!ip->i_update_core && (ip->i_itemp == NULL)) { | 3841 | if (!ip->i_update_core && (ip->i_itemp == NULL)) { |
3837 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 3842 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
@@ -3840,11 +3845,13 @@ xfs_reclaim( | |||
3840 | } else { | 3845 | } else { |
3841 | xfs_mount_t *mp = ip->i_mount; | 3846 | xfs_mount_t *mp = ip->i_mount; |
3842 | 3847 | ||
3843 | /* Protect sync from us */ | 3848 | /* Protect sync and unpin from us */ |
3844 | XFS_MOUNT_ILOCK(mp); | 3849 | XFS_MOUNT_ILOCK(mp); |
3850 | spin_lock(&ip->i_flags_lock); | ||
3851 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | ||
3845 | vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); | 3852 | vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); |
3853 | spin_unlock(&ip->i_flags_lock); | ||
3846 | list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); | 3854 | list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); |
3847 | xfs_iflags_set(ip, XFS_IRECLAIMABLE); | ||
3848 | XFS_MOUNT_IUNLOCK(mp); | 3855 | XFS_MOUNT_IUNLOCK(mp); |
3849 | } | 3856 | } |
3850 | return 0; | 3857 | return 0; |