diff options
author | David Chinner <david@fromorbit.com> | 2008-10-30 03:32:43 -0400 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-10-30 03:32:43 -0400 |
commit | 6bfb3d065f4c498c17a3a07f3dc08cedff53aff4 (patch) | |
tree | c5c528c77e44584616a175e0dcc89713e7b76d0a | |
parent | e0b8e8b65d578f5d5538465dff8392cf02e1cc5d (diff) |
[XFS] Fix race when looking up reclaimable inodes
If we get a race looking up a reclaimable inode, we can end up with the
winner proceeding to use the inode before it has been completely
re-initialised. This is a Bad Thing.
Fix the race by checking whether we are still initialising the inod eonce
we have a reference to it, and if so wait for the initialisation to
complete before continuing.
While there, fix a leaked reference count in the same code when
encountering an unlinked inode and we are not doing a lookup for a create
operation.
SGI-PV: 987246
SGI-Modid: xfs-linux-melb:xfs-kern:32429a
Signed-off-by: David Chinner <david@fromorbit.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_linux.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 32 |
2 files changed, 23 insertions, 10 deletions
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 214717650b23..77d6ddcaf547 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -77,6 +77,7 @@ | |||
77 | #include <linux/spinlock.h> | 77 | #include <linux/spinlock.h> |
78 | #include <linux/random.h> | 78 | #include <linux/random.h> |
79 | #include <linux/ctype.h> | 79 | #include <linux/ctype.h> |
80 | #include <linux/writeback.h> | ||
80 | 81 | ||
81 | #include <asm/page.h> | 82 | #include <asm/page.h> |
82 | #include <asm/div64.h> | 83 | #include <asm/div64.h> |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 837cae781536..bf4dc5eb4cfc 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -52,7 +52,7 @@ xfs_iget_cache_hit( | |||
52 | int lock_flags) __releases(pag->pag_ici_lock) | 52 | int lock_flags) __releases(pag->pag_ici_lock) |
53 | { | 53 | { |
54 | struct xfs_mount *mp = ip->i_mount; | 54 | struct xfs_mount *mp = ip->i_mount; |
55 | int error = 0; | 55 | int error = EAGAIN; |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * If INEW is set this inode is being set up | 58 | * If INEW is set this inode is being set up |
@@ -60,7 +60,6 @@ xfs_iget_cache_hit( | |||
60 | * Pause and try again. | 60 | * Pause and try again. |
61 | */ | 61 | */ |
62 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 62 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { |
63 | error = EAGAIN; | ||
64 | XFS_STATS_INC(xs_ig_frecycle); | 63 | XFS_STATS_INC(xs_ig_frecycle); |
65 | goto out_error; | 64 | goto out_error; |
66 | } | 65 | } |
@@ -73,7 +72,6 @@ xfs_iget_cache_hit( | |||
73 | * error immediately so we don't remove it from the reclaim | 72 | * error immediately so we don't remove it from the reclaim |
74 | * list and potentially leak the inode. | 73 | * list and potentially leak the inode. |
75 | */ | 74 | */ |
76 | |||
77 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | 75 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { |
78 | error = ENOENT; | 76 | error = ENOENT; |
79 | goto out_error; | 77 | goto out_error; |
@@ -91,27 +89,42 @@ xfs_iget_cache_hit( | |||
91 | error = ENOMEM; | 89 | error = ENOMEM; |
92 | goto out_error; | 90 | goto out_error; |
93 | } | 91 | } |
92 | |||
93 | /* | ||
94 | * We must set the XFS_INEW flag before clearing the | ||
95 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
96 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
97 | * below succeed we can safely check XFS_INEW to detect | ||
98 | * that this inode is still being initialised. | ||
99 | */ | ||
94 | xfs_iflags_set(ip, XFS_INEW); | 100 | xfs_iflags_set(ip, XFS_INEW); |
95 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | 101 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); |
96 | 102 | ||
97 | /* clear the radix tree reclaim flag as well. */ | 103 | /* clear the radix tree reclaim flag as well. */ |
98 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 104 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
99 | read_unlock(&pag->pag_ici_lock); | ||
100 | } else if (!igrab(VFS_I(ip))) { | 105 | } else if (!igrab(VFS_I(ip))) { |
101 | /* If the VFS inode is being torn down, pause and try again. */ | 106 | /* If the VFS inode is being torn down, pause and try again. */ |
102 | error = EAGAIN; | ||
103 | XFS_STATS_INC(xs_ig_frecycle); | 107 | XFS_STATS_INC(xs_ig_frecycle); |
104 | goto out_error; | 108 | goto out_error; |
105 | } else { | 109 | } else if (xfs_iflags_test(ip, XFS_INEW)) { |
106 | /* we've got a live one */ | 110 | /* |
107 | read_unlock(&pag->pag_ici_lock); | 111 | * We are racing with another cache hit that is |
112 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
113 | * state. Wait for the initialisation to complete before | ||
114 | * continuing. | ||
115 | */ | ||
116 | wait_on_inode(VFS_I(ip)); | ||
108 | } | 117 | } |
109 | 118 | ||
110 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 119 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
111 | error = ENOENT; | 120 | error = ENOENT; |
112 | goto out; | 121 | iput(VFS_I(ip)); |
122 | goto out_error; | ||
113 | } | 123 | } |
114 | 124 | ||
125 | /* We've got a live one. */ | ||
126 | read_unlock(&pag->pag_ici_lock); | ||
127 | |||
115 | if (lock_flags != 0) | 128 | if (lock_flags != 0) |
116 | xfs_ilock(ip, lock_flags); | 129 | xfs_ilock(ip, lock_flags); |
117 | 130 | ||
@@ -122,7 +135,6 @@ xfs_iget_cache_hit( | |||
122 | 135 | ||
123 | out_error: | 136 | out_error: |
124 | read_unlock(&pag->pag_ici_lock); | 137 | read_unlock(&pag->pag_ici_lock); |
125 | out: | ||
126 | return error; | 138 | return error; |
127 | } | 139 | } |
128 | 140 | ||