aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <david@fromorbit.com>2008-10-30 03:32:43 -0400
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-10-30 03:32:43 -0400
commit6bfb3d065f4c498c17a3a07f3dc08cedff53aff4 (patch)
treec5c528c77e44584616a175e0dcc89713e7b76d0a
parente0b8e8b65d578f5d5538465dff8392cf02e1cc5d (diff)
[XFS] Fix race when looking up reclaimable inodes
If we get a race looking up a reclaimable inode, we can end up with the winner proceeding to use the inode before it has been completely re-initialised. This is a Bad Thing. Fix the race by checking whether we are still initialising the inod eonce we have a reference to it, and if so wait for the initialisation to complete before continuing. While there, fix a leaked reference count in the same code when encountering an unlinked inode and we are not doing a lookup for a create operation. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32429a Signed-off-by: David Chinner <david@fromorbit.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_iget.c32
2 files changed, 23 insertions, 10 deletions
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 214717650b2..77d6ddcaf54 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -77,6 +77,7 @@
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h> 78#include <linux/random.h>
79#include <linux/ctype.h> 79#include <linux/ctype.h>
80#include <linux/writeback.h>
80 81
81#include <asm/page.h> 82#include <asm/page.h>
82#include <asm/div64.h> 83#include <asm/div64.h>
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 837cae78153..bf4dc5eb4cf 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -52,7 +52,7 @@ xfs_iget_cache_hit(
52 int lock_flags) __releases(pag->pag_ici_lock) 52 int lock_flags) __releases(pag->pag_ici_lock)
53{ 53{
54 struct xfs_mount *mp = ip->i_mount; 54 struct xfs_mount *mp = ip->i_mount;
55 int error = 0; 55 int error = EAGAIN;
56 56
57 /* 57 /*
58 * If INEW is set this inode is being set up 58 * If INEW is set this inode is being set up
@@ -60,7 +60,6 @@ xfs_iget_cache_hit(
60 * Pause and try again. 60 * Pause and try again.
61 */ 61 */
62 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 62 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
63 error = EAGAIN;
64 XFS_STATS_INC(xs_ig_frecycle); 63 XFS_STATS_INC(xs_ig_frecycle);
65 goto out_error; 64 goto out_error;
66 } 65 }
@@ -73,7 +72,6 @@ xfs_iget_cache_hit(
73 * error immediately so we don't remove it from the reclaim 72 * error immediately so we don't remove it from the reclaim
74 * list and potentially leak the inode. 73 * list and potentially leak the inode.
75 */ 74 */
76
77 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 75 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
78 error = ENOENT; 76 error = ENOENT;
79 goto out_error; 77 goto out_error;
@@ -91,27 +89,42 @@ xfs_iget_cache_hit(
91 error = ENOMEM; 89 error = ENOMEM;
92 goto out_error; 90 goto out_error;
93 } 91 }
92
93 /*
94 * We must set the XFS_INEW flag before clearing the
95 * XFS_IRECLAIMABLE flag so that if a racing lookup does
96 * not find the XFS_IRECLAIMABLE above but has the igrab()
97 * below succeed we can safely check XFS_INEW to detect
98 * that this inode is still being initialised.
99 */
94 xfs_iflags_set(ip, XFS_INEW); 100 xfs_iflags_set(ip, XFS_INEW);
95 xfs_iflags_clear(ip, XFS_IRECLAIMABLE); 101 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
96 102
97 /* clear the radix tree reclaim flag as well. */ 103 /* clear the radix tree reclaim flag as well. */
98 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 104 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
99 read_unlock(&pag->pag_ici_lock);
100 } else if (!igrab(VFS_I(ip))) { 105 } else if (!igrab(VFS_I(ip))) {
101 /* If the VFS inode is being torn down, pause and try again. */ 106 /* If the VFS inode is being torn down, pause and try again. */
102 error = EAGAIN;
103 XFS_STATS_INC(xs_ig_frecycle); 107 XFS_STATS_INC(xs_ig_frecycle);
104 goto out_error; 108 goto out_error;
105 } else { 109 } else if (xfs_iflags_test(ip, XFS_INEW)) {
106 /* we've got a live one */ 110 /*
107 read_unlock(&pag->pag_ici_lock); 111 * We are racing with another cache hit that is
112 * currently recycling this inode out of the XFS_IRECLAIMABLE
113 * state. Wait for the initialisation to complete before
114 * continuing.
115 */
116 wait_on_inode(VFS_I(ip));
108 } 117 }
109 118
110 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 119 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
111 error = ENOENT; 120 error = ENOENT;
112 goto out; 121 iput(VFS_I(ip));
122 goto out_error;
113 } 123 }
114 124
125 /* We've got a live one. */
126 read_unlock(&pag->pag_ici_lock);
127
115 if (lock_flags != 0) 128 if (lock_flags != 0)
116 xfs_ilock(ip, lock_flags); 129 xfs_ilock(ip, lock_flags);
117 130
@@ -122,7 +135,6 @@ xfs_iget_cache_hit(
122 135
123out_error: 136out_error:
124 read_unlock(&pag->pag_ici_lock); 137 read_unlock(&pag->pag_ici_lock);
125out:
126 return error; 138 return error;
127} 139}
128 140