aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Hellwig <hch@infradead.org> 2009-08-16 20:36:34 -0400
committer Felix Blyakher <felixb@sgi.com> 2009-08-18 01:57:14 -0400
commit a022fe09700365c51d1f55884bca9754eb96a802 (patch)
tree f70d3ed8662bfd576f58f3382b5a119a8df32433
parent 79dd43bb85d64ba14a781f940c858d7bbe8c9a6d (diff)
xfs: fix locking in xfs_iget_cache_hit
The locking in xfs_iget_cache_hit currently has numerous problems:

 - we clear the reclaim tag without i_flags_lock which protects
   modifications to it
 - we call inode_init_always which can sleep with pag_ici_lock held
   (this is oss.sgi.com BZ #819)
 - we acquire and drop i_flags_lock a lot and thus provide no consistency
   between the various flags we set/clear under it

This patch fixes all that with a major revamp of the locking in the function.
The new version acquires i_flags_lock early and only drops it once we need to
call into inode_init_always or before calling xfs_ilock.

This patch fixes a bug seen in the wild where we race modifying the reclaim tag.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.c 13
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.h 1
-rw-r--r-- fs/xfs/xfs_iget.c 113
3 files changed, 70 insertions, 57 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fbf3e0288b34..320be6aea492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
708 return 0; 708 return 0;
709} 709}
710 710
711void
712__xfs_inode_set_reclaim_tag(
713 struct xfs_perag *pag,
714 struct xfs_inode *ip)
715{
716 radix_tree_tag_set(&pag->pag_ici_root,
717 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
718 XFS_ICI_RECLAIM_TAG);
719}
720
711/* 721/*
712 * We set the inode flag atomically with the radix tree tag. 722 * We set the inode flag atomically with the radix tree tag.
713 * Once we get tag lookups on the radix tree, this inode flag 723 * Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
722 732
723 read_lock(&pag->pag_ici_lock); 733 read_lock(&pag->pag_ici_lock);
724 spin_lock(&ip->i_flags_lock); 734 spin_lock(&ip->i_flags_lock);
725 radix_tree_tag_set(&pag->pag_ici_root, 735 __xfs_inode_set_reclaim_tag(pag, ip);
726 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
727 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 736 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
728 spin_unlock(&ip->i_flags_lock); 737 spin_unlock(&ip->i_flags_lock);
729 read_unlock(&pag->pag_ici_lock); 738 read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 23e7e7e6e136..27920eb7a820 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
51void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
52 struct xfs_inode *ip); 53 struct xfs_inode *ip);
53 54
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 76c540f719e4..91adfab2f45f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -134,80 +134,82 @@ xfs_iget_cache_hit(
134 int flags, 134 int flags,
135 int lock_flags) __releases(pag->pag_ici_lock) 135 int lock_flags) __releases(pag->pag_ici_lock)
136{ 136{
137 struct inode *inode = VFS_I(ip);
137 struct xfs_mount *mp = ip->i_mount; 138 struct xfs_mount *mp = ip->i_mount;
138 int error = EAGAIN; 139 int error;
140
141 spin_lock(&ip->i_flags_lock);
139 142
140 /* 143 /*
141 * If INEW is set this inode is being set up 144 * If we are racing with another cache hit that is currently
142 * If IRECLAIM is set this inode is being torn down 145 * instantiating this inode or currently recycling it out of
143 * Pause and try again. 146 * reclaimabe state, wait for the initialisation to complete
147 * before continuing.
148 *
149 * XXX(hch): eventually we should do something equivalent to
150 * wait_on_inode to wait for these flags to be cleared
151 * instead of polling for it.
144 */ 152 */
145 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 153 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
146 XFS_STATS_INC(xs_ig_frecycle); 154 XFS_STATS_INC(xs_ig_frecycle);
155 error = EAGAIN;
147 goto out_error; 156 goto out_error;
148 } 157 }
149 158
150 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ 159 /*
151 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 160 * If lookup is racing with unlink return an error immediately.
152 161 */
153 /* 162 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
154 * If lookup is racing with unlink, then we should return an 163 error = ENOENT;
155 * error immediately so we don't remove it from the reclaim 164 goto out_error;
156 * list and potentially leak the inode. 165 }
157 */
158 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
159 error = ENOENT;
160 goto out_error;
161 }
162 166
167 /*
168 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
169 * Need to carefully get it back into useable state.
170 */
171 if (ip->i_flags & XFS_IRECLAIMABLE) {
163 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 172 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
164 173
165 /* 174 /*
166 * We need to re-initialise the VFS inode as it has been 175 * We need to set XFS_INEW atomically with clearing the
167 * 'freed' by the VFS. Do this here so we can deal with 176 * reclaimable tag so that we do have an indicator of the
168 * errors cleanly, then tag it so it can be set up correctly 177 * inode still being initialized.
169 * later.
170 */ 178 */
171 if (!inode_init_always(mp->m_super, VFS_I(ip))) { 179 ip->i_flags |= XFS_INEW;
172 error = ENOMEM; 180 ip->i_flags &= ~XFS_IRECLAIMABLE;
173 goto out_error; 181 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
174 }
175 182
176 /* 183 spin_unlock(&ip->i_flags_lock);
177 * We must set the XFS_INEW flag before clearing the 184 read_unlock(&pag->pag_ici_lock);
178 * XFS_IRECLAIMABLE flag so that if a racing lookup does
179 * not find the XFS_IRECLAIMABLE above but has the igrab()
180 * below succeed we can safely check XFS_INEW to detect
181 * that this inode is still being initialised.
182 */
183 xfs_iflags_set(ip, XFS_INEW);
184 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
185 185
186 /* clear the radix tree reclaim flag as well. */ 186 error = -inode_init_always(mp->m_super, inode);
187 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 187 if (error) {
188 } else if (!igrab(VFS_I(ip))) { 188 /*
189 * Re-initializing the inode failed, and we are in deep
190 * trouble. Try to re-add it to the reclaim list.
191 */
192 read_lock(&pag->pag_ici_lock);
193 spin_lock(&ip->i_flags_lock);
194
195 ip->i_flags &= ~XFS_INEW;
196 ip->i_flags |= XFS_IRECLAIMABLE;
197 __xfs_inode_set_reclaim_tag(pag, ip);
198 goto out_error;
199 }
200 inode->i_state = I_LOCK|I_NEW;
201 } else {
189 /* If the VFS inode is being torn down, pause and try again. */ 202 /* If the VFS inode is being torn down, pause and try again. */
190 XFS_STATS_INC(xs_ig_frecycle); 203 if (!igrab(inode)) {
191 goto out_error; 204 error = EAGAIN;
192 } else if (xfs_iflags_test(ip, XFS_INEW)) { 205 goto out_error;
193 /* 206 }
194 * We are racing with another cache hit that is
195 * currently recycling this inode out of the XFS_IRECLAIMABLE
196 * state. Wait for the initialisation to complete before
197 * continuing.
198 */
199 wait_on_inode(VFS_I(ip));
200 }
201 207
202 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 208 /* We've got a live one. */
203 error = ENOENT; 209 spin_unlock(&ip->i_flags_lock);
204 iput(VFS_I(ip)); 210 read_unlock(&pag->pag_ici_lock);
205 goto out_error;
206 } 211 }
207 212
208 /* We've got a live one. */
209 read_unlock(&pag->pag_ici_lock);
210
211 if (lock_flags != 0) 213 if (lock_flags != 0)
212 xfs_ilock(ip, lock_flags); 214 xfs_ilock(ip, lock_flags);
213 215
@@ -217,6 +219,7 @@ xfs_iget_cache_hit(
217 return 0; 219 return 0;
218 220
219out_error: 221out_error:
222 spin_unlock(&ip->i_flags_lock);
220 read_unlock(&pag->pag_ici_lock); 223 read_unlock(&pag->pag_ici_lock);
221 return error; 224 return error;
222} 225}