diff options
author | Christoph Hellwig <hch@infradead.org> | 2009-08-16 20:36:34 -0400 |
---|---|---|
committer | Felix Blyakher <felixb@sgi.com> | 2009-08-17 02:23:48 -0400 |
commit | bc990f5cb424cdca9dda866785d088e2c2110ecc (patch) | |
tree | 4264beb9538533d136e41f0e93933160fe925008 | |
parent | 894ef820b10d77e2d6d717342fc408bdd9825139 (diff) |
xfs: fix locking in xfs_iget_cache_hit
The locking in xfs_iget_cache_hit currently has numerous problems:
- we clear the reclaim tag without i_flags_lock which protects
modifications to it
- we call inode_init_always which can sleep with pag_ici_lock
held (this is oss.sgi.com BZ #819)
- we acquire and drop i_flags_lock a lot and thus provide no
consistency between the various flags we set/clear under it
This patch fixes all that with a major revamp of the locking in
the function. The new version acquires i_flags_lock early and
only drops it once we need to call into inode_init_always or before
calling xfs_ilock.
This patch fixes a bug seen in the wild where we race modifying the
reclaim tag.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 13 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 113 |
3 files changed, 70 insertions, 57 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca43..98ef624d9baf 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -708,6 +708,16 @@ xfs_reclaim_inode( | |||
708 | return 0; | 708 | return 0; |
709 | } | 709 | } |
710 | 710 | ||
711 | void | ||
712 | __xfs_inode_set_reclaim_tag( | ||
713 | struct xfs_perag *pag, | ||
714 | struct xfs_inode *ip) | ||
715 | { | ||
716 | radix_tree_tag_set(&pag->pag_ici_root, | ||
717 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
718 | XFS_ICI_RECLAIM_TAG); | ||
719 | } | ||
720 | |||
711 | /* | 721 | /* |
712 | * We set the inode flag atomically with the radix tree tag. | 722 | * We set the inode flag atomically with the radix tree tag. |
713 | * Once we get tag lookups on the radix tree, this inode flag | 723 | * Once we get tag lookups on the radix tree, this inode flag |
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag( | |||
722 | 732 | ||
723 | read_lock(&pag->pag_ici_lock); | 733 | read_lock(&pag->pag_ici_lock); |
724 | spin_lock(&ip->i_flags_lock); | 734 | spin_lock(&ip->i_flags_lock); |
725 | radix_tree_tag_set(&pag->pag_ici_root, | 735 | __xfs_inode_set_reclaim_tag(pag, ip); |
726 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
727 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 736 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
728 | spin_unlock(&ip->i_flags_lock); | 737 | spin_unlock(&ip->i_flags_lock); |
729 | read_unlock(&pag->pag_ici_lock); | 738 | read_unlock(&pag->pag_ici_lock); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c7..59120602588a 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); | |||
48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
49 | 49 | ||
50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | ||
51 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); | 52 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); |
52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 53 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
53 | struct xfs_inode *ip); | 54 | struct xfs_inode *ip); |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 34ec86923f7e..ecbf8b4d2e2e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -191,80 +191,82 @@ xfs_iget_cache_hit( | |||
191 | int flags, | 191 | int flags, |
192 | int lock_flags) __releases(pag->pag_ici_lock) | 192 | int lock_flags) __releases(pag->pag_ici_lock) |
193 | { | 193 | { |
194 | struct inode *inode = VFS_I(ip); | ||
194 | struct xfs_mount *mp = ip->i_mount; | 195 | struct xfs_mount *mp = ip->i_mount; |
195 | int error = EAGAIN; | 196 | int error; |
197 | |||
198 | spin_lock(&ip->i_flags_lock); | ||
196 | 199 | ||
197 | /* | 200 | /* |
198 | * If INEW is set this inode is being set up | 201 | * If we are racing with another cache hit that is currently |
199 | * If IRECLAIM is set this inode is being torn down | 202 | * instantiating this inode or currently recycling it out of |
200 | * Pause and try again. | 203 | * reclaimabe state, wait for the initialisation to complete |
204 | * before continuing. | ||
205 | * | ||
206 | * XXX(hch): eventually we should do something equivalent to | ||
207 | * wait_on_inode to wait for these flags to be cleared | ||
208 | * instead of polling for it. | ||
201 | */ | 209 | */ |
202 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 210 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
203 | XFS_STATS_INC(xs_ig_frecycle); | 211 | XFS_STATS_INC(xs_ig_frecycle); |
212 | error = EAGAIN; | ||
204 | goto out_error; | 213 | goto out_error; |
205 | } | 214 | } |
206 | 215 | ||
207 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 216 | /* |
208 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 217 | * If lookup is racing with unlink return an error immediately. |
209 | 218 | */ | |
210 | /* | 219 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
211 | * If lookup is racing with unlink, then we should return an | 220 | error = ENOENT; |
212 | * error immediately so we don't remove it from the reclaim | 221 | goto out_error; |
213 | * list and potentially leak the inode. | 222 | } |
214 | */ | ||
215 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
216 | error = ENOENT; | ||
217 | goto out_error; | ||
218 | } | ||
219 | 223 | ||
224 | /* | ||
225 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
226 | * Need to carefully get it back into useable state. | ||
227 | */ | ||
228 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
220 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 229 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
221 | 230 | ||
222 | /* | 231 | /* |
223 | * We need to re-initialise the VFS inode as it has been | 232 | * We need to set XFS_INEW atomically with clearing the |
224 | * 'freed' by the VFS. Do this here so we can deal with | 233 | * reclaimable tag so that we do have an indicator of the |
225 | * errors cleanly, then tag it so it can be set up correctly | 234 | * inode still being initialized. |
226 | * later. | ||
227 | */ | 235 | */ |
228 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | 236 | ip->i_flags |= XFS_INEW; |
229 | error = ENOMEM; | 237 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
230 | goto out_error; | 238 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
231 | } | ||
232 | 239 | ||
233 | /* | 240 | spin_unlock(&ip->i_flags_lock); |
234 | * We must set the XFS_INEW flag before clearing the | 241 | read_unlock(&pag->pag_ici_lock); |
235 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
236 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
237 | * below succeed we can safely check XFS_INEW to detect | ||
238 | * that this inode is still being initialised. | ||
239 | */ | ||
240 | xfs_iflags_set(ip, XFS_INEW); | ||
241 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
242 | 242 | ||
243 | /* clear the radix tree reclaim flag as well. */ | 243 | error = -inode_init_always(mp->m_super, inode); |
244 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 244 | if (error) { |
245 | } else if (!igrab(VFS_I(ip))) { | 245 | /* |
246 | * Re-initializing the inode failed, and we are in deep | ||
247 | * trouble. Try to re-add it to the reclaim list. | ||
248 | */ | ||
249 | read_lock(&pag->pag_ici_lock); | ||
250 | spin_lock(&ip->i_flags_lock); | ||
251 | |||
252 | ip->i_flags &= ~XFS_INEW; | ||
253 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
254 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
255 | goto out_error; | ||
256 | } | ||
257 | inode->i_state = I_LOCK|I_NEW; | ||
258 | } else { | ||
246 | /* If the VFS inode is being torn down, pause and try again. */ | 259 | /* If the VFS inode is being torn down, pause and try again. */ |
247 | XFS_STATS_INC(xs_ig_frecycle); | 260 | if (!igrab(inode)) { |
248 | goto out_error; | 261 | error = EAGAIN; |
249 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 262 | goto out_error; |
250 | /* | 263 | } |
251 | * We are racing with another cache hit that is | ||
252 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
253 | * state. Wait for the initialisation to complete before | ||
254 | * continuing. | ||
255 | */ | ||
256 | wait_on_inode(VFS_I(ip)); | ||
257 | } | ||
258 | 264 | ||
259 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 265 | /* We've got a live one. */ |
260 | error = ENOENT; | 266 | spin_unlock(&ip->i_flags_lock); |
261 | iput(VFS_I(ip)); | 267 | read_unlock(&pag->pag_ici_lock); |
262 | goto out_error; | ||
263 | } | 268 | } |
264 | 269 | ||
265 | /* We've got a live one. */ | ||
266 | read_unlock(&pag->pag_ici_lock); | ||
267 | |||
268 | if (lock_flags != 0) | 270 | if (lock_flags != 0) |
269 | xfs_ilock(ip, lock_flags); | 271 | xfs_ilock(ip, lock_flags); |
270 | 272 | ||
@@ -274,6 +276,7 @@ xfs_iget_cache_hit( | |||
274 | return 0; | 276 | return 0; |
275 | 277 | ||
276 | out_error: | 278 | out_error: |
279 | spin_unlock(&ip->i_flags_lock); | ||
277 | read_unlock(&pag->pag_ici_lock); | 280 | read_unlock(&pag->pag_ici_lock); |
278 | return error; | 281 | return error; |
279 | } | 282 | } |