diff options
author | Christoph Hellwig <hch@infradead.org> | 2009-08-16 20:36:34 -0400 |
---|---|---|
committer | Felix Blyakher <felixb@sgi.com> | 2009-08-18 01:57:14 -0400 |
commit | a022fe09700365c51d1f55884bca9754eb96a802 (patch) | |
tree | f70d3ed8662bfd576f58f3382b5a119a8df32433 | |
parent | 79dd43bb85d64ba14a781f940c858d7bbe8c9a6d (diff) |
xfs: fix locking in xfs_iget_cache_hit
The locking in xfs_iget_cache_hit currently has numerous problems:
- we clear the reclaim tag without i_flags_lock which protects
modifications to it
- we call inode_init_always, which can sleep, while pag_ici_lock is
held (this is oss.sgi.com BZ #819)
- we acquire and drop i_flags_lock a lot and thus provide no
consistency between the various flags we set/clear under it
This patch fixes all that with a major revamp of the locking in
the function. The new version acquires i_flags_lock early and
only drops it once we need to call into inode_init_always or before
calling xfs_ilock.
This patch fixes a bug seen in the wild where we race modifying the
reclaim tag.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 13 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 113 |
3 files changed, 70 insertions, 57 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index fbf3e0288b34..320be6aea492 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -708,6 +708,16 @@ xfs_reclaim_inode( | |||
708 | return 0; | 708 | return 0; |
709 | } | 709 | } |
710 | 710 | ||
711 | void | ||
712 | __xfs_inode_set_reclaim_tag( | ||
713 | struct xfs_perag *pag, | ||
714 | struct xfs_inode *ip) | ||
715 | { | ||
716 | radix_tree_tag_set(&pag->pag_ici_root, | ||
717 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
718 | XFS_ICI_RECLAIM_TAG); | ||
719 | } | ||
720 | |||
711 | /* | 721 | /* |
712 | * We set the inode flag atomically with the radix tree tag. | 722 | * We set the inode flag atomically with the radix tree tag. |
713 | * Once we get tag lookups on the radix tree, this inode flag | 723 | * Once we get tag lookups on the radix tree, this inode flag |
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag( | |||
722 | 732 | ||
723 | read_lock(&pag->pag_ici_lock); | 733 | read_lock(&pag->pag_ici_lock); |
724 | spin_lock(&ip->i_flags_lock); | 734 | spin_lock(&ip->i_flags_lock); |
725 | radix_tree_tag_set(&pag->pag_ici_root, | 735 | __xfs_inode_set_reclaim_tag(pag, ip); |
726 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
727 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 736 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
728 | spin_unlock(&ip->i_flags_lock); | 737 | spin_unlock(&ip->i_flags_lock); |
729 | read_unlock(&pag->pag_ici_lock); | 738 | read_unlock(&pag->pag_ici_lock); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 23e7e7e6e136..27920eb7a820 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); | |||
48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
49 | 49 | ||
50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | ||
51 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
52 | struct xfs_inode *ip); | 53 | struct xfs_inode *ip); |
53 | 54 | ||
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 76c540f719e4..91adfab2f45f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -134,80 +134,82 @@ xfs_iget_cache_hit( | |||
134 | int flags, | 134 | int flags, |
135 | int lock_flags) __releases(pag->pag_ici_lock) | 135 | int lock_flags) __releases(pag->pag_ici_lock) |
136 | { | 136 | { |
137 | struct inode *inode = VFS_I(ip); | ||
137 | struct xfs_mount *mp = ip->i_mount; | 138 | struct xfs_mount *mp = ip->i_mount; |
138 | int error = EAGAIN; | 139 | int error; |
140 | |||
141 | spin_lock(&ip->i_flags_lock); | ||
139 | 142 | ||
140 | /* | 143 | /* |
141 | * If INEW is set this inode is being set up | 144 | * If we are racing with another cache hit that is currently |
142 | * If IRECLAIM is set this inode is being torn down | 145 | * instantiating this inode or currently recycling it out of |
143 | * Pause and try again. | 146 | * reclaimable state, wait for the initialisation to complete |
147 | * before continuing. | ||
148 | * | ||
149 | * XXX(hch): eventually we should do something equivalent to | ||
150 | * wait_on_inode to wait for these flags to be cleared | ||
151 | * instead of polling for it. | ||
144 | */ | 152 | */ |
145 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 153 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
146 | XFS_STATS_INC(xs_ig_frecycle); | 154 | XFS_STATS_INC(xs_ig_frecycle); |
155 | error = EAGAIN; | ||
147 | goto out_error; | 156 | goto out_error; |
148 | } | 157 | } |
149 | 158 | ||
150 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 159 | /* |
151 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 160 | * If lookup is racing with unlink return an error immediately. |
152 | 161 | */ | |
153 | /* | 162 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
154 | * If lookup is racing with unlink, then we should return an | 163 | error = ENOENT; |
155 | * error immediately so we don't remove it from the reclaim | 164 | goto out_error; |
156 | * list and potentially leak the inode. | 165 | } |
157 | */ | ||
158 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
159 | error = ENOENT; | ||
160 | goto out_error; | ||
161 | } | ||
162 | 166 | ||
167 | /* | ||
168 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
169 | * Need to carefully get it back into useable state. | ||
170 | */ | ||
171 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
163 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 172 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
164 | 173 | ||
165 | /* | 174 | /* |
166 | * We need to re-initialise the VFS inode as it has been | 175 | * We need to set XFS_INEW atomically with clearing the |
167 | * 'freed' by the VFS. Do this here so we can deal with | 176 | * reclaimable tag so that we do have an indicator of the |
168 | * errors cleanly, then tag it so it can be set up correctly | 177 | * inode still being initialized. |
169 | * later. | ||
170 | */ | 178 | */ |
171 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | 179 | ip->i_flags |= XFS_INEW; |
172 | error = ENOMEM; | 180 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
173 | goto out_error; | 181 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
174 | } | ||
175 | 182 | ||
176 | /* | 183 | spin_unlock(&ip->i_flags_lock); |
177 | * We must set the XFS_INEW flag before clearing the | 184 | read_unlock(&pag->pag_ici_lock); |
178 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
179 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
180 | * below succeed we can safely check XFS_INEW to detect | ||
181 | * that this inode is still being initialised. | ||
182 | */ | ||
183 | xfs_iflags_set(ip, XFS_INEW); | ||
184 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
185 | 185 | ||
186 | /* clear the radix tree reclaim flag as well. */ | 186 | error = -inode_init_always(mp->m_super, inode); |
187 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 187 | if (error) { |
188 | } else if (!igrab(VFS_I(ip))) { | 188 | /* |
189 | * Re-initializing the inode failed, and we are in deep | ||
190 | * trouble. Try to re-add it to the reclaim list. | ||
191 | */ | ||
192 | read_lock(&pag->pag_ici_lock); | ||
193 | spin_lock(&ip->i_flags_lock); | ||
194 | |||
195 | ip->i_flags &= ~XFS_INEW; | ||
196 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
197 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
198 | goto out_error; | ||
199 | } | ||
200 | inode->i_state = I_LOCK|I_NEW; | ||
201 | } else { | ||
189 | /* If the VFS inode is being torn down, pause and try again. */ | 202 | /* If the VFS inode is being torn down, pause and try again. */ |
190 | XFS_STATS_INC(xs_ig_frecycle); | 203 | if (!igrab(inode)) { |
191 | goto out_error; | 204 | error = EAGAIN; |
192 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 205 | goto out_error; |
193 | /* | 206 | } |
194 | * We are racing with another cache hit that is | ||
195 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
196 | * state. Wait for the initialisation to complete before | ||
197 | * continuing. | ||
198 | */ | ||
199 | wait_on_inode(VFS_I(ip)); | ||
200 | } | ||
201 | 207 | ||
202 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 208 | /* We've got a live one. */ |
203 | error = ENOENT; | 209 | spin_unlock(&ip->i_flags_lock); |
204 | iput(VFS_I(ip)); | 210 | read_unlock(&pag->pag_ici_lock); |
205 | goto out_error; | ||
206 | } | 211 | } |
207 | 212 | ||
208 | /* We've got a live one. */ | ||
209 | read_unlock(&pag->pag_ici_lock); | ||
210 | |||
211 | if (lock_flags != 0) | 213 | if (lock_flags != 0) |
212 | xfs_ilock(ip, lock_flags); | 214 | xfs_ilock(ip, lock_flags); |
213 | 215 | ||
@@ -217,6 +219,7 @@ xfs_iget_cache_hit( | |||
217 | return 0; | 219 | return 0; |
218 | 220 | ||
219 | out_error: | 221 | out_error: |
222 | spin_unlock(&ip->i_flags_lock); | ||
220 | read_unlock(&pag->pag_ici_lock); | 223 | read_unlock(&pag->pag_ici_lock); |
221 | return error; | 224 | return error; |
222 | } | 225 | } |