diff options
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r-- | fs/xfs/xfs_iget.c | 253 |
1 files changed, 130 insertions, 123 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a7..ecbf8b4d2e2e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -64,6 +64,10 @@ xfs_inode_alloc( | |||
64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | 64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); |
65 | if (!ip) | 65 | if (!ip) |
66 | return NULL; | 66 | return NULL; |
67 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | ||
68 | kmem_zone_free(xfs_inode_zone, ip); | ||
69 | return NULL; | ||
70 | } | ||
67 | 71 | ||
68 | ASSERT(atomic_read(&ip->i_iocount) == 0); | 72 | ASSERT(atomic_read(&ip->i_iocount) == 0); |
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 73 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
@@ -105,17 +109,6 @@ xfs_inode_alloc( | |||
105 | #ifdef XFS_DIR2_TRACE | 109 | #ifdef XFS_DIR2_TRACE |
106 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); | 110 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); |
107 | #endif | 111 | #endif |
108 | /* | ||
109 | * Now initialise the VFS inode. We do this after the xfs_inode | ||
110 | * initialisation as internal failures will result in ->destroy_inode | ||
111 | * being called and that will pass down through the reclaim path and | ||
112 | * free the XFS inode. This path requires the XFS inode to already be | ||
113 | * initialised. Hence if this call fails, the xfs_inode has already | ||
114 | * been freed and we should not reference it at all in the error | ||
115 | * handling. | ||
116 | */ | ||
117 | if (!inode_init_always(mp->m_super, VFS_I(ip))) | ||
118 | return NULL; | ||
119 | 112 | ||
120 | /* prevent anyone from using this yet */ | 113 | /* prevent anyone from using this yet */ |
121 | VFS_I(ip)->i_state = I_NEW|I_LOCK; | 114 | VFS_I(ip)->i_state = I_NEW|I_LOCK; |
@@ -123,6 +116,71 @@ xfs_inode_alloc( | |||
123 | return ip; | 116 | return ip; |
124 | } | 117 | } |
125 | 118 | ||
119 | STATIC void | ||
120 | xfs_inode_free( | ||
121 | struct xfs_inode *ip) | ||
122 | { | ||
123 | switch (ip->i_d.di_mode & S_IFMT) { | ||
124 | case S_IFREG: | ||
125 | case S_IFDIR: | ||
126 | case S_IFLNK: | ||
127 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
128 | break; | ||
129 | } | ||
130 | |||
131 | if (ip->i_afp) | ||
132 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
133 | |||
134 | #ifdef XFS_INODE_TRACE | ||
135 | ktrace_free(ip->i_trace); | ||
136 | #endif | ||
137 | #ifdef XFS_BMAP_TRACE | ||
138 | ktrace_free(ip->i_xtrace); | ||
139 | #endif | ||
140 | #ifdef XFS_BTREE_TRACE | ||
141 | ktrace_free(ip->i_btrace); | ||
142 | #endif | ||
143 | #ifdef XFS_RW_TRACE | ||
144 | ktrace_free(ip->i_rwtrace); | ||
145 | #endif | ||
146 | #ifdef XFS_ILOCK_TRACE | ||
147 | ktrace_free(ip->i_lock_trace); | ||
148 | #endif | ||
149 | #ifdef XFS_DIR2_TRACE | ||
150 | ktrace_free(ip->i_dir_trace); | ||
151 | #endif | ||
152 | |||
153 | if (ip->i_itemp) { | ||
154 | /* | ||
155 | * Only if we are shutting down the fs will we see an | ||
156 | * inode still in the AIL. If it is there, we should remove | ||
157 | * it to prevent a use-after-free from occurring. | ||
158 | */ | ||
159 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
160 | struct xfs_ail *ailp = lip->li_ailp; | ||
161 | |||
162 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
163 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
164 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
165 | spin_lock(&ailp->xa_lock); | ||
166 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
167 | xfs_trans_ail_delete(ailp, lip); | ||
168 | else | ||
169 | spin_unlock(&ailp->xa_lock); | ||
170 | } | ||
171 | xfs_inode_item_destroy(ip); | ||
172 | ip->i_itemp = NULL; | ||
173 | } | ||
174 | |||
175 | /* asserts to verify all state is correct here */ | ||
176 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
177 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
178 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
179 | ASSERT(completion_done(&ip->i_flush)); | ||
180 | |||
181 | kmem_zone_free(xfs_inode_zone, ip); | ||
182 | } | ||
183 | |||
126 | /* | 184 | /* |
127 | * Check the validity of the inode we just found it the cache | 185 | * Check the validity of the inode we just found it the cache |
128 | */ | 186 | */ |
@@ -133,80 +191,82 @@ xfs_iget_cache_hit( | |||
133 | int flags, | 191 | int flags, |
134 | int lock_flags) __releases(pag->pag_ici_lock) | 192 | int lock_flags) __releases(pag->pag_ici_lock) |
135 | { | 193 | { |
194 | struct inode *inode = VFS_I(ip); | ||
136 | struct xfs_mount *mp = ip->i_mount; | 195 | struct xfs_mount *mp = ip->i_mount; |
137 | int error = EAGAIN; | 196 | int error; |
197 | |||
198 | spin_lock(&ip->i_flags_lock); | ||
138 | 199 | ||
139 | /* | 200 | /* |
140 | * If INEW is set this inode is being set up | 201 | * If we are racing with another cache hit that is currently |
141 | * If IRECLAIM is set this inode is being torn down | 202 | * instantiating this inode or currently recycling it out of |
142 | * Pause and try again. | 203 | * reclaimabe state, wait for the initialisation to complete |
204 | * before continuing. | ||
205 | * | ||
206 | * XXX(hch): eventually we should do something equivalent to | ||
207 | * wait_on_inode to wait for these flags to be cleared | ||
208 | * instead of polling for it. | ||
143 | */ | 209 | */ |
144 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 210 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
145 | XFS_STATS_INC(xs_ig_frecycle); | 211 | XFS_STATS_INC(xs_ig_frecycle); |
212 | error = EAGAIN; | ||
146 | goto out_error; | 213 | goto out_error; |
147 | } | 214 | } |
148 | 215 | ||
149 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 216 | /* |
150 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 217 | * If lookup is racing with unlink return an error immediately. |
151 | 218 | */ | |
152 | /* | 219 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
153 | * If lookup is racing with unlink, then we should return an | 220 | error = ENOENT; |
154 | * error immediately so we don't remove it from the reclaim | 221 | goto out_error; |
155 | * list and potentially leak the inode. | 222 | } |
156 | */ | ||
157 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
158 | error = ENOENT; | ||
159 | goto out_error; | ||
160 | } | ||
161 | 223 | ||
224 | /* | ||
225 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
226 | * Need to carefully get it back into useable state. | ||
227 | */ | ||
228 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
162 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 229 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
163 | 230 | ||
164 | /* | 231 | /* |
165 | * We need to re-initialise the VFS inode as it has been | 232 | * We need to set XFS_INEW atomically with clearing the |
166 | * 'freed' by the VFS. Do this here so we can deal with | 233 | * reclaimable tag so that we do have an indicator of the |
167 | * errors cleanly, then tag it so it can be set up correctly | 234 | * inode still being initialized. |
168 | * later. | ||
169 | */ | 235 | */ |
170 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | 236 | ip->i_flags |= XFS_INEW; |
171 | error = ENOMEM; | 237 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
172 | goto out_error; | 238 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
173 | } | ||
174 | 239 | ||
175 | /* | 240 | spin_unlock(&ip->i_flags_lock); |
176 | * We must set the XFS_INEW flag before clearing the | 241 | read_unlock(&pag->pag_ici_lock); |
177 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
178 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
179 | * below succeed we can safely check XFS_INEW to detect | ||
180 | * that this inode is still being initialised. | ||
181 | */ | ||
182 | xfs_iflags_set(ip, XFS_INEW); | ||
183 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
184 | 242 | ||
185 | /* clear the radix tree reclaim flag as well. */ | 243 | error = -inode_init_always(mp->m_super, inode); |
186 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 244 | if (error) { |
187 | } else if (!igrab(VFS_I(ip))) { | 245 | /* |
246 | * Re-initializing the inode failed, and we are in deep | ||
247 | * trouble. Try to re-add it to the reclaim list. | ||
248 | */ | ||
249 | read_lock(&pag->pag_ici_lock); | ||
250 | spin_lock(&ip->i_flags_lock); | ||
251 | |||
252 | ip->i_flags &= ~XFS_INEW; | ||
253 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
254 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
255 | goto out_error; | ||
256 | } | ||
257 | inode->i_state = I_LOCK|I_NEW; | ||
258 | } else { | ||
188 | /* If the VFS inode is being torn down, pause and try again. */ | 259 | /* If the VFS inode is being torn down, pause and try again. */ |
189 | XFS_STATS_INC(xs_ig_frecycle); | 260 | if (!igrab(inode)) { |
190 | goto out_error; | 261 | error = EAGAIN; |
191 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 262 | goto out_error; |
192 | /* | 263 | } |
193 | * We are racing with another cache hit that is | ||
194 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
195 | * state. Wait for the initialisation to complete before | ||
196 | * continuing. | ||
197 | */ | ||
198 | wait_on_inode(VFS_I(ip)); | ||
199 | } | ||
200 | 264 | ||
201 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 265 | /* We've got a live one. */ |
202 | error = ENOENT; | 266 | spin_unlock(&ip->i_flags_lock); |
203 | iput(VFS_I(ip)); | 267 | read_unlock(&pag->pag_ici_lock); |
204 | goto out_error; | ||
205 | } | 268 | } |
206 | 269 | ||
207 | /* We've got a live one. */ | ||
208 | read_unlock(&pag->pag_ici_lock); | ||
209 | |||
210 | if (lock_flags != 0) | 270 | if (lock_flags != 0) |
211 | xfs_ilock(ip, lock_flags); | 271 | xfs_ilock(ip, lock_flags); |
212 | 272 | ||
@@ -216,6 +276,7 @@ xfs_iget_cache_hit( | |||
216 | return 0; | 276 | return 0; |
217 | 277 | ||
218 | out_error: | 278 | out_error: |
279 | spin_unlock(&ip->i_flags_lock); | ||
219 | read_unlock(&pag->pag_ici_lock); | 280 | read_unlock(&pag->pag_ici_lock); |
220 | return error; | 281 | return error; |
221 | } | 282 | } |
@@ -299,7 +360,8 @@ out_preload_end: | |||
299 | if (lock_flags) | 360 | if (lock_flags) |
300 | xfs_iunlock(ip, lock_flags); | 361 | xfs_iunlock(ip, lock_flags); |
301 | out_destroy: | 362 | out_destroy: |
302 | xfs_destroy_inode(ip); | 363 | __destroy_inode(VFS_I(ip)); |
364 | xfs_inode_free(ip); | ||
303 | return error; | 365 | return error; |
304 | } | 366 | } |
305 | 367 | ||
@@ -504,62 +566,7 @@ xfs_ireclaim( | |||
504 | xfs_qm_dqdetach(ip); | 566 | xfs_qm_dqdetach(ip); |
505 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 567 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
506 | 568 | ||
507 | switch (ip->i_d.di_mode & S_IFMT) { | 569 | xfs_inode_free(ip); |
508 | case S_IFREG: | ||
509 | case S_IFDIR: | ||
510 | case S_IFLNK: | ||
511 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
512 | break; | ||
513 | } | ||
514 | |||
515 | if (ip->i_afp) | ||
516 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
517 | |||
518 | #ifdef XFS_INODE_TRACE | ||
519 | ktrace_free(ip->i_trace); | ||
520 | #endif | ||
521 | #ifdef XFS_BMAP_TRACE | ||
522 | ktrace_free(ip->i_xtrace); | ||
523 | #endif | ||
524 | #ifdef XFS_BTREE_TRACE | ||
525 | ktrace_free(ip->i_btrace); | ||
526 | #endif | ||
527 | #ifdef XFS_RW_TRACE | ||
528 | ktrace_free(ip->i_rwtrace); | ||
529 | #endif | ||
530 | #ifdef XFS_ILOCK_TRACE | ||
531 | ktrace_free(ip->i_lock_trace); | ||
532 | #endif | ||
533 | #ifdef XFS_DIR2_TRACE | ||
534 | ktrace_free(ip->i_dir_trace); | ||
535 | #endif | ||
536 | if (ip->i_itemp) { | ||
537 | /* | ||
538 | * Only if we are shutting down the fs will we see an | ||
539 | * inode still in the AIL. If it is there, we should remove | ||
540 | * it to prevent a use-after-free from occurring. | ||
541 | */ | ||
542 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
543 | struct xfs_ail *ailp = lip->li_ailp; | ||
544 | |||
545 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
546 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
547 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
548 | spin_lock(&ailp->xa_lock); | ||
549 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
550 | xfs_trans_ail_delete(ailp, lip); | ||
551 | else | ||
552 | spin_unlock(&ailp->xa_lock); | ||
553 | } | ||
554 | xfs_inode_item_destroy(ip); | ||
555 | ip->i_itemp = NULL; | ||
556 | } | ||
557 | /* asserts to verify all state is correct here */ | ||
558 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
559 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
560 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
561 | ASSERT(completion_done(&ip->i_flush)); | ||
562 | kmem_zone_free(xfs_inode_zone, ip); | ||
563 | } | 570 | } |
564 | 571 | ||
565 | /* | 572 | /* |