diff options
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r-- | fs/xfs/xfs_iget.c | 280 |
1 files changed, 130 insertions, 150 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a7..80e526489be5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -64,6 +64,10 @@ xfs_inode_alloc( | |||
64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | 64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); |
65 | if (!ip) | 65 | if (!ip) |
66 | return NULL; | 66 | return NULL; |
67 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | ||
68 | kmem_zone_free(xfs_inode_zone, ip); | ||
69 | return NULL; | ||
70 | } | ||
67 | 71 | ||
68 | ASSERT(atomic_read(&ip->i_iocount) == 0); | 72 | ASSERT(atomic_read(&ip->i_iocount) == 0); |
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 73 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
@@ -78,7 +82,6 @@ xfs_inode_alloc( | |||
78 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | 82 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); |
79 | ip->i_flags = 0; | 83 | ip->i_flags = 0; |
80 | ip->i_update_core = 0; | 84 | ip->i_update_core = 0; |
81 | ip->i_update_size = 0; | ||
82 | ip->i_delayed_blks = 0; | 85 | ip->i_delayed_blks = 0; |
83 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | 86 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); |
84 | ip->i_size = 0; | 87 | ip->i_size = 0; |
@@ -105,17 +108,6 @@ xfs_inode_alloc( | |||
105 | #ifdef XFS_DIR2_TRACE | 108 | #ifdef XFS_DIR2_TRACE |
106 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); | 109 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); |
107 | #endif | 110 | #endif |
108 | /* | ||
109 | * Now initialise the VFS inode. We do this after the xfs_inode | ||
110 | * initialisation as internal failures will result in ->destroy_inode | ||
111 | * being called and that will pass down through the reclaim path and | ||
112 | * free the XFS inode. This path requires the XFS inode to already be | ||
113 | * initialised. Hence if this call fails, the xfs_inode has already | ||
114 | * been freed and we should not reference it at all in the error | ||
115 | * handling. | ||
116 | */ | ||
117 | if (!inode_init_always(mp->m_super, VFS_I(ip))) | ||
118 | return NULL; | ||
119 | 111 | ||
120 | /* prevent anyone from using this yet */ | 112 | /* prevent anyone from using this yet */ |
121 | VFS_I(ip)->i_state = I_NEW|I_LOCK; | 113 | VFS_I(ip)->i_state = I_NEW|I_LOCK; |
@@ -123,6 +115,71 @@ xfs_inode_alloc( | |||
123 | return ip; | 115 | return ip; |
124 | } | 116 | } |
125 | 117 | ||
118 | STATIC void | ||
119 | xfs_inode_free( | ||
120 | struct xfs_inode *ip) | ||
121 | { | ||
122 | switch (ip->i_d.di_mode & S_IFMT) { | ||
123 | case S_IFREG: | ||
124 | case S_IFDIR: | ||
125 | case S_IFLNK: | ||
126 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
127 | break; | ||
128 | } | ||
129 | |||
130 | if (ip->i_afp) | ||
131 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
132 | |||
133 | #ifdef XFS_INODE_TRACE | ||
134 | ktrace_free(ip->i_trace); | ||
135 | #endif | ||
136 | #ifdef XFS_BMAP_TRACE | ||
137 | ktrace_free(ip->i_xtrace); | ||
138 | #endif | ||
139 | #ifdef XFS_BTREE_TRACE | ||
140 | ktrace_free(ip->i_btrace); | ||
141 | #endif | ||
142 | #ifdef XFS_RW_TRACE | ||
143 | ktrace_free(ip->i_rwtrace); | ||
144 | #endif | ||
145 | #ifdef XFS_ILOCK_TRACE | ||
146 | ktrace_free(ip->i_lock_trace); | ||
147 | #endif | ||
148 | #ifdef XFS_DIR2_TRACE | ||
149 | ktrace_free(ip->i_dir_trace); | ||
150 | #endif | ||
151 | |||
152 | if (ip->i_itemp) { | ||
153 | /* | ||
154 | * Only if we are shutting down the fs will we see an | ||
155 | * inode still in the AIL. If it is there, we should remove | ||
156 | * it to prevent a use-after-free from occurring. | ||
157 | */ | ||
158 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
159 | struct xfs_ail *ailp = lip->li_ailp; | ||
160 | |||
161 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
162 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
163 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
164 | spin_lock(&ailp->xa_lock); | ||
165 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
166 | xfs_trans_ail_delete(ailp, lip); | ||
167 | else | ||
168 | spin_unlock(&ailp->xa_lock); | ||
169 | } | ||
170 | xfs_inode_item_destroy(ip); | ||
171 | ip->i_itemp = NULL; | ||
172 | } | ||
173 | |||
174 | /* asserts to verify all state is correct here */ | ||
175 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
176 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
177 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
178 | ASSERT(completion_done(&ip->i_flush)); | ||
179 | |||
180 | kmem_zone_free(xfs_inode_zone, ip); | ||
181 | } | ||
182 | |||
126 | /* | 183 | /* |
127 | * Check the validity of the inode we just found in the cache | 184 | * Check the validity of the inode we just found in the cache |
128 | */ | 185 | */ |
@@ -133,80 +190,82 @@ xfs_iget_cache_hit( | |||
133 | int flags, | 190 | int flags, |
134 | int lock_flags) __releases(pag->pag_ici_lock) | 191 | int lock_flags) __releases(pag->pag_ici_lock) |
135 | { | 192 | { |
193 | struct inode *inode = VFS_I(ip); | ||
136 | struct xfs_mount *mp = ip->i_mount; | 194 | struct xfs_mount *mp = ip->i_mount; |
137 | int error = EAGAIN; | 195 | int error; |
196 | |||
197 | spin_lock(&ip->i_flags_lock); | ||
138 | 198 | ||
139 | /* | 199 | /* |
140 | * If INEW is set this inode is being set up | 200 | * If we are racing with another cache hit that is currently |
141 | * If IRECLAIM is set this inode is being torn down | 201 | * instantiating this inode or currently recycling it out of |
142 | * Pause and try again. | 202 | * reclaimable state, wait for the initialisation to complete |
203 | * before continuing. | ||
204 | * | ||
205 | * XXX(hch): eventually we should do something equivalent to | ||
206 | * wait_on_inode to wait for these flags to be cleared | ||
207 | * instead of polling for it. | ||
143 | */ | 208 | */ |
144 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 209 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
145 | XFS_STATS_INC(xs_ig_frecycle); | 210 | XFS_STATS_INC(xs_ig_frecycle); |
211 | error = EAGAIN; | ||
146 | goto out_error; | 212 | goto out_error; |
147 | } | 213 | } |
148 | 214 | ||
149 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 215 | /* |
150 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 216 | * If lookup is racing with unlink return an error immediately. |
151 | 217 | */ | |
152 | /* | 218 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
153 | * If lookup is racing with unlink, then we should return an | 219 | error = ENOENT; |
154 | * error immediately so we don't remove it from the reclaim | 220 | goto out_error; |
155 | * list and potentially leak the inode. | 221 | } |
156 | */ | ||
157 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
158 | error = ENOENT; | ||
159 | goto out_error; | ||
160 | } | ||
161 | 222 | ||
223 | /* | ||
224 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
225 | * Need to carefully get it back into useable state. | ||
226 | */ | ||
227 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
162 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 228 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
163 | 229 | ||
164 | /* | 230 | /* |
165 | * We need to re-initialise the VFS inode as it has been | 231 | * We need to set XFS_INEW atomically with clearing the |
166 | * 'freed' by the VFS. Do this here so we can deal with | 232 | * reclaimable tag so that we do have an indicator of the |
167 | * errors cleanly, then tag it so it can be set up correctly | 233 | * inode still being initialized. |
168 | * later. | ||
169 | */ | 234 | */ |
170 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | 235 | ip->i_flags |= XFS_INEW; |
171 | error = ENOMEM; | 236 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
172 | goto out_error; | 237 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
173 | } | ||
174 | 238 | ||
175 | /* | 239 | spin_unlock(&ip->i_flags_lock); |
176 | * We must set the XFS_INEW flag before clearing the | 240 | read_unlock(&pag->pag_ici_lock); |
177 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
178 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
179 | * below succeed we can safely check XFS_INEW to detect | ||
180 | * that this inode is still being initialised. | ||
181 | */ | ||
182 | xfs_iflags_set(ip, XFS_INEW); | ||
183 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
184 | 241 | ||
185 | /* clear the radix tree reclaim flag as well. */ | 242 | error = -inode_init_always(mp->m_super, inode); |
186 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 243 | if (error) { |
187 | } else if (!igrab(VFS_I(ip))) { | 244 | /* |
245 | * Re-initializing the inode failed, and we are in deep | ||
246 | * trouble. Try to re-add it to the reclaim list. | ||
247 | */ | ||
248 | read_lock(&pag->pag_ici_lock); | ||
249 | spin_lock(&ip->i_flags_lock); | ||
250 | |||
251 | ip->i_flags &= ~XFS_INEW; | ||
252 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
253 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
254 | goto out_error; | ||
255 | } | ||
256 | inode->i_state = I_LOCK|I_NEW; | ||
257 | } else { | ||
188 | /* If the VFS inode is being torn down, pause and try again. */ | 258 | /* If the VFS inode is being torn down, pause and try again. */ |
189 | XFS_STATS_INC(xs_ig_frecycle); | 259 | if (!igrab(inode)) { |
190 | goto out_error; | 260 | error = EAGAIN; |
191 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 261 | goto out_error; |
192 | /* | 262 | } |
193 | * We are racing with another cache hit that is | ||
194 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
195 | * state. Wait for the initialisation to complete before | ||
196 | * continuing. | ||
197 | */ | ||
198 | wait_on_inode(VFS_I(ip)); | ||
199 | } | ||
200 | 263 | ||
201 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 264 | /* We've got a live one. */ |
202 | error = ENOENT; | 265 | spin_unlock(&ip->i_flags_lock); |
203 | iput(VFS_I(ip)); | 266 | read_unlock(&pag->pag_ici_lock); |
204 | goto out_error; | ||
205 | } | 267 | } |
206 | 268 | ||
207 | /* We've got a live one. */ | ||
208 | read_unlock(&pag->pag_ici_lock); | ||
209 | |||
210 | if (lock_flags != 0) | 269 | if (lock_flags != 0) |
211 | xfs_ilock(ip, lock_flags); | 270 | xfs_ilock(ip, lock_flags); |
212 | 271 | ||
@@ -216,6 +275,7 @@ xfs_iget_cache_hit( | |||
216 | return 0; | 275 | return 0; |
217 | 276 | ||
218 | out_error: | 277 | out_error: |
278 | spin_unlock(&ip->i_flags_lock); | ||
219 | read_unlock(&pag->pag_ici_lock); | 279 | read_unlock(&pag->pag_ici_lock); |
220 | return error; | 280 | return error; |
221 | } | 281 | } |
@@ -299,7 +359,8 @@ out_preload_end: | |||
299 | if (lock_flags) | 359 | if (lock_flags) |
300 | xfs_iunlock(ip, lock_flags); | 360 | xfs_iunlock(ip, lock_flags); |
301 | out_destroy: | 361 | out_destroy: |
302 | xfs_destroy_inode(ip); | 362 | __destroy_inode(VFS_I(ip)); |
363 | xfs_inode_free(ip); | ||
303 | return error; | 364 | return error; |
304 | } | 365 | } |
305 | 366 | ||
@@ -394,32 +455,6 @@ out_error_or_again: | |||
394 | return error; | 455 | return error; |
395 | } | 456 | } |
396 | 457 | ||
397 | |||
398 | /* | ||
399 | * Look for the inode corresponding to the given ino in the hash table. | ||
400 | * If it is there and its i_transp pointer matches tp, return it. | ||
401 | * Otherwise, return NULL. | ||
402 | */ | ||
403 | xfs_inode_t * | ||
404 | xfs_inode_incore(xfs_mount_t *mp, | ||
405 | xfs_ino_t ino, | ||
406 | xfs_trans_t *tp) | ||
407 | { | ||
408 | xfs_inode_t *ip; | ||
409 | xfs_perag_t *pag; | ||
410 | |||
411 | pag = xfs_get_perag(mp, ino); | ||
412 | read_lock(&pag->pag_ici_lock); | ||
413 | ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); | ||
414 | read_unlock(&pag->pag_ici_lock); | ||
415 | xfs_put_perag(mp, pag); | ||
416 | |||
417 | /* the returned inode must match the transaction */ | ||
418 | if (ip && (ip->i_transp != tp)) | ||
419 | return NULL; | ||
420 | return ip; | ||
421 | } | ||
422 | |||
423 | /* | 458 | /* |
424 | * Decrement reference count of an inode structure and unlock it. | 459 | * Decrement reference count of an inode structure and unlock it. |
425 | * | 460 | * |
@@ -504,62 +539,7 @@ xfs_ireclaim( | |||
504 | xfs_qm_dqdetach(ip); | 539 | xfs_qm_dqdetach(ip); |
505 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 540 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
506 | 541 | ||
507 | switch (ip->i_d.di_mode & S_IFMT) { | 542 | xfs_inode_free(ip); |
508 | case S_IFREG: | ||
509 | case S_IFDIR: | ||
510 | case S_IFLNK: | ||
511 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
512 | break; | ||
513 | } | ||
514 | |||
515 | if (ip->i_afp) | ||
516 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
517 | |||
518 | #ifdef XFS_INODE_TRACE | ||
519 | ktrace_free(ip->i_trace); | ||
520 | #endif | ||
521 | #ifdef XFS_BMAP_TRACE | ||
522 | ktrace_free(ip->i_xtrace); | ||
523 | #endif | ||
524 | #ifdef XFS_BTREE_TRACE | ||
525 | ktrace_free(ip->i_btrace); | ||
526 | #endif | ||
527 | #ifdef XFS_RW_TRACE | ||
528 | ktrace_free(ip->i_rwtrace); | ||
529 | #endif | ||
530 | #ifdef XFS_ILOCK_TRACE | ||
531 | ktrace_free(ip->i_lock_trace); | ||
532 | #endif | ||
533 | #ifdef XFS_DIR2_TRACE | ||
534 | ktrace_free(ip->i_dir_trace); | ||
535 | #endif | ||
536 | if (ip->i_itemp) { | ||
537 | /* | ||
538 | * Only if we are shutting down the fs will we see an | ||
539 | * inode still in the AIL. If it is there, we should remove | ||
540 | * it to prevent a use-after-free from occurring. | ||
541 | */ | ||
542 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
543 | struct xfs_ail *ailp = lip->li_ailp; | ||
544 | |||
545 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
546 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
547 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
548 | spin_lock(&ailp->xa_lock); | ||
549 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
550 | xfs_trans_ail_delete(ailp, lip); | ||
551 | else | ||
552 | spin_unlock(&ailp->xa_lock); | ||
553 | } | ||
554 | xfs_inode_item_destroy(ip); | ||
555 | ip->i_itemp = NULL; | ||
556 | } | ||
557 | /* asserts to verify all state is correct here */ | ||
558 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
559 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
560 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
561 | ASSERT(completion_done(&ip->i_flush)); | ||
562 | kmem_zone_free(xfs_inode_zone, ip); | ||
563 | } | 543 | } |
564 | 544 | ||
565 | /* | 545 | /* |