Diffstat (limited to 'fs/xfs/xfs_iget.c')
 fs/xfs/xfs_iget.c | 280 ++++++++++++++++++++++++------------------------
 1 file changed, 130 insertions(+), 150 deletions(-)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 5fcec6f020a7..80e526489be5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -64,6 +64,10 @@ xfs_inode_alloc(
 	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
 	if (!ip)
 		return NULL;
+	if (inode_init_always(mp->m_super, VFS_I(ip))) {
+		kmem_zone_free(xfs_inode_zone, ip);
+		return NULL;
+	}
 
 	ASSERT(atomic_read(&ip->i_iocount) == 0);
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
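This hunk moves inode_init_always() to the very start of xfs_inode_alloc(), so a failure there can be unwound with a single kmem_zone_free() before any XFS-side state exists. A minimal userspace sketch of the same construct-or-unwind ordering, assuming hypothetical stand-ins (vfs_part_init, fs_inode_alloc) rather than the kernel API:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the VFS and XFS halves of an inode. */
struct vfs_part { int state; };
struct fs_inode { struct vfs_part vfs; int fs_private; };

/* May fail; returns 0 on success, like inode_init_always(). */
static int vfs_part_init(struct vfs_part *v)
{
	v->state = 0;
	return 0;	/* imagine an allocation failure returning nonzero */
}

static struct fs_inode *fs_inode_alloc(void)
{
	struct fs_inode *ip = calloc(1, sizeof(*ip));

	if (!ip)
		return NULL;
	/*
	 * Initialise the generic part first: if it fails, nothing else
	 * has been set up yet, so the unwind is a single free() -- the
	 * same shape as the new kmem_zone_free() error path above.
	 */
	if (vfs_part_init(&ip->vfs)) {
		free(ip);
		return NULL;
	}
	ip->fs_private = 42;	/* filesystem-specific setup follows */
	return ip;
}

int main(void)
{
	struct fs_inode *ip = fs_inode_alloc();

	printf("alloc %s\n", ip ? "succeeded" : "failed");
	free(ip);
	return 0;
}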
@@ -78,7 +82,6 @@ xfs_inode_alloc(
 	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
 	ip->i_flags = 0;
 	ip->i_update_core = 0;
-	ip->i_update_size = 0;
 	ip->i_delayed_blks = 0;
 	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
 	ip->i_size = 0;
@@ -105,17 +108,6 @@ xfs_inode_alloc(
 #ifdef XFS_DIR2_TRACE
 	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
 #endif
-	/*
-	 * Now initialise the VFS inode. We do this after the xfs_inode
-	 * initialisation as internal failures will result in ->destroy_inode
-	 * being called and that will pass down through the reclaim path and
-	 * free the XFS inode. This path requires the XFS inode to already be
-	 * initialised. Hence if this call fails, the xfs_inode has already
-	 * been freed and we should not reference it at all in the error
-	 * handling.
-	 */
-	if (!inode_init_always(mp->m_super, VFS_I(ip)))
-		return NULL;
 
 	/* prevent anyone from using this yet */
 	VFS_I(ip)->i_state = I_NEW|I_LOCK;
@@ -123,6 +115,71 @@ xfs_inode_alloc(
 	return ip;
 }
 
+STATIC void
+xfs_inode_free(
+	struct xfs_inode	*ip)
+{
+	switch (ip->i_d.di_mode & S_IFMT) {
+	case S_IFREG:
+	case S_IFDIR:
+	case S_IFLNK:
+		xfs_idestroy_fork(ip, XFS_DATA_FORK);
+		break;
+	}
+
+	if (ip->i_afp)
+		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+
+#ifdef XFS_INODE_TRACE
+	ktrace_free(ip->i_trace);
+#endif
+#ifdef XFS_BMAP_TRACE
+	ktrace_free(ip->i_xtrace);
+#endif
+#ifdef XFS_BTREE_TRACE
+	ktrace_free(ip->i_btrace);
+#endif
+#ifdef XFS_RW_TRACE
+	ktrace_free(ip->i_rwtrace);
+#endif
+#ifdef XFS_ILOCK_TRACE
+	ktrace_free(ip->i_lock_trace);
+#endif
+#ifdef XFS_DIR2_TRACE
+	ktrace_free(ip->i_dir_trace);
+#endif
+
+	if (ip->i_itemp) {
+		/*
+		 * Only if we are shutting down the fs will we see an
+		 * inode still in the AIL. If it is there, we should remove
+		 * it to prevent a use-after-free from occurring.
+		 */
+		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
+		struct xfs_ail	*ailp = lip->li_ailp;
+
+		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
+				       XFS_FORCED_SHUTDOWN(ip->i_mount));
+		if (lip->li_flags & XFS_LI_IN_AIL) {
+			spin_lock(&ailp->xa_lock);
+			if (lip->li_flags & XFS_LI_IN_AIL)
+				xfs_trans_ail_delete(ailp, lip);
+			else
+				spin_unlock(&ailp->xa_lock);
+		}
+		xfs_inode_item_destroy(ip);
+		ip->i_itemp = NULL;
+	}
+
+	/* asserts to verify all state is correct here */
+	ASSERT(atomic_read(&ip->i_iocount) == 0);
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!spin_is_locked(&ip->i_flags_lock));
+	ASSERT(completion_done(&ip->i_flush));
+
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
 /*
  * Check the validity of the inode we just found in the cache
  */
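The new xfs_inode_free() removes a stray log item from the AIL with a classic double-checked pattern: an unlocked test of XFS_LI_IN_AIL covers the common case, then the flag is re-tested under xa_lock before xfs_trans_ail_delete() (which itself drops the lock). A minimal userspace sketch of the same pattern, assuming a pthread mutex stands in for the AIL spinlock:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical analogue of the AIL: a membership flag guarded by a lock. */
static pthread_mutex_t ail_lock = PTHREAD_MUTEX_INITIALIZER;
static bool in_ail;		/* stands in for XFS_LI_IN_AIL */

/* Drops ail_lock itself, as xfs_trans_ail_delete() drops xa_lock. */
static void ail_delete_locked(void)
{
	in_ail = false;
	pthread_mutex_unlock(&ail_lock);
}

static void item_teardown(void)
{
	/*
	 * Cheap unlocked test first: the common case is "not in the AIL",
	 * so most callers never touch the lock.  (The kernel relies on
	 * this racy read being benign; a re-test follows under the lock.)
	 */
	if (in_ail) {
		pthread_mutex_lock(&ail_lock);
		/* Re-test under the lock: another thread may have raced. */
		if (in_ail)
			ail_delete_locked();	/* releases ail_lock */
		else
			pthread_mutex_unlock(&ail_lock);
	}
}

int main(void)
{
	in_ail = true;
	item_teardown();
	printf("in_ail = %d\n", in_ail);
	return 0;
}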
@@ -133,80 +190,82 @@ xfs_iget_cache_hit(
 	int			flags,
 	int			lock_flags) __releases(pag->pag_ici_lock)
 {
+	struct inode		*inode = VFS_I(ip);
 	struct xfs_mount	*mp = ip->i_mount;
-	int			error = EAGAIN;
+	int			error;
+
+	spin_lock(&ip->i_flags_lock);
 
 	/*
-	 * If INEW is set this inode is being set up
-	 * If IRECLAIM is set this inode is being torn down
-	 * Pause and try again.
+	 * If we are racing with another cache hit that is currently
+	 * instantiating this inode or currently recycling it out of
+	 * reclaimable state, wait for the initialisation to complete
+	 * before continuing.
+	 *
+	 * XXX(hch): eventually we should do something equivalent to
+	 * wait_on_inode to wait for these flags to be cleared
+	 * instead of polling for it.
 	 */
-	if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
 		XFS_STATS_INC(xs_ig_frecycle);
+		error = EAGAIN;
 		goto out_error;
 	}
 
-	/* If IRECLAIMABLE is set, we've torn down the vfs inode part */
-	if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
-
-		/*
-		 * If lookup is racing with unlink, then we should return an
-		 * error immediately so we don't remove it from the reclaim
-		 * list and potentially leak the inode.
-		 */
-		if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-			error = ENOENT;
-			goto out_error;
-		}
+	/*
+	 * If lookup is racing with unlink return an error immediately.
+	 */
+	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+		error = ENOENT;
+		goto out_error;
+	}
 
+	/*
+	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
+	 * Need to carefully get it back into useable state.
+	 */
+	if (ip->i_flags & XFS_IRECLAIMABLE) {
 		xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 
 		/*
-		 * We need to re-initialise the VFS inode as it has been
-		 * 'freed' by the VFS. Do this here so we can deal with
-		 * errors cleanly, then tag it so it can be set up correctly
-		 * later.
+		 * We need to set XFS_INEW atomically with clearing the
+		 * reclaimable tag so that we do have an indicator of the
+		 * inode still being initialized.
 		 */
-		if (!inode_init_always(mp->m_super, VFS_I(ip))) {
-			error = ENOMEM;
-			goto out_error;
-		}
+		ip->i_flags |= XFS_INEW;
+		ip->i_flags &= ~XFS_IRECLAIMABLE;
+		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
 
-		/*
-		 * We must set the XFS_INEW flag before clearing the
-		 * XFS_IRECLAIMABLE flag so that if a racing lookup does
-		 * not find the XFS_IRECLAIMABLE above but has the igrab()
-		 * below succeed we can safely check XFS_INEW to detect
-		 * that this inode is still being initialised.
-		 */
-		xfs_iflags_set(ip, XFS_INEW);
-		xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+		spin_unlock(&ip->i_flags_lock);
+		read_unlock(&pag->pag_ici_lock);
 
-		/* clear the radix tree reclaim flag as well. */
-		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
-	} else if (!igrab(VFS_I(ip))) {
+		error = -inode_init_always(mp->m_super, inode);
+		if (error) {
+			/*
+			 * Re-initializing the inode failed, and we are in deep
+			 * trouble.  Try to re-add it to the reclaim list.
+			 */
+			read_lock(&pag->pag_ici_lock);
+			spin_lock(&ip->i_flags_lock);
+
+			ip->i_flags &= ~XFS_INEW;
+			ip->i_flags |= XFS_IRECLAIMABLE;
+			__xfs_inode_set_reclaim_tag(pag, ip);
+			goto out_error;
+		}
+		inode->i_state = I_LOCK|I_NEW;
+	} else {
 		/* If the VFS inode is being torn down, pause and try again. */
-		XFS_STATS_INC(xs_ig_frecycle);
-		goto out_error;
-	} else if (xfs_iflags_test(ip, XFS_INEW)) {
-		/*
-		 * We are racing with another cache hit that is
-		 * currently recycling this inode out of the XFS_IRECLAIMABLE
-		 * state. Wait for the initialisation to complete before
-		 * continuing.
-		 */
-		wait_on_inode(VFS_I(ip));
-	}
+		if (!igrab(inode)) {
+			error = EAGAIN;
+			goto out_error;
+		}
 
-	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-		error = ENOENT;
-		iput(VFS_I(ip));
-		goto out_error;
-	}
+		/* We've got a live one. */
+		spin_unlock(&ip->i_flags_lock);
+		read_unlock(&pag->pag_ici_lock);
 	}
 
-	/* We've got a live one. */
-	read_unlock(&pag->pag_ici_lock);
-
 	if (lock_flags != 0)
 		xfs_ilock(ip, lock_flags);
 
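The recycle path above holds i_flags_lock while it sets XFS_INEW and clears XFS_IRECLAIMABLE, so a racing lookup always observes one coherent state (reclaimable, being initialised, or live), and the error leg simply reverses the transition before re-tagging the inode for reclaim. A minimal userspace sketch of such a guarded two-flag transition, assuming a pthread mutex in place of the i_flags_lock spinlock:

#include <pthread.h>
#include <stdio.h>

#define F_NEW		0x1	/* stands in for XFS_INEW */
#define F_RECLAIMABLE	0x2	/* stands in for XFS_IRECLAIMABLE */

static pthread_mutex_t flags_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int flags = F_RECLAIMABLE;

/*
 * Recycle transition: both flag updates happen in one critical
 * section, so a concurrent lookup taking flags_lock sees either
 * "reclaimable" or "new", never a window with neither flag set.
 */
static void start_recycle(void)
{
	pthread_mutex_lock(&flags_lock);
	flags |= F_NEW;
	flags &= ~F_RECLAIMABLE;
	pthread_mutex_unlock(&flags_lock);
}

/* Error path: undo the transition and park the inode for reclaim again. */
static void abort_recycle(void)
{
	pthread_mutex_lock(&flags_lock);
	flags &= ~F_NEW;
	flags |= F_RECLAIMABLE;
	pthread_mutex_unlock(&flags_lock);
}

int main(void)
{
	start_recycle();
	printf("after start: %#x\n", flags);
	abort_recycle();
	printf("after abort: %#x\n", flags);
	return 0;
}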
@@ -216,6 +275,7 @@ xfs_iget_cache_hit(
 	return 0;
 
 out_error:
+	spin_unlock(&ip->i_flags_lock);
 	read_unlock(&pag->pag_ici_lock);
 	return error;
 }
@@ -299,7 +359,8 @@ out_preload_end:
 	if (lock_flags)
 		xfs_iunlock(ip, lock_flags);
 out_destroy:
-	xfs_destroy_inode(ip);
+	__destroy_inode(VFS_I(ip));
+	xfs_inode_free(ip);
 	return error;
 }
 
@@ -394,32 +455,6 @@ out_error_or_again:
 	return error;
 }
 
-
-/*
- * Look for the inode corresponding to the given ino in the hash table.
- * If it is there and its i_transp pointer matches tp, return it.
- * Otherwise, return NULL.
- */
-xfs_inode_t *
-xfs_inode_incore(xfs_mount_t	*mp,
-		 xfs_ino_t	ino,
-		 xfs_trans_t	*tp)
-{
-	xfs_inode_t	*ip;
-	xfs_perag_t	*pag;
-
-	pag = xfs_get_perag(mp, ino);
-	read_lock(&pag->pag_ici_lock);
-	ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
-	read_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(mp, pag);
-
-	/* the returned inode must match the transaction */
-	if (ip && (ip->i_transp != tp))
-		return NULL;
-	return ip;
-}
-
 /*
  * Decrement reference count of an inode structure and unlock it.
  *
@@ -504,62 +539,7 @@ xfs_ireclaim(
 	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
-	switch (ip->i_d.di_mode & S_IFMT) {
-	case S_IFREG:
-	case S_IFDIR:
-	case S_IFLNK:
-		xfs_idestroy_fork(ip, XFS_DATA_FORK);
-		break;
-	}
-
-	if (ip->i_afp)
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
-#ifdef XFS_INODE_TRACE
-	ktrace_free(ip->i_trace);
-#endif
-#ifdef XFS_BMAP_TRACE
-	ktrace_free(ip->i_xtrace);
-#endif
-#ifdef XFS_BTREE_TRACE
-	ktrace_free(ip->i_btrace);
-#endif
-#ifdef XFS_RW_TRACE
-	ktrace_free(ip->i_rwtrace);
-#endif
-#ifdef XFS_ILOCK_TRACE
-	ktrace_free(ip->i_lock_trace);
-#endif
-#ifdef XFS_DIR2_TRACE
-	ktrace_free(ip->i_dir_trace);
-#endif
-	if (ip->i_itemp) {
-		/*
-		 * Only if we are shutting down the fs will we see an
-		 * inode still in the AIL. If it is there, we should remove
-		 * it to prevent a use-after-free from occurring.
-		 */
-		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
-		struct xfs_ail	*ailp = lip->li_ailp;
-
-		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
-				       XFS_FORCED_SHUTDOWN(ip->i_mount));
-		if (lip->li_flags & XFS_LI_IN_AIL) {
-			spin_lock(&ailp->xa_lock);
-			if (lip->li_flags & XFS_LI_IN_AIL)
-				xfs_trans_ail_delete(ailp, lip);
-			else
-				spin_unlock(&ailp->xa_lock);
-		}
-		xfs_inode_item_destroy(ip);
-		ip->i_itemp = NULL;
-	}
-	/* asserts to verify all state is correct here */
-	ASSERT(atomic_read(&ip->i_iocount) == 0);
-	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!spin_is_locked(&ip->i_flags_lock));
-	ASSERT(completion_done(&ip->i_flush));
-	kmem_zone_free(xfs_inode_zone, ip);
+	xfs_inode_free(ip);
 }
 
 /*
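With the duplicated teardown hoisted into xfs_inode_free(), the failed-allocation path (out_destroy above) and xfs_ireclaim() now funnel through one helper, so the fork, trace and log-item cleanup can no longer drift apart between callers. A minimal sketch of the shape of this refactoring, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

struct fs_inode { char *data_fork; char *attr_fork; };

/* Single teardown helper, shared by every error and reclaim path. */
static void fs_inode_free(struct fs_inode *ip)
{
	free(ip->data_fork);	/* analogous to xfs_idestroy_fork() */
	free(ip->attr_fork);
	free(ip);		/* analogous to kmem_zone_free() */
}

/* Both former copies of the teardown now call the one helper. */
static void lookup_error_path(struct fs_inode *ip) { fs_inode_free(ip); }
static void reclaim_path(struct fs_inode *ip)      { fs_inode_free(ip); }

int main(void)
{
	struct fs_inode *a = calloc(1, sizeof(*a));
	struct fs_inode *b = calloc(1, sizeof(*b));

	lookup_error_path(a);
	reclaim_path(b);
	puts("both paths used the shared teardown");
	return 0;
}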