aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_iget.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r--fs/xfs/xfs_iget.c253
1 files changed, 130 insertions, 123 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 5fcec6f020a7..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -64,6 +64,10 @@ xfs_inode_alloc(
64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); 64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
65 if (!ip) 65 if (!ip)
66 return NULL; 66 return NULL;
67 if (inode_init_always(mp->m_super, VFS_I(ip))) {
68 kmem_zone_free(xfs_inode_zone, ip);
69 return NULL;
70 }
67 71
68 ASSERT(atomic_read(&ip->i_iocount) == 0); 72 ASSERT(atomic_read(&ip->i_iocount) == 0);
69 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -105,17 +109,6 @@ xfs_inode_alloc(
105#ifdef XFS_DIR2_TRACE 109#ifdef XFS_DIR2_TRACE
106 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 110 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
107#endif 111#endif
108 /*
109 * Now initialise the VFS inode. We do this after the xfs_inode
110 * initialisation as internal failures will result in ->destroy_inode
111 * being called and that will pass down through the reclaim path and
112 * free the XFS inode. This path requires the XFS inode to already be
113 * initialised. Hence if this call fails, the xfs_inode has already
114 * been freed and we should not reference it at all in the error
115 * handling.
116 */
117 if (!inode_init_always(mp->m_super, VFS_I(ip)))
118 return NULL;
119 112
120 /* prevent anyone from using this yet */ 113 /* prevent anyone from using this yet */
121 VFS_I(ip)->i_state = I_NEW|I_LOCK; 114 VFS_I(ip)->i_state = I_NEW|I_LOCK;
@@ -123,6 +116,71 @@ xfs_inode_alloc(
123 return ip; 116 return ip;
124} 117}
125 118
119STATIC void
120xfs_inode_free(
121 struct xfs_inode *ip)
122{
123 switch (ip->i_d.di_mode & S_IFMT) {
124 case S_IFREG:
125 case S_IFDIR:
126 case S_IFLNK:
127 xfs_idestroy_fork(ip, XFS_DATA_FORK);
128 break;
129 }
130
131 if (ip->i_afp)
132 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
133
134#ifdef XFS_INODE_TRACE
135 ktrace_free(ip->i_trace);
136#endif
137#ifdef XFS_BMAP_TRACE
138 ktrace_free(ip->i_xtrace);
139#endif
140#ifdef XFS_BTREE_TRACE
141 ktrace_free(ip->i_btrace);
142#endif
143#ifdef XFS_RW_TRACE
144 ktrace_free(ip->i_rwtrace);
145#endif
146#ifdef XFS_ILOCK_TRACE
147 ktrace_free(ip->i_lock_trace);
148#endif
149#ifdef XFS_DIR2_TRACE
150 ktrace_free(ip->i_dir_trace);
151#endif
152
153 if (ip->i_itemp) {
154 /*
155 * Only if we are shutting down the fs will we see an
156 * inode still in the AIL. If it is there, we should remove
157 * it to prevent a use-after-free from occurring.
158 */
159 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
160 struct xfs_ail *ailp = lip->li_ailp;
161
162 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
163 XFS_FORCED_SHUTDOWN(ip->i_mount));
164 if (lip->li_flags & XFS_LI_IN_AIL) {
165 spin_lock(&ailp->xa_lock);
166 if (lip->li_flags & XFS_LI_IN_AIL)
167 xfs_trans_ail_delete(ailp, lip);
168 else
169 spin_unlock(&ailp->xa_lock);
170 }
171 xfs_inode_item_destroy(ip);
172 ip->i_itemp = NULL;
173 }
174
175 /* asserts to verify all state is correct here */
176 ASSERT(atomic_read(&ip->i_iocount) == 0);
177 ASSERT(atomic_read(&ip->i_pincount) == 0);
178 ASSERT(!spin_is_locked(&ip->i_flags_lock));
179 ASSERT(completion_done(&ip->i_flush));
180
181 kmem_zone_free(xfs_inode_zone, ip);
182}
183
126/* 184/*
127 * Check the validity of the inode we just found it the cache 185 * Check the validity of the inode we just found it the cache
128 */ 186 */
@@ -133,80 +191,82 @@ xfs_iget_cache_hit(
133 int flags, 191 int flags,
134 int lock_flags) __releases(pag->pag_ici_lock) 192 int lock_flags) __releases(pag->pag_ici_lock)
135{ 193{
194 struct inode *inode = VFS_I(ip);
136 struct xfs_mount *mp = ip->i_mount; 195 struct xfs_mount *mp = ip->i_mount;
137 int error = EAGAIN; 196 int error;
197
198 spin_lock(&ip->i_flags_lock);
138 199
139 /* 200 /*
140 * If INEW is set this inode is being set up 201 * If we are racing with another cache hit that is currently
141 * If IRECLAIM is set this inode is being torn down 202 * instantiating this inode or currently recycling it out of
142 * Pause and try again. 203 * reclaimabe state, wait for the initialisation to complete
204 * before continuing.
205 *
206 * XXX(hch): eventually we should do something equivalent to
207 * wait_on_inode to wait for these flags to be cleared
208 * instead of polling for it.
143 */ 209 */
144 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 210 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
145 XFS_STATS_INC(xs_ig_frecycle); 211 XFS_STATS_INC(xs_ig_frecycle);
212 error = EAGAIN;
146 goto out_error; 213 goto out_error;
147 } 214 }
148 215
149 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ 216 /*
150 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 217 * If lookup is racing with unlink return an error immediately.
151 218 */
152 /* 219 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
153 * If lookup is racing with unlink, then we should return an 220 error = ENOENT;
154 * error immediately so we don't remove it from the reclaim 221 goto out_error;
155 * list and potentially leak the inode. 222 }
156 */
157 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
158 error = ENOENT;
159 goto out_error;
160 }
161 223
224 /*
225 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
226 * Need to carefully get it back into useable state.
227 */
228 if (ip->i_flags & XFS_IRECLAIMABLE) {
162 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 229 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
163 230
164 /* 231 /*
165 * We need to re-initialise the VFS inode as it has been 232 * We need to set XFS_INEW atomically with clearing the
166 * 'freed' by the VFS. Do this here so we can deal with 233 * reclaimable tag so that we do have an indicator of the
167 * errors cleanly, then tag it so it can be set up correctly 234 * inode still being initialized.
168 * later.
169 */ 235 */
170 if (!inode_init_always(mp->m_super, VFS_I(ip))) { 236 ip->i_flags |= XFS_INEW;
171 error = ENOMEM; 237 ip->i_flags &= ~XFS_IRECLAIMABLE;
172 goto out_error; 238 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
173 }
174 239
175 /* 240 spin_unlock(&ip->i_flags_lock);
176 * We must set the XFS_INEW flag before clearing the 241 read_unlock(&pag->pag_ici_lock);
177 * XFS_IRECLAIMABLE flag so that if a racing lookup does
178 * not find the XFS_IRECLAIMABLE above but has the igrab()
179 * below succeed we can safely check XFS_INEW to detect
180 * that this inode is still being initialised.
181 */
182 xfs_iflags_set(ip, XFS_INEW);
183 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
184 242
185 /* clear the radix tree reclaim flag as well. */ 243 error = -inode_init_always(mp->m_super, inode);
186 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 244 if (error) {
187 } else if (!igrab(VFS_I(ip))) { 245 /*
246 * Re-initializing the inode failed, and we are in deep
247 * trouble. Try to re-add it to the reclaim list.
248 */
249 read_lock(&pag->pag_ici_lock);
250 spin_lock(&ip->i_flags_lock);
251
252 ip->i_flags &= ~XFS_INEW;
253 ip->i_flags |= XFS_IRECLAIMABLE;
254 __xfs_inode_set_reclaim_tag(pag, ip);
255 goto out_error;
256 }
257 inode->i_state = I_LOCK|I_NEW;
258 } else {
188 /* If the VFS inode is being torn down, pause and try again. */ 259 /* If the VFS inode is being torn down, pause and try again. */
189 XFS_STATS_INC(xs_ig_frecycle); 260 if (!igrab(inode)) {
190 goto out_error; 261 error = EAGAIN;
191 } else if (xfs_iflags_test(ip, XFS_INEW)) { 262 goto out_error;
192 /* 263 }
193 * We are racing with another cache hit that is
194 * currently recycling this inode out of the XFS_IRECLAIMABLE
195 * state. Wait for the initialisation to complete before
196 * continuing.
197 */
198 wait_on_inode(VFS_I(ip));
199 }
200 264
201 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 265 /* We've got a live one. */
202 error = ENOENT; 266 spin_unlock(&ip->i_flags_lock);
203 iput(VFS_I(ip)); 267 read_unlock(&pag->pag_ici_lock);
204 goto out_error;
205 } 268 }
206 269
207 /* We've got a live one. */
208 read_unlock(&pag->pag_ici_lock);
209
210 if (lock_flags != 0) 270 if (lock_flags != 0)
211 xfs_ilock(ip, lock_flags); 271 xfs_ilock(ip, lock_flags);
212 272
@@ -216,6 +276,7 @@ xfs_iget_cache_hit(
216 return 0; 276 return 0;
217 277
218out_error: 278out_error:
279 spin_unlock(&ip->i_flags_lock);
219 read_unlock(&pag->pag_ici_lock); 280 read_unlock(&pag->pag_ici_lock);
220 return error; 281 return error;
221} 282}
@@ -299,7 +360,8 @@ out_preload_end:
299 if (lock_flags) 360 if (lock_flags)
300 xfs_iunlock(ip, lock_flags); 361 xfs_iunlock(ip, lock_flags);
301out_destroy: 362out_destroy:
302 xfs_destroy_inode(ip); 363 __destroy_inode(VFS_I(ip));
364 xfs_inode_free(ip);
303 return error; 365 return error;
304} 366}
305 367
@@ -504,62 +566,7 @@ xfs_ireclaim(
504 xfs_qm_dqdetach(ip); 566 xfs_qm_dqdetach(ip);
505 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 567 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
506 568
507 switch (ip->i_d.di_mode & S_IFMT) { 569 xfs_inode_free(ip);
508 case S_IFREG:
509 case S_IFDIR:
510 case S_IFLNK:
511 xfs_idestroy_fork(ip, XFS_DATA_FORK);
512 break;
513 }
514
515 if (ip->i_afp)
516 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
517
518#ifdef XFS_INODE_TRACE
519 ktrace_free(ip->i_trace);
520#endif
521#ifdef XFS_BMAP_TRACE
522 ktrace_free(ip->i_xtrace);
523#endif
524#ifdef XFS_BTREE_TRACE
525 ktrace_free(ip->i_btrace);
526#endif
527#ifdef XFS_RW_TRACE
528 ktrace_free(ip->i_rwtrace);
529#endif
530#ifdef XFS_ILOCK_TRACE
531 ktrace_free(ip->i_lock_trace);
532#endif
533#ifdef XFS_DIR2_TRACE
534 ktrace_free(ip->i_dir_trace);
535#endif
536 if (ip->i_itemp) {
537 /*
538 * Only if we are shutting down the fs will we see an
539 * inode still in the AIL. If it is there, we should remove
540 * it to prevent a use-after-free from occurring.
541 */
542 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
543 struct xfs_ail *ailp = lip->li_ailp;
544
545 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
546 XFS_FORCED_SHUTDOWN(ip->i_mount));
547 if (lip->li_flags & XFS_LI_IN_AIL) {
548 spin_lock(&ailp->xa_lock);
549 if (lip->li_flags & XFS_LI_IN_AIL)
550 xfs_trans_ail_delete(ailp, lip);
551 else
552 spin_unlock(&ailp->xa_lock);
553 }
554 xfs_inode_item_destroy(ip);
555 ip->i_itemp = NULL;
556 }
557 /* asserts to verify all state is correct here */
558 ASSERT(atomic_read(&ip->i_iocount) == 0);
559 ASSERT(atomic_read(&ip->i_pincount) == 0);
560 ASSERT(!spin_is_locked(&ip->i_flags_lock));
561 ASSERT(completion_done(&ip->i_flush));
562 kmem_zone_free(xfs_inode_zone, ip);
563} 570}
564 571
565/* 572/*