Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r--  fs/xfs/xfs_iget.c | 735
1 file changed, 399 insertions(+), 336 deletions(-)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e229e9e001c2..e2fb6210d4c5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,281 +38,283 @@
 #include "xfs_ialloc.h"
 #include "xfs_quota.h"
 #include "xfs_utils.h"
+#include "xfs_trans_priv.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_btree_trace.h"
+#include "xfs_dir2_trace.h"
+
 
 /*
- * Look up an inode by number in the given file system.
- * The inode is looked up in the cache held in each AG.
- * If the inode is found in the cache, attach it to the provided
- * vnode.
- *
- * If it is not in core, read it in from the file system's device,
- * add it to the cache and attach the provided vnode.
- *
- * The inode is locked according to the value of the lock_flags parameter.
- * This flag parameter indicates how and if the inode's IO lock and inode lock
- * should be taken.
- *
- * mp -- the mount point structure for the current file system.  It points
- *       to the inode hash table.
- * tp -- a pointer to the current transaction if there is one.  This is
- *       simply passed through to the xfs_iread() call.
- * ino -- the number of the inode desired.  This is the unique identifier
- *        within the file system for the inode being requested.
- * lock_flags -- flags indicating how to lock the inode.  See the comment
- *               for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- *        if known (as by bulkstat), else 0.
+ * Allocate and initialise an xfs_inode.
  */
-STATIC int
-xfs_iget_core(
-        struct inode    *inode,
-        xfs_mount_t     *mp,
-        xfs_trans_t     *tp,
-        xfs_ino_t       ino,
-        uint            flags,
-        uint            lock_flags,
-        xfs_inode_t     **ipp,
-        xfs_daddr_t     bno)
+STATIC struct xfs_inode *
+xfs_inode_alloc(
+        struct xfs_mount        *mp,
+        xfs_ino_t               ino)
 {
-        struct inode    *old_inode;
-        xfs_inode_t     *ip;
-        xfs_inode_t     *iq;
-        int             error;
-        unsigned long   first_index, mask;
-        xfs_perag_t     *pag;
-        xfs_agino_t     agino;
+        struct xfs_inode        *ip;
 
-        /* the radix tree exists only in inode capable AGs */
-        if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
-                return EINVAL;
+        /*
+         * if this didn't occur in transactions, we could use
+         * KM_MAYFAIL and return NULL here on ENOMEM. Set the
+         * code up to do this anyway.
+         */
+        ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+        if (!ip)
+                return NULL;
 
-        /* get the perag structure and ensure that it's inode capable */
-        pag = xfs_get_perag(mp, ino);
-        if (!pag->pagi_inodeok)
-                return EINVAL;
-        ASSERT(pag->pag_ici_init);
-        agino = XFS_INO_TO_AGINO(mp, ino);
+        ASSERT(atomic_read(&ip->i_iocount) == 0);
+        ASSERT(atomic_read(&ip->i_pincount) == 0);
+        ASSERT(!spin_is_locked(&ip->i_flags_lock));
+        ASSERT(completion_done(&ip->i_flush));
 
-again:
-        read_lock(&pag->pag_ici_lock);
-        ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+        /*
+         * initialise the VFS inode here to get failures
+         * out of the way early.
+         */
+        if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+                kmem_zone_free(xfs_inode_zone, ip);
+                return NULL;
+        }
+
+        /* initialise the xfs inode */
+        ip->i_ino = ino;
+        ip->i_mount = mp;
+        memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
+        ip->i_afp = NULL;
+        memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+        ip->i_flags = 0;
+        ip->i_update_core = 0;
+        ip->i_update_size = 0;
+        ip->i_delayed_blks = 0;
+        memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+        ip->i_size = 0;
+        ip->i_new_size = 0;
+
+        /*
+         * Initialize inode's trace buffers.
+         */
+#ifdef XFS_INODE_TRACE
+        ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BMAP_TRACE
+        ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BTREE_TRACE
+        ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_RW_TRACE
+        ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_ILOCK_TRACE
+        ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_DIR2_TRACE
+        ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
+#endif
+
+        return ip;
+}
+
+/*
+ * Check the validity of the inode we just found in the cache
+ */
+static int
+xfs_iget_cache_hit(
+        struct xfs_perag        *pag,
+        struct xfs_inode        *ip,
+        int                     flags,
+        int                     lock_flags) __releases(pag->pag_ici_lock)
+{
+        struct xfs_mount        *mp = ip->i_mount;
+        int                     error = EAGAIN;
+
+        /*
+         * If INEW is set this inode is being set up
+         * If IRECLAIM is set this inode is being torn down
+         * Pause and try again.
+         */
+        if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+                XFS_STATS_INC(xs_ig_frecycle);
+                goto out_error;
+        }
+
+        /* If IRECLAIMABLE is set, we've torn down the vfs inode part */
+        if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
 
-        if (ip != NULL) {
                 /*
-                 * If INEW is set this inode is being set up
-                 * we need to pause and try again.
+                 * If lookup is racing with unlink, then we should return an
+                 * error immediately so we don't remove it from the reclaim
+                 * list and potentially leak the inode.
                  */
-                if (xfs_iflags_test(ip, XFS_INEW)) {
-                        read_unlock(&pag->pag_ici_lock);
-                        delay(1);
-                        XFS_STATS_INC(xs_ig_frecycle);
-
-                        goto again;
+                if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+                        error = ENOENT;
+                        goto out_error;
                 }
 
-                old_inode = ip->i_vnode;
-                if (old_inode == NULL) {
-                        /*
-                         * If IRECLAIM is set this inode is
-                         * on its way out of the system,
-                         * we need to pause and try again.
-                         */
-                        if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-                                read_unlock(&pag->pag_ici_lock);
-                                delay(1);
-                                XFS_STATS_INC(xs_ig_frecycle);
-
-                                goto again;
-                        }
-                        ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-
-                        /*
-                         * If lookup is racing with unlink, then we
-                         * should return an error immediately so we
-                         * don't remove it from the reclaim list and
-                         * potentially leak the inode.
-                         */
-                        if ((ip->i_d.di_mode == 0) &&
-                            !(flags & XFS_IGET_CREATE)) {
-                                read_unlock(&pag->pag_ici_lock);
-                                xfs_put_perag(mp, pag);
-                                return ENOENT;
-                        }
-
-                        xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
-
-                        XFS_STATS_INC(xs_ig_found);
-                        xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
-                        read_unlock(&pag->pag_ici_lock);
-
-                        XFS_MOUNT_ILOCK(mp);
-                        list_del_init(&ip->i_reclaim);
-                        XFS_MOUNT_IUNLOCK(mp);
-
-                        goto finish_inode;
-
-                } else if (inode != old_inode) {
-                        /* The inode is being torn down, pause and
-                         * try again.
-                         */
-                        if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
-                                read_unlock(&pag->pag_ici_lock);
-                                delay(1);
-                                XFS_STATS_INC(xs_ig_frecycle);
-
-                                goto again;
-                        }
-/* Chances are the other vnode (the one in the inode) is being torn
-* down right now, and we landed on top of it. Question is, what do
-* we do? Unhook the old inode and hook up the new one?
-*/
-                        cmn_err(CE_PANIC,
-                "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
-                                        old_inode, inode);
-                }
+                xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 
                 /*
-                 * Inode cache hit
+                 * We need to re-initialise the VFS inode as it has been
+                 * 'freed' by the VFS. Do this here so we can deal with
+                 * errors cleanly, then tag it so it can be set up correctly
+                 * later.
                  */
-                read_unlock(&pag->pag_ici_lock);
-                XFS_STATS_INC(xs_ig_found);
-
-finish_inode:
-                if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-                        xfs_put_perag(mp, pag);
-                        return ENOENT;
+                if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+                        error = ENOMEM;
+                        goto out_error;
                 }
 
-                if (lock_flags != 0)
-                        xfs_ilock(ip, lock_flags);
+                /*
+                 * We must set the XFS_INEW flag before clearing the
+                 * XFS_IRECLAIMABLE flag so that if a racing lookup does
+                 * not find the XFS_IRECLAIMABLE above but has the igrab()
+                 * below succeed we can safely check XFS_INEW to detect
+                 * that this inode is still being initialised.
+                 */
+                xfs_iflags_set(ip, XFS_INEW);
+                xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+
+                /* clear the radix tree reclaim flag as well. */
+                __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+        } else if (!igrab(VFS_I(ip))) {
+                /* If the VFS inode is being torn down, pause and try again. */
+                XFS_STATS_INC(xs_ig_frecycle);
+                goto out_error;
+        } else if (xfs_iflags_test(ip, XFS_INEW)) {
+                /*
+                 * We are racing with another cache hit that is
+                 * currently recycling this inode out of the XFS_IRECLAIMABLE
+                 * state. Wait for the initialisation to complete before
+                 * continuing.
+                 */
+                wait_on_inode(VFS_I(ip));
+        }
 
-                xfs_iflags_clear(ip, XFS_ISTALE);
-                xfs_itrace_exit_tag(ip, "xfs_iget.found");
-                goto return_ip;
+        if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+                error = ENOENT;
+                iput(VFS_I(ip));
+                goto out_error;
         }
 
-        /*
-         * Inode cache miss
-         */
+        /* We've got a live one. */
         read_unlock(&pag->pag_ici_lock);
-        XFS_STATS_INC(xs_ig_missed);
 
-        /*
-         * Read the disk inode attributes into a new inode structure and get
-         * a new vnode for it. This should also initialize i_ino and i_mount.
-         */
-        error = xfs_iread(mp, tp, ino, &ip, bno,
-                          (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
-        if (error) {
-                xfs_put_perag(mp, pag);
-                return error;
-        }
+        if (lock_flags != 0)
+                xfs_ilock(ip, lock_flags);
 
-        xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
+        xfs_iflags_clear(ip, XFS_ISTALE);
+        xfs_itrace_exit_tag(ip, "xfs_iget.found");
+        XFS_STATS_INC(xs_ig_found);
+        return 0;
+
+out_error:
+        read_unlock(&pag->pag_ici_lock);
+        return error;
+}
 
 
-        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-                     "xfsino", ip->i_ino);
-        mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-        init_waitqueue_head(&ip->i_ipin_wait);
-        atomic_set(&ip->i_pincount, 0);
+static int
+xfs_iget_cache_miss(
+        struct xfs_mount        *mp,
+        struct xfs_perag        *pag,
+        xfs_trans_t             *tp,
+        xfs_ino_t               ino,
+        struct xfs_inode        **ipp,
+        xfs_daddr_t             bno,
+        int                     flags,
+        int                     lock_flags) __releases(pag->pag_ici_lock)
+{
+        struct xfs_inode        *ip;
+        int                     error;
+        unsigned long           first_index, mask;
+        xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ino);
 
-        /*
-         * Because we want to use a counting completion, complete
-         * the flush completion once to allow a single access to
-         * the flush completion without blocking.
-         */
-        init_completion(&ip->i_flush);
-        complete(&ip->i_flush);
+        ip = xfs_inode_alloc(mp, ino);
+        if (!ip)
+                return ENOMEM;
 
-        if (lock_flags)
-                xfs_ilock(ip, lock_flags);
+        error = xfs_iread(mp, tp, ip, bno, flags);
+        if (error)
+                goto out_destroy;
+
+        xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 
         if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-                xfs_idestroy(ip);
-                xfs_put_perag(mp, pag);
-                return ENOENT;
+                error = ENOENT;
+                goto out_destroy;
         }
 
+        if (lock_flags)
+                xfs_ilock(ip, lock_flags);
+
         /*
          * Preload the radix tree so we can insert safely under the
-         * write spinlock.
+         * write spinlock. Note that we cannot sleep inside the preload
+         * region.
          */
         if (radix_tree_preload(GFP_KERNEL)) {
-                xfs_idestroy(ip);
-                delay(1);
-                goto again;
+                error = EAGAIN;
+                goto out_unlock;
         }
+
         mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
         first_index = agino & mask;
         write_lock(&pag->pag_ici_lock);
-        /*
-         * insert the new inode
-         */
+
+        /* insert the new inode */
         error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
         if (unlikely(error)) {
-                BUG_ON(error != -EEXIST);
-                write_unlock(&pag->pag_ici_lock);
-                radix_tree_preload_end();
-                xfs_idestroy(ip);
+                WARN_ON(error != -EEXIST);
                 XFS_STATS_INC(xs_ig_dup);
-                goto again;
+                error = EAGAIN;
+                goto out_preload_end;
         }
 
-        /*
-         * These values _must_ be set before releasing the radix tree lock!
-         */
+        /* These values _must_ be set before releasing the radix tree lock! */
         ip->i_udquot = ip->i_gdquot = NULL;
         xfs_iflags_set(ip, XFS_INEW);
 
         write_unlock(&pag->pag_ici_lock);
         radix_tree_preload_end();
-
-        /*
-         * Link ip to its mount and thread it on the mount's inode list.
-         */
-        XFS_MOUNT_ILOCK(mp);
-        if ((iq = mp->m_inodes)) {
-                ASSERT(iq->i_mprev->i_mnext == iq);
-                ip->i_mprev = iq->i_mprev;
-                iq->i_mprev->i_mnext = ip;
-                iq->i_mprev = ip;
-                ip->i_mnext = iq;
-        } else {
-                ip->i_mnext = ip;
-                ip->i_mprev = ip;
-        }
-        mp->m_inodes = ip;
-
-        XFS_MOUNT_IUNLOCK(mp);
-        xfs_put_perag(mp, pag);
-
- return_ip:
-        ASSERT(ip->i_df.if_ext_max ==
-               XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
-
-        xfs_iflags_set(ip, XFS_IMODIFIED);
         *ipp = ip;
-
-        /*
-         * Set up the Linux with the Linux inode.
-         */
-        ip->i_vnode = inode;
-        inode->i_private = ip;
-
-        /*
-         * If we have a real type for an on-disk inode, we can set ops(&unlock)
-         * now.  If it's a new inode being created, xfs_ialloc will handle it.
-         */
-        if (ip->i_d.di_mode != 0)
-                xfs_setup_inode(ip);
         return 0;
-}
 
+out_preload_end:
+        write_unlock(&pag->pag_ici_lock);
+        radix_tree_preload_end();
+out_unlock:
+        if (lock_flags)
+                xfs_iunlock(ip, lock_flags);
+out_destroy:
+        xfs_destroy_inode(ip);
+        return error;
+}
 
 /*
- * The 'normal' internal xfs_iget, if needed it will
- * 'allocate', or 'get', the vnode.
+ * Look up an inode by number in the given file system.
+ * The inode is looked up in the cache held in each AG.
+ * If the inode is found in the cache, initialise the vfs inode
+ * if necessary.
+ *
+ * If it is not in core, read it in from the file system's device,
+ * add it to the cache and initialise the vfs inode.
+ *
+ * The inode is locked according to the value of the lock_flags parameter.
+ * This flag parameter indicates how and if the inode's IO lock and inode lock
+ * should be taken.
+ *
+ * mp -- the mount point structure for the current file system.  It points
+ *       to the inode hash table.
+ * tp -- a pointer to the current transaction if there is one.  This is
+ *       simply passed through to the xfs_iread() call.
+ * ino -- the number of the inode desired.  This is the unique identifier
+ *        within the file system for the inode being requested.
+ * lock_flags -- flags indicating how to lock the inode.  See the comment
+ *               for xfs_ilock() for a list of valid values.
+ * bno -- the block number starting the buffer containing the inode,
+ *        if known (as by bulkstat), else 0.
  */
 int
 xfs_iget(
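xfs_iget_cache_miss() above is a textbook publish-under-lock pattern: the inode is fully built outside any lock, radix_tree_preload() reserves tree nodes so the insert cannot sleep, and a racing -EEXIST from radix_tree_insert() is turned into EAGAIN so the caller simply retries the lookup. Below is a minimal user-space sketch of that insert-or-retry idiom, assuming pthreads and a direct-mapped table in place of the kernel radix tree; all names (obj_cache, cache_insert, cache_get) are hypothetical, not XFS or kernel APIs.

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

#define CACHE_SLOTS     1024

struct obj {
        unsigned long   key;
        /* expensive-to-initialise payload would live here */
};

static struct obj *obj_cache[CACHE_SLOTS];
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Publish a fully-built object: 0, EEXIST (lost a race) or ENOSPC. */
static int cache_insert(struct obj *nobj)
{
        unsigned long slot = nobj->key % CACHE_SLOTS;
        struct obj *cur;
        int error = 0;

        pthread_mutex_lock(&cache_lock);
        cur = obj_cache[slot];
        if (!cur)
                obj_cache[slot] = nobj;
        else if (cur->key == nobj->key)
                error = EEXIST;         /* another thread inserted first */
        else
                error = ENOSPC;         /* unrelated key collides: give up */
        pthread_mutex_unlock(&cache_lock);
        return error;
}

static struct obj *cache_get(unsigned long key)
{
        unsigned long slot = key % CACHE_SLOTS;

        for (;;) {
                struct obj *obj;
                int error;

                pthread_mutex_lock(&cache_lock);
                obj = obj_cache[slot];
                pthread_mutex_unlock(&cache_lock);
                if (obj && obj->key == key)
                        return obj;             /* cache hit */

                /* cache miss: construct outside the lock */
                obj = malloc(sizeof(*obj));
                if (!obj)
                        return NULL;
                obj->key = key;

                error = cache_insert(obj);
                if (!error)
                        return obj;
                free(obj);                      /* we lost: drop our copy */
                if (error != EEXIST)
                        return NULL;
                /* EEXIST plays the role of EAGAIN above: retry the lookup */
        }
}

The property worth copying, as in the XFS code, is that the loser of an insert race frees its own copy and goes back to the lookup rather than treating the duplicate as an error.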
@@ -324,61 +326,64 @@ xfs_iget(
         xfs_inode_t     **ipp,
         xfs_daddr_t     bno)
 {
-        struct inode    *inode;
         xfs_inode_t     *ip;
         int             error;
+        xfs_perag_t     *pag;
+        xfs_agino_t     agino;
 
-        XFS_STATS_INC(xs_ig_attempts);
+        /* the radix tree exists only in inode capable AGs */
+        if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+                return EINVAL;
 
-retry:
-        inode = iget_locked(mp->m_super, ino);
-        if (!inode)
-                /* If we got no inode we are out of memory */
-                return ENOMEM;
+        /* get the perag structure and ensure that it's inode capable */
+        pag = xfs_get_perag(mp, ino);
+        if (!pag->pagi_inodeok)
+                return EINVAL;
+        ASSERT(pag->pag_ici_init);
+        agino = XFS_INO_TO_AGINO(mp, ino);
 
-        if (inode->i_state & I_NEW) {
-                XFS_STATS_INC(vn_active);
-                XFS_STATS_INC(vn_alloc);
-
-                error = xfs_iget_core(inode, mp, tp, ino, flags,
-                                lock_flags, ipp, bno);
-                if (error) {
-                        make_bad_inode(inode);
-                        if (inode->i_state & I_NEW)
-                                unlock_new_inode(inode);
-                        iput(inode);
-                }
-                return error;
+again:
+        error = 0;
+        read_lock(&pag->pag_ici_lock);
+        ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+
+        if (ip) {
+                error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+                if (error)
+                        goto out_error_or_again;
+        } else {
+                read_unlock(&pag->pag_ici_lock);
+                XFS_STATS_INC(xs_ig_missed);
+
+                error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+                                                        flags, lock_flags);
+                if (error)
+                        goto out_error_or_again;
         }
+        xfs_put_perag(mp, pag);
 
+        *ipp = ip;
+
+        ASSERT(ip->i_df.if_ext_max ==
+               XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
         /*
-         * If the inode is not fully constructed due to
-         * filehandle mismatches wait for the inode to go
-         * away and try again.
-         *
-         * iget_locked will call __wait_on_freeing_inode
-         * to wait for the inode to go away.
+         * If we have a real type for an on-disk inode, we can set ops(&unlock)
+         * now.  If it's a new inode being created, xfs_ialloc will handle it.
          */
-        if (is_bad_inode(inode)) {
-                iput(inode);
-                delay(1);
-                goto retry;
-        }
+        if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+                xfs_setup_inode(ip);
+        return 0;
 
-        ip = XFS_I(inode);
-        if (!ip) {
-                iput(inode);
+out_error_or_again:
+        if (error == EAGAIN) {
                 delay(1);
-                goto retry;
+                goto again;
         }
-
-        if (lock_flags != 0)
-                xfs_ilock(ip, lock_flags);
-        XFS_STATS_INC(xs_ig_found);
-        *ipp = ip;
-        return 0;
+        xfs_put_perag(mp, pag);
+        return error;
 }
 
+
 /*
  * Look for the inode corresponding to the given ino in the hash table.
  * If it is there and its i_transp pointer matches tp, return it.
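Both helper paths report transient races to xfs_iget() as EAGAIN, which it handles with delay(1) and a retry of the whole lookup. The subtler invariant is the one spelled out in xfs_iget_cache_hit(): XFS_INEW must be set before XFS_IRECLAIMABLE is cleared, so a concurrent lookup always observes at least one of the two flags while the inode is being recycled. A hedged C11-atomics sketch of that ordering, using hypothetical flag and type names rather than the XFS ones:

#include <stdatomic.h>
#include <stdbool.h>

#define OBJ_NEW         0x1u    /* being (re)initialised */
#define OBJ_RECLAIMABLE 0x2u    /* torn down, waiting for reuse or free */

struct obj {
        atomic_uint     flags;
};

/* Recycle a reclaimable object: set NEW *before* clearing RECLAIMABLE. */
static void obj_start_recycle(struct obj *obj)
{
        atomic_fetch_or(&obj->flags, OBJ_NEW);
        atomic_fetch_and(&obj->flags, ~OBJ_RECLAIMABLE);
}

/*
 * A racing lookup never sees both flags clear while the object is in
 * limbo, so this test is safe without any extra locking.
 */
static bool obj_in_limbo(struct obj *obj)
{
        return atomic_load(&obj->flags) & (OBJ_NEW | OBJ_RECLAIMABLE);
}

If the two operations in obj_start_recycle() were reversed, a lookup running between them would see neither flag and treat a half-initialised object as live, which is exactly the window the comment in xfs_iget_cache_hit() closes.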
@@ -444,99 +449,109 @@ xfs_iput_new(
         IRELE(ip);
 }
 
-
 /*
- * This routine embodies the part of the reclaim code that pulls
- * the inode from the inode hash table and the mount structure's
- * inode list.
- * This should only be called from xfs_reclaim().
+ * This is called to free all the memory associated with an inode.
+ * It must free the inode itself and any buffers allocated for
+ * if_extents/if_data and if_broot.  It must also free the lock
+ * associated with the inode.
+ *
+ * Note: because we don't initialise everything on reallocation out
+ * of the zone, we must ensure we nullify everything correctly before
+ * freeing the structure.
  */
 void
-xfs_ireclaim(xfs_inode_t *ip)
+xfs_ireclaim(
+        struct xfs_inode        *ip)
 {
-        /*
-         * Remove from old hash list and mount list.
-         */
-        XFS_STATS_INC(xs_ig_reclaims);
+        struct xfs_mount        *mp = ip->i_mount;
+        struct xfs_perag        *pag;
 
-        xfs_iextract(ip);
-
-        /*
-         * Here we do a spurious inode lock in order to coordinate with
-         * xfs_sync().  This is because xfs_sync() references the inodes
-         * in the mount list without taking references on the corresponding
-         * vnodes.  We make that OK here by ensuring that we wait until
-         * the inode is unlocked in xfs_sync() before we go ahead and
-         * free it.  We get both the regular lock and the io lock because
-         * the xfs_sync() code may need to drop the regular one but will
-         * still hold the io lock.
-         */
-        xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-        /*
-         * Release dquots (and their references) if any. An inode may escape
-         * xfs_inactive and get here via vn_alloc->vn_reclaim path.
-         */
-        XFS_QM_DQDETACH(ip->i_mount, ip);
-
-        /*
-         * Pull our behavior descriptor from the vnode chain.
-         */
-        if (ip->i_vnode) {
-                ip->i_vnode->i_private = NULL;
-                ip->i_vnode = NULL;
-        }
+        XFS_STATS_INC(xs_ig_reclaims);
 
         /*
-         * Free all memory associated with the inode.
+         * Remove the inode from the per-AG radix tree.  It doesn't matter
+         * if it was never added to it because radix_tree_delete can deal
+         * with that case just fine.
          */
-        xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-        xfs_idestroy(ip);
-}
-
-/*
- * This routine removes an about-to-be-destroyed inode from
- * all of the lists in which it is located with the exception
- * of the behavior chain.
- */
-void
-xfs_iextract(
-        xfs_inode_t     *ip)
-{
-        xfs_mount_t     *mp = ip->i_mount;
-        xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
-        xfs_inode_t     *iq;
-
+        pag = xfs_get_perag(mp, ip->i_ino);
         write_lock(&pag->pag_ici_lock);
         radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
         write_unlock(&pag->pag_ici_lock);
         xfs_put_perag(mp, pag);
 
         /*
-         * Remove from mount's inode list.
+         * Here we do an (almost) spurious inode lock in order to coordinate
+         * with inode cache radix tree lookups.  This is because the lookup
+         * can reference the inodes in the cache without taking references.
+         *
+         * We make that OK here by ensuring that we wait until the inode is
+         * unlocked after the lookup before we go ahead and free it.  We get
+         * both the ilock and the iolock because the code may need to drop
+         * the ilock but will still hold the iolock.
          */
-        XFS_MOUNT_ILOCK(mp);
-        ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
-        iq = ip->i_mnext;
-        iq->i_mprev = ip->i_mprev;
-        ip->i_mprev->i_mnext = iq;
-
+        xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
         /*
-         * Fix up the head pointer if it points to the inode being deleted.
+         * Release dquots (and their references) if any.
          */
-        if (mp->m_inodes == ip) {
-                if (ip == iq) {
-                        mp->m_inodes = NULL;
-                } else {
-                        mp->m_inodes = iq;
-                }
+        XFS_QM_DQDETACH(ip->i_mount, ip);
+        xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+        switch (ip->i_d.di_mode & S_IFMT) {
+        case S_IFREG:
+        case S_IFDIR:
+        case S_IFLNK:
+                xfs_idestroy_fork(ip, XFS_DATA_FORK);
+                break;
         }
 
-        /* Deal with the deleted inodes list */
-        list_del_init(&ip->i_reclaim);
+        if (ip->i_afp)
+                xfs_idestroy_fork(ip, XFS_ATTR_FORK);
 
-        mp->m_ireclaims++;
-        XFS_MOUNT_IUNLOCK(mp);
+#ifdef XFS_INODE_TRACE
+        ktrace_free(ip->i_trace);
+#endif
+#ifdef XFS_BMAP_TRACE
+        ktrace_free(ip->i_xtrace);
+#endif
+#ifdef XFS_BTREE_TRACE
+        ktrace_free(ip->i_btrace);
+#endif
+#ifdef XFS_RW_TRACE
+        ktrace_free(ip->i_rwtrace);
+#endif
+#ifdef XFS_ILOCK_TRACE
+        ktrace_free(ip->i_lock_trace);
+#endif
+#ifdef XFS_DIR2_TRACE
+        ktrace_free(ip->i_dir_trace);
+#endif
+        if (ip->i_itemp) {
+                /*
+                 * Only if we are shutting down the fs will we see an
+                 * inode still in the AIL. If it is there, we should remove
+                 * it to prevent a use-after-free from occurring.
+                 */
+                xfs_log_item_t  *lip = &ip->i_itemp->ili_item;
+                struct xfs_ail  *ailp = lip->li_ailp;
+
+                ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
+                                       XFS_FORCED_SHUTDOWN(ip->i_mount));
+                if (lip->li_flags & XFS_LI_IN_AIL) {
+                        spin_lock(&ailp->xa_lock);
+                        if (lip->li_flags & XFS_LI_IN_AIL)
+                                xfs_trans_ail_delete(ailp, lip);
+                        else
+                                spin_unlock(&ailp->xa_lock);
+                }
+                xfs_inode_item_destroy(ip);
+                ip->i_itemp = NULL;
+        }
+        /* asserts to verify all state is correct here */
+        ASSERT(atomic_read(&ip->i_iocount) == 0);
+        ASSERT(atomic_read(&ip->i_pincount) == 0);
+        ASSERT(!spin_is_locked(&ip->i_flags_lock));
+        ASSERT(completion_done(&ip->i_flush));
+        kmem_zone_free(xfs_inode_zone, ip);
 }
 
 /*
@@ -737,7 +752,7 @@ xfs_iunlock(
                 * it is in the AIL and anyone is waiting on it.  Don't do
                 * this if the caller has asked us not to.
                 */
-               xfs_trans_unlocked_item(ip->i_mount,
+               xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
                                        (xfs_log_item_t*)(ip->i_itemp));
        }
        xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
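One detail of the rewritten xfs_ireclaim() above is worth calling out: once the inode is deleted from the per-AG radix tree, taking and immediately releasing its ilock/iolock acts as a barrier that lets any lookup which found the inode before the delete drain out before the memory is freed. A rough pthreads sketch of that lock-as-barrier pattern, under hypothetical names (obj, obj_reclaim), not the XFS locking API:

#include <pthread.h>
#include <stdlib.h>

struct obj {
        pthread_rwlock_t        lock;   /* initialised when the object is created */
        /* ... payload ... */
};

/* Caller has already removed 'obj' from the shared lookup structure. */
static void obj_reclaim(struct obj *obj)
{
        /*
         * Barrier: a lookup that found the object before it was
         * unhooked may still hold the lock; acquiring it exclusively
         * and dropping it again waits all of them out.
         */
        pthread_rwlock_wrlock(&obj->lock);
        pthread_rwlock_unlock(&obj->lock);

        /* No one can find it and no one holds it: safe to free. */
        pthread_rwlock_destroy(&obj->lock);
        free(obj);
}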
@@ -790,3 +805,51 @@ xfs_isilocked(
 }
 #endif
 
+#ifdef XFS_INODE_TRACE
+
+#define KTRACE_ENTER(ip, vk, s, line, ra)                       \
+        ktrace_enter((ip)->i_trace,                             \
+/*  0 */                (void *)(__psint_t)(vk),                \
+/*  1 */                (void *)(s),                            \
+/*  2 */                (void *)(__psint_t) line,               \
+/*  3 */                (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
+/*  4 */                (void *)(ra),                           \
+/*  5 */                NULL,                                   \
+/*  6 */                (void *)(__psint_t)current_cpu(),       \
+/*  7 */                (void *)(__psint_t)current_pid(),       \
+/*  8 */                (void *)__return_address,               \
+/*  9 */                NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+
+/*
+ * Vnode tracing code.
+ */
+void
+_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
+{
+        KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
+}
+
+void
+_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
+{
+        KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
+}
+
+void
+xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
+{
+        KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
+}
+
+void
+_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
+{
+        KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
+}
+
+void
+xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
+{
+        KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
+}
+#endif  /* XFS_INODE_TRACE */
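The new XFS_INODE_TRACE block records each event as a single preallocated, fixed-arity record (16 pointer-sized slots, unused slots NULL) via ktrace_enter(), so tracing never allocates on the hot path. A rough single-threaded user-space analogue of that fixed-slot ring buffer, with hypothetical names (trace_buf, trace_enter) rather than the ktrace API, and without the locking a real tracer would need:

#include <stdint.h>
#include <string.h>

#define TRACE_ENTRIES   64      /* must be a power of two */
#define TRACE_SLOTS     16      /* matches the 16 slots filled by KTRACE_ENTER */

struct trace_buf {
        unsigned int    next;
        uintptr_t       entry[TRACE_ENTRIES][TRACE_SLOTS];
};

/* Record one event: no allocation, just a copy into a preallocated slot. */
static void trace_enter(struct trace_buf *tb,
                        const uintptr_t *slots, unsigned int nslots)
{
        uintptr_t *rec = tb->entry[tb->next++ & (TRACE_ENTRIES - 1)];

        if (nslots > TRACE_SLOTS)
                nslots = TRACE_SLOTS;
        memset(rec, 0, TRACE_SLOTS * sizeof(*rec));
        memcpy(rec, slots, nslots * sizeof(*rec));
}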