about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/xfs/Makefile 1
-rw-r--r-- fs/xfs/xfs_export.c 1
-rw-r--r-- fs/xfs/xfs_icache.c 421
-rw-r--r-- fs/xfs/xfs_icache.h 6
-rw-r--r-- fs/xfs/xfs_iget.c 455
-rw-r--r-- fs/xfs/xfs_inode.c 1
-rw-r--r-- fs/xfs/xfs_inode.h 10
-rw-r--r-- fs/xfs/xfs_itable.c 1
-rw-r--r-- fs/xfs/xfs_log_recover.c 1
-rw-r--r-- fs/xfs/xfs_qm.c 1
-rw-r--r-- fs/xfs/xfs_rtalloc.c 1
-rw-r--r-- fs/xfs/xfs_vnodeops.c 1
12 files changed, 430 insertions, 470 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 442f256dbcac..e65357bb3dc6 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,6 @@ xfs-y += xfs_aops.o \
40 xfs_fs_subr.o \ 40 xfs_fs_subr.o \
41 xfs_globals.o \ 41 xfs_globals.o \
42 xfs_icache.o \ 42 xfs_icache.o \
43 xfs_iget.o \
44 xfs_ioctl.o \ 43 xfs_ioctl.o \
45 xfs_iomap.o \ 44 xfs_iomap.o \
46 xfs_iops.o \ 45 xfs_iops.o \
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 8c6d1d70278c..a83611849cee 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -29,6 +29,7 @@
29#include "xfs_inode.h" 29#include "xfs_inode.h"
30#include "xfs_inode_item.h" 30#include "xfs_inode_item.h"
31#include "xfs_trace.h" 31#include "xfs_trace.h"
32#include "xfs_icache.h"
32 33
33/* 34/*
34 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index eba216f11d5e..9c8703b5cd72 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -41,6 +41,421 @@
41#include <linux/kthread.h> 41#include <linux/kthread.h>
42#include <linux/freezer.h> 42#include <linux/freezer.h>
43 43
44STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
45 struct xfs_perag *pag, struct xfs_inode *ip);
46
47/*
48 * Allocate and initialise an xfs_inode.
49 */
50STATIC struct xfs_inode *
51xfs_inode_alloc(
52 struct xfs_mount *mp,
53 xfs_ino_t ino)
54{
55 struct xfs_inode *ip;
56
57 /*
58 * if this didn't occur in transactions, we could use
59 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
60 * code up to do this anyway.
61 */
62 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
63 if (!ip)
64 return NULL;
65 if (inode_init_always(mp->m_super, VFS_I(ip))) {
66 kmem_zone_free(xfs_inode_zone, ip);
67 return NULL;
68 }
69
70 ASSERT(atomic_read(&ip->i_pincount) == 0);
71 ASSERT(!spin_is_locked(&ip->i_flags_lock));
72 ASSERT(!xfs_isiflocked(ip));
73 ASSERT(ip->i_ino == 0);
74
75 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
76
77 /* initialise the xfs inode */
78 ip->i_ino = ino;
79 ip->i_mount = mp;
80 memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
81 ip->i_afp = NULL;
82 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
83 ip->i_flags = 0;
84 ip->i_delayed_blks = 0;
85 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
86
87 return ip;
88}
89
90STATIC void
91xfs_inode_free_callback(
92 struct rcu_head *head)
93{
94 struct inode *inode = container_of(head, struct inode, i_rcu);
95 struct xfs_inode *ip = XFS_I(inode);
96
97 kmem_zone_free(xfs_inode_zone, ip);
98}
99
100STATIC void
101xfs_inode_free(
102 struct xfs_inode *ip)
103{
104 switch (ip->i_d.di_mode & S_IFMT) {
105 case S_IFREG:
106 case S_IFDIR:
107 case S_IFLNK:
108 xfs_idestroy_fork(ip, XFS_DATA_FORK);
109 break;
110 }
111
112 if (ip->i_afp)
113 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
114
115 if (ip->i_itemp) {
116 ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
117 xfs_inode_item_destroy(ip);
118 ip->i_itemp = NULL;
119 }
120
121 /* asserts to verify all state is correct here */
122 ASSERT(atomic_read(&ip->i_pincount) == 0);
123 ASSERT(!spin_is_locked(&ip->i_flags_lock));
124 ASSERT(!xfs_isiflocked(ip));
125
126 /*
127 * Because we use RCU freeing we need to ensure the inode always
128 * appears to be reclaimed with an invalid inode number when in the
129 * free state. The ip->i_flags_lock provides the barrier against lookup
130 * races.
131 */
132 spin_lock(&ip->i_flags_lock);
133 ip->i_flags = XFS_IRECLAIM;
134 ip->i_ino = 0;
135 spin_unlock(&ip->i_flags_lock);
136
137 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
138}
139
140/*
141 * Check the validity of the inode we just found in the cache
142 */
143static int
144xfs_iget_cache_hit(
145 struct xfs_perag *pag,
146 struct xfs_inode *ip,
147 xfs_ino_t ino,
148 int flags,
149 int lock_flags) __releases(RCU)
150{
151 struct inode *inode = VFS_I(ip);
152 struct xfs_mount *mp = ip->i_mount;
153 int error;
154
155 /*
156 * check for re-use of an inode within an RCU grace period due to the
157 * radix tree nodes not being updated yet. We monitor for this by
158 * setting the inode number to zero before freeing the inode structure.
159 * If the inode has been reallocated and set up, then the inode number
160 * will not match, so check for that, too.
161 */
162 spin_lock(&ip->i_flags_lock);
163 if (ip->i_ino != ino) {
164 trace_xfs_iget_skip(ip);
165 XFS_STATS_INC(xs_ig_frecycle);
166 error = EAGAIN;
167 goto out_error;
168 }
169
170
171 /*
172 * If we are racing with another cache hit that is currently
173 * instantiating this inode or currently recycling it out of
174 * reclaimable state, wait for the initialisation to complete
175 * before continuing.
176 *
177 * XXX(hch): eventually we should do something equivalent to
178 * wait_on_inode to wait for these flags to be cleared
179 * instead of polling for it.
180 */
181 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
182 trace_xfs_iget_skip(ip);
183 XFS_STATS_INC(xs_ig_frecycle);
184 error = EAGAIN;
185 goto out_error;
186 }
187
188 /*
189 * If lookup is racing with unlink return an error immediately.
190 */
191 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
192 error = ENOENT;
193 goto out_error;
194 }
195
196 /*
197 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
198 * Need to carefully get it back into useable state.
199 */
200 if (ip->i_flags & XFS_IRECLAIMABLE) {
201 trace_xfs_iget_reclaim(ip);
202
203 /*
204 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
205 * from stomping over us while we recycle the inode. We can't
206 * clear the radix tree reclaimable tag yet as it requires
207 * pag_ici_lock to be held exclusive.
208 */
209 ip->i_flags |= XFS_IRECLAIM;
210
211 spin_unlock(&ip->i_flags_lock);
212 rcu_read_unlock();
213
214 error = -inode_init_always(mp->m_super, inode);
215 if (error) {
216 /*
217 * Re-initializing the inode failed, and we are in deep
218 * trouble. Try to re-add it to the reclaim list.
219 */
220 rcu_read_lock();
221 spin_lock(&ip->i_flags_lock);
222
223 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
224 ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
225 trace_xfs_iget_reclaim_fail(ip);
226 goto out_error;
227 }
228
229 spin_lock(&pag->pag_ici_lock);
230 spin_lock(&ip->i_flags_lock);
231
232 /*
233 * Clear the per-lifetime state in the inode as we are now
234 * effectively a new inode and need to return to the initial
235 * state before reuse occurs.
236 */
237 ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
238 ip->i_flags |= XFS_INEW;
239 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
240 inode->i_state = I_NEW;
241
242 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
243 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
244
245 spin_unlock(&ip->i_flags_lock);
246 spin_unlock(&pag->pag_ici_lock);
247 } else {
248 /* If the VFS inode is being torn down, pause and try again. */
249 if (!igrab(inode)) {
250 trace_xfs_iget_skip(ip);
251 error = EAGAIN;
252 goto out_error;
253 }
254
255 /* We've got a live one. */
256 spin_unlock(&ip->i_flags_lock);
257 rcu_read_unlock();
258 trace_xfs_iget_hit(ip);
259 }
260
261 if (lock_flags != 0)
262 xfs_ilock(ip, lock_flags);
263
264 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
265 XFS_STATS_INC(xs_ig_found);
266
267 return 0;
268
269out_error:
270 spin_unlock(&ip->i_flags_lock);
271 rcu_read_unlock();
272 return error;
273}
274
275
276static int
277xfs_iget_cache_miss(
278 struct xfs_mount *mp,
279 struct xfs_perag *pag,
280 xfs_trans_t *tp,
281 xfs_ino_t ino,
282 struct xfs_inode **ipp,
283 int flags,
284 int lock_flags)
285{
286 struct xfs_inode *ip;
287 int error;
288 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
289 int iflags;
290
291 ip = xfs_inode_alloc(mp, ino);
292 if (!ip)
293 return ENOMEM;
294
295 error = xfs_iread(mp, tp, ip, flags);
296 if (error)
297 goto out_destroy;
298
299 trace_xfs_iget_miss(ip);
300
301 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
302 error = ENOENT;
303 goto out_destroy;
304 }
305
306 /*
307 * Preload the radix tree so we can insert safely under the
308 * write spinlock. Note that we cannot sleep inside the preload
309 * region. Since we can be called from transaction context, don't
310 * recurse into the file system.
311 */
312 if (radix_tree_preload(GFP_NOFS)) {
313 error = EAGAIN;
314 goto out_destroy;
315 }
316
317 /*
318 * Because the inode hasn't been added to the radix-tree yet it can't
319 * be found by another thread, so we can do the non-sleeping lock here.
320 */
321 if (lock_flags) {
322 if (!xfs_ilock_nowait(ip, lock_flags))
323 BUG();
324 }
325
326 /*
327 * These values must be set before inserting the inode into the radix
328 * tree as the moment it is inserted a concurrent lookup (allowed by the
329 * RCU locking mechanism) can find it and that lookup must see that this
330 * is an inode currently under construction (i.e. that XFS_INEW is set).
331 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
332 * memory barrier that ensures this detection works correctly at lookup
333 * time.
334 */
335 iflags = XFS_INEW;
336 if (flags & XFS_IGET_DONTCACHE)
337 iflags |= XFS_IDONTCACHE;
338 ip->i_udquot = ip->i_gdquot = NULL;
339 xfs_iflags_set(ip, iflags);
340
341 /* insert the new inode */
342 spin_lock(&pag->pag_ici_lock);
343 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
344 if (unlikely(error)) {
345 WARN_ON(error != -EEXIST);
346 XFS_STATS_INC(xs_ig_dup);
347 error = EAGAIN;
348 goto out_preload_end;
349 }
350 spin_unlock(&pag->pag_ici_lock);
351 radix_tree_preload_end();
352
353 *ipp = ip;
354 return 0;
355
356out_preload_end:
357 spin_unlock(&pag->pag_ici_lock);
358 radix_tree_preload_end();
359 if (lock_flags)
360 xfs_iunlock(ip, lock_flags);
361out_destroy:
362 __destroy_inode(VFS_I(ip));
363 xfs_inode_free(ip);
364 return error;
365}
366
367/*
368 * Look up an inode by number in the given file system.
369 * The inode is looked up in the cache held in each AG.
370 * If the inode is found in the cache, initialise the vfs inode
371 * if necessary.
372 *
373 * If it is not in core, read it in from the file system's device,
374 * add it to the cache and initialise the vfs inode.
375 *
376 * The inode is locked according to the value of the lock_flags parameter.
377 * This flag parameter indicates how and if the inode's IO lock and inode lock
378 * should be taken.
379 *
380 * mp -- the mount point structure for the current file system. It points
381 * to the inode hash table.
382 * tp -- a pointer to the current transaction if there is one. This is
383 * simply passed through to the xfs_iread() call.
384 * ino -- the number of the inode desired. This is the unique identifier
385 * within the file system for the inode being requested.
386 * lock_flags -- flags indicating how to lock the inode. See the comment
387 * for xfs_ilock() for a list of valid values.
388 */
389int
390xfs_iget(
391 xfs_mount_t *mp,
392 xfs_trans_t *tp,
393 xfs_ino_t ino,
394 uint flags,
395 uint lock_flags,
396 xfs_inode_t **ipp)
397{
398 xfs_inode_t *ip;
399 int error;
400 xfs_perag_t *pag;
401 xfs_agino_t agino;
402
403 /*
404 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
405 * doesn't get freed while it's being referenced during a
406 * radix tree traversal here. It assumes this function
407 * acquires only the ILOCK (and therefore it has no need to
408 * involve the IOLOCK in this synchronization).
409 */
410 ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
411
412 /* reject inode numbers outside existing AGs */
413 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
414 return EINVAL;
415
416 /* get the perag structure and ensure that it's inode capable */
417 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
418 agino = XFS_INO_TO_AGINO(mp, ino);
419
420again:
421 error = 0;
422 rcu_read_lock();
423 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
424
425 if (ip) {
426 error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
427 if (error)
428 goto out_error_or_again;
429 } else {
430 rcu_read_unlock();
431 XFS_STATS_INC(xs_ig_missed);
432
433 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
434 flags, lock_flags);
435 if (error)
436 goto out_error_or_again;
437 }
438 xfs_perag_put(pag);
439
440 *ipp = ip;
441
442 /*
443 * If we have a real type for an on-disk inode, we can set ops(&unlock)
444 * now. If it's a new inode being created, xfs_ialloc will handle it.
445 */
446 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
447 xfs_setup_inode(ip);
448 return 0;
449
450out_error_or_again:
451 if (error == EAGAIN) {
452 delay(1);
453 goto again;
454 }
455 xfs_perag_put(pag);
456 return error;
457}
458
44/* 459/*
45 * The inode lookup is done in batches to keep the amount of lock traffic and 460 * The inode lookup is done in batches to keep the amount of lock traffic and
46 * radix tree lookups to a minimum. The batch size is a trade off between 461 * radix tree lookups to a minimum. The batch size is a trade off between
@@ -253,7 +668,7 @@ xfs_reclaim_worker(
253 xfs_reclaim_work_queue(mp); 668 xfs_reclaim_work_queue(mp);
254} 669}
255 670
256void 671static void
257__xfs_inode_set_reclaim_tag( 672__xfs_inode_set_reclaim_tag(
258 struct xfs_perag *pag, 673 struct xfs_perag *pag,
259 struct xfs_inode *ip) 674 struct xfs_inode *ip)
@@ -319,7 +734,7 @@ __xfs_inode_clear_reclaim(
319 } 734 }
320} 735}
321 736
322void 737STATIC void
323__xfs_inode_clear_reclaim_tag( 738__xfs_inode_clear_reclaim_tag(
324 xfs_mount_t *mp, 739 xfs_mount_t *mp,
325 xfs_perag_t *pag, 740 xfs_perag_t *pag,
@@ -542,7 +957,7 @@ out:
542 * then a shut down during filesystem unmount reclaim walk leak all the 957 * then a shut down during filesystem unmount reclaim walk leak all the
543 * unreclaimed inodes. 958 * unreclaimed inodes.
544 */ 959 */
545int 960STATIC int
546xfs_reclaim_inodes_ag( 961xfs_reclaim_inodes_ag(
547 struct xfs_mount *mp, 962 struct xfs_mount *mp,
548 int flags, 963 int flags,
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 0ba9c89c316e..222e22f16b4a 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -24,6 +24,9 @@ struct xfs_perag;
24#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 24#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
25#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 25#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
26 26
27int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
28 uint flags, uint lock_flags, xfs_inode_t **ipp);
29
27void xfs_reclaim_worker(struct work_struct *work); 30void xfs_reclaim_worker(struct work_struct *work);
28 31
29int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 32int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
@@ -31,9 +34,6 @@ int xfs_reclaim_inodes_count(struct xfs_mount *mp);
31void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 34void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
32 35
33void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 36void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
34void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
35void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
36 struct xfs_inode *ip);
37 37
38int xfs_sync_inode_grab(struct xfs_inode *ip); 38int xfs_sync_inode_grab(struct xfs_inode *ip);
39int xfs_inode_ag_iterator(struct xfs_mount *mp, 39int xfs_inode_ag_iterator(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
deleted file mode 100644
index ea9a5fa49a48..000000000000
--- a/fs/xfs/xfs_iget.c
+++ /dev/null
@@ -1,455 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_acl.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28#include "xfs_bmap_btree.h"
29#include "xfs_alloc_btree.h"
30#include "xfs_ialloc_btree.h"
31#include "xfs_dinode.h"
32#include "xfs_inode.h"
33#include "xfs_btree.h"
34#include "xfs_ialloc.h"
35#include "xfs_quota.h"
36#include "xfs_utils.h"
37#include "xfs_trans_priv.h"
38#include "xfs_inode_item.h"
39#include "xfs_bmap.h"
40#include "xfs_trace.h"
41#include "xfs_icache.h"
42
43
44/*
45 * Allocate and initialise an xfs_inode.
46 */
47STATIC struct xfs_inode *
48xfs_inode_alloc(
49 struct xfs_mount *mp,
50 xfs_ino_t ino)
51{
52 struct xfs_inode *ip;
53
54 /*
55 * if this didn't occur in transactions, we could use
56 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
57 * code up to do this anyway.
58 */
59 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
60 if (!ip)
61 return NULL;
62 if (inode_init_always(mp->m_super, VFS_I(ip))) {
63 kmem_zone_free(xfs_inode_zone, ip);
64 return NULL;
65 }
66
67 ASSERT(atomic_read(&ip->i_pincount) == 0);
68 ASSERT(!spin_is_locked(&ip->i_flags_lock));
69 ASSERT(!xfs_isiflocked(ip));
70 ASSERT(ip->i_ino == 0);
71
72 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
73
74 /* initialise the xfs inode */
75 ip->i_ino = ino;
76 ip->i_mount = mp;
77 memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
78 ip->i_afp = NULL;
79 memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
80 ip->i_flags = 0;
81 ip->i_delayed_blks = 0;
82 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
83
84 return ip;
85}
86
87STATIC void
88xfs_inode_free_callback(
89 struct rcu_head *head)
90{
91 struct inode *inode = container_of(head, struct inode, i_rcu);
92 struct xfs_inode *ip = XFS_I(inode);
93
94 kmem_zone_free(xfs_inode_zone, ip);
95}
96
97void
98xfs_inode_free(
99 struct xfs_inode *ip)
100{
101 switch (ip->i_d.di_mode & S_IFMT) {
102 case S_IFREG:
103 case S_IFDIR:
104 case S_IFLNK:
105 xfs_idestroy_fork(ip, XFS_DATA_FORK);
106 break;
107 }
108
109 if (ip->i_afp)
110 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
111
112 if (ip->i_itemp) {
113 ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
114 xfs_inode_item_destroy(ip);
115 ip->i_itemp = NULL;
116 }
117
118 /* asserts to verify all state is correct here */
119 ASSERT(atomic_read(&ip->i_pincount) == 0);
120 ASSERT(!spin_is_locked(&ip->i_flags_lock));
121 ASSERT(!xfs_isiflocked(ip));
122
123 /*
124 * Because we use RCU freeing we need to ensure the inode always
125 * appears to be reclaimed with an invalid inode number when in the
126 * free state. The ip->i_flags_lock provides the barrier against lookup
127 * races.
128 */
129 spin_lock(&ip->i_flags_lock);
130 ip->i_flags = XFS_IRECLAIM;
131 ip->i_ino = 0;
132 spin_unlock(&ip->i_flags_lock);
133
134 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
135}
136
137/*
138 * Check the validity of the inode we just found it the cache
139 */
140static int
141xfs_iget_cache_hit(
142 struct xfs_perag *pag,
143 struct xfs_inode *ip,
144 xfs_ino_t ino,
145 int flags,
146 int lock_flags) __releases(RCU)
147{
148 struct inode *inode = VFS_I(ip);
149 struct xfs_mount *mp = ip->i_mount;
150 int error;
151
152 /*
153 * check for re-use of an inode within an RCU grace period due to the
154 * radix tree nodes not being updated yet. We monitor for this by
155 * setting the inode number to zero before freeing the inode structure.
156 * If the inode has been reallocated and set up, then the inode number
157 * will not match, so check for that, too.
158 */
159 spin_lock(&ip->i_flags_lock);
160 if (ip->i_ino != ino) {
161 trace_xfs_iget_skip(ip);
162 XFS_STATS_INC(xs_ig_frecycle);
163 error = EAGAIN;
164 goto out_error;
165 }
166
167
168 /*
169 * If we are racing with another cache hit that is currently
170 * instantiating this inode or currently recycling it out of
171 * reclaimabe state, wait for the initialisation to complete
172 * before continuing.
173 *
174 * XXX(hch): eventually we should do something equivalent to
175 * wait_on_inode to wait for these flags to be cleared
176 * instead of polling for it.
177 */
178 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
179 trace_xfs_iget_skip(ip);
180 XFS_STATS_INC(xs_ig_frecycle);
181 error = EAGAIN;
182 goto out_error;
183 }
184
185 /*
186 * If lookup is racing with unlink return an error immediately.
187 */
188 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
189 error = ENOENT;
190 goto out_error;
191 }
192
193 /*
194 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
195 * Need to carefully get it back into useable state.
196 */
197 if (ip->i_flags & XFS_IRECLAIMABLE) {
198 trace_xfs_iget_reclaim(ip);
199
200 /*
201 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
202 * from stomping over us while we recycle the inode. We can't
203 * clear the radix tree reclaimable tag yet as it requires
204 * pag_ici_lock to be held exclusive.
205 */
206 ip->i_flags |= XFS_IRECLAIM;
207
208 spin_unlock(&ip->i_flags_lock);
209 rcu_read_unlock();
210
211 error = -inode_init_always(mp->m_super, inode);
212 if (error) {
213 /*
214 * Re-initializing the inode failed, and we are in deep
215 * trouble. Try to re-add it to the reclaim list.
216 */
217 rcu_read_lock();
218 spin_lock(&ip->i_flags_lock);
219
220 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
221 ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
222 trace_xfs_iget_reclaim_fail(ip);
223 goto out_error;
224 }
225
226 spin_lock(&pag->pag_ici_lock);
227 spin_lock(&ip->i_flags_lock);
228
229 /*
230 * Clear the per-lifetime state in the inode as we are now
231 * effectively a new inode and need to return to the initial
232 * state before reuse occurs.
233 */
234 ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
235 ip->i_flags |= XFS_INEW;
236 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
237 inode->i_state = I_NEW;
238
239 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
240 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
241
242 spin_unlock(&ip->i_flags_lock);
243 spin_unlock(&pag->pag_ici_lock);
244 } else {
245 /* If the VFS inode is being torn down, pause and try again. */
246 if (!igrab(inode)) {
247 trace_xfs_iget_skip(ip);
248 error = EAGAIN;
249 goto out_error;
250 }
251
252 /* We've got a live one. */
253 spin_unlock(&ip->i_flags_lock);
254 rcu_read_unlock();
255 trace_xfs_iget_hit(ip);
256 }
257
258 if (lock_flags != 0)
259 xfs_ilock(ip, lock_flags);
260
261 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
262 XFS_STATS_INC(xs_ig_found);
263
264 return 0;
265
266out_error:
267 spin_unlock(&ip->i_flags_lock);
268 rcu_read_unlock();
269 return error;
270}
271
272
273static int
274xfs_iget_cache_miss(
275 struct xfs_mount *mp,
276 struct xfs_perag *pag,
277 xfs_trans_t *tp,
278 xfs_ino_t ino,
279 struct xfs_inode **ipp,
280 int flags,
281 int lock_flags)
282{
283 struct xfs_inode *ip;
284 int error;
285 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
286 int iflags;
287
288 ip = xfs_inode_alloc(mp, ino);
289 if (!ip)
290 return ENOMEM;
291
292 error = xfs_iread(mp, tp, ip, flags);
293 if (error)
294 goto out_destroy;
295
296 trace_xfs_iget_miss(ip);
297
298 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
299 error = ENOENT;
300 goto out_destroy;
301 }
302
303 /*
304 * Preload the radix tree so we can insert safely under the
305 * write spinlock. Note that we cannot sleep inside the preload
306 * region. Since we can be called from transaction context, don't
307 * recurse into the file system.
308 */
309 if (radix_tree_preload(GFP_NOFS)) {
310 error = EAGAIN;
311 goto out_destroy;
312 }
313
314 /*
315 * Because the inode hasn't been added to the radix-tree yet it can't
316 * be found by another thread, so we can do the non-sleeping lock here.
317 */
318 if (lock_flags) {
319 if (!xfs_ilock_nowait(ip, lock_flags))
320 BUG();
321 }
322
323 /*
324 * These values must be set before inserting the inode into the radix
325 * tree as the moment it is inserted a concurrent lookup (allowed by the
326 * RCU locking mechanism) can find it and that lookup must see that this
327 * is an inode currently under construction (i.e. that XFS_INEW is set).
328 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
329 * memory barrier that ensures this detection works correctly at lookup
330 * time.
331 */
332 iflags = XFS_INEW;
333 if (flags & XFS_IGET_DONTCACHE)
334 iflags |= XFS_IDONTCACHE;
335 ip->i_udquot = ip->i_gdquot = NULL;
336 xfs_iflags_set(ip, iflags);
337
338 /* insert the new inode */
339 spin_lock(&pag->pag_ici_lock);
340 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
341 if (unlikely(error)) {
342 WARN_ON(error != -EEXIST);
343 XFS_STATS_INC(xs_ig_dup);
344 error = EAGAIN;
345 goto out_preload_end;
346 }
347 spin_unlock(&pag->pag_ici_lock);
348 radix_tree_preload_end();
349
350 *ipp = ip;
351 return 0;
352
353out_preload_end:
354 spin_unlock(&pag->pag_ici_lock);
355 radix_tree_preload_end();
356 if (lock_flags)
357 xfs_iunlock(ip, lock_flags);
358out_destroy:
359 __destroy_inode(VFS_I(ip));
360 xfs_inode_free(ip);
361 return error;
362}
363
364/*
365 * Look up an inode by number in the given file system.
366 * The inode is looked up in the cache held in each AG.
367 * If the inode is found in the cache, initialise the vfs inode
368 * if necessary.
369 *
370 * If it is not in core, read it in from the file system's device,
371 * add it to the cache and initialise the vfs inode.
372 *
373 * The inode is locked according to the value of the lock_flags parameter.
374 * This flag parameter indicates how and if the inode's IO lock and inode lock
375 * should be taken.
376 *
377 * mp -- the mount point structure for the current file system. It points
378 * to the inode hash table.
379 * tp -- a pointer to the current transaction if there is one. This is
380 * simply passed through to the xfs_iread() call.
381 * ino -- the number of the inode desired. This is the unique identifier
382 * within the file system for the inode being requested.
383 * lock_flags -- flags indicating how to lock the inode. See the comment
384 * for xfs_ilock() for a list of valid values.
385 */
386int
387xfs_iget(
388 xfs_mount_t *mp,
389 xfs_trans_t *tp,
390 xfs_ino_t ino,
391 uint flags,
392 uint lock_flags,
393 xfs_inode_t **ipp)
394{
395 xfs_inode_t *ip;
396 int error;
397 xfs_perag_t *pag;
398 xfs_agino_t agino;
399
400 /*
401 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
402 * doesn't get freed while it's being referenced during a
403 * radix tree traversal here. It assumes this function
404 * aqcuires only the ILOCK (and therefore it has no need to
405 * involve the IOLOCK in this synchronization).
406 */
407 ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
408
409 /* reject inode numbers outside existing AGs */
410 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
411 return EINVAL;
412
413 /* get the perag structure and ensure that it's inode capable */
414 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
415 agino = XFS_INO_TO_AGINO(mp, ino);
416
417again:
418 error = 0;
419 rcu_read_lock();
420 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
421
422 if (ip) {
423 error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
424 if (error)
425 goto out_error_or_again;
426 } else {
427 rcu_read_unlock();
428 XFS_STATS_INC(xs_ig_missed);
429
430 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
431 flags, lock_flags);
432 if (error)
433 goto out_error_or_again;
434 }
435 xfs_perag_put(pag);
436
437 *ipp = ip;
438
439 /*
440 * If we have a real type for an on-disk inode, we can set ops(&unlock)
441 * now. If it's a new inode being created, xfs_ialloc will handle it.
442 */
443 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
444 xfs_setup_inode(ip);
445 return 0;
446
447out_error_or_again:
448 if (error == EAGAIN) {
449 delay(1);
450 goto again;
451 }
452 xfs_perag_put(pag);
453 return error;
454}
455
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ba404e4b9f0c..bba8f37525b3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -45,6 +45,7 @@
45#include "xfs_filestream.h" 45#include "xfs_filestream.h"
46#include "xfs_vnodeops.h" 46#include "xfs_vnodeops.h"
47#include "xfs_trace.h" 47#include "xfs_trace.h"
48#include "xfs_icache.h"
48 49
49kmem_zone_t *xfs_ifork_zone; 50kmem_zone_t *xfs_ifork_zone;
50kmem_zone_t *xfs_inode_zone; 51kmem_zone_t *xfs_inode_zone;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 94b32f906e79..1fc2065e010b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -496,11 +496,10 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
496 (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ 496 (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
497 ((pip)->i_d.di_mode & S_ISGID)) 497 ((pip)->i_d.di_mode & S_ISGID))
498 498
499
499/* 500/*
500 * xfs_iget.c prototypes. 501 * xfs_inode.c prototypes.
501 */ 502 */
502int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
503 uint, uint, xfs_inode_t **);
504void xfs_ilock(xfs_inode_t *, uint); 503void xfs_ilock(xfs_inode_t *, uint);
505int xfs_ilock_nowait(xfs_inode_t *, uint); 504int xfs_ilock_nowait(xfs_inode_t *, uint);
506void xfs_iunlock(xfs_inode_t *, uint); 505void xfs_iunlock(xfs_inode_t *, uint);
@@ -508,11 +507,6 @@ void xfs_ilock_demote(xfs_inode_t *, uint);
508int xfs_isilocked(xfs_inode_t *, uint); 507int xfs_isilocked(xfs_inode_t *, uint);
509uint xfs_ilock_map_shared(xfs_inode_t *); 508uint xfs_ilock_map_shared(xfs_inode_t *);
510void xfs_iunlock_map_shared(xfs_inode_t *, uint); 509void xfs_iunlock_map_shared(xfs_inode_t *, uint);
511void xfs_inode_free(struct xfs_inode *ip);
512
513/*
514 * xfs_inode.c prototypes.
515 */
516int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, 510int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
517 xfs_nlink_t, xfs_dev_t, prid_t, int, 511 xfs_nlink_t, xfs_dev_t, prid_t, int,
518 struct xfs_buf **, xfs_inode_t **); 512 struct xfs_buf **, xfs_inode_t **);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 01d10a66e302..3998fd2a7949 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -34,6 +34,7 @@
34#include "xfs_error.h" 34#include "xfs_error.h"
35#include "xfs_btree.h" 35#include "xfs_btree.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_icache.h"
37 38
38STATIC int 39STATIC int
39xfs_internal_inum( 40xfs_internal_inum(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5da3ace352bf..651c98859b04 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -42,6 +42,7 @@
42#include "xfs_quota.h" 42#include "xfs_quota.h"
43#include "xfs_utils.h" 43#include "xfs_utils.h"
44#include "xfs_trace.h" 44#include "xfs_trace.h"
45#include "xfs_icache.h"
45 46
46STATIC int 47STATIC int
47xlog_find_zeroed( 48xlog_find_zeroed(
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 2e86fa0cfc0d..48c750b0e830 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -40,6 +40,7 @@
40#include "xfs_utils.h" 40#include "xfs_utils.h"
41#include "xfs_qm.h" 41#include "xfs_qm.h"
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43#include "xfs_icache.h"
43 44
44/* 45/*
45 * The global quota manager. There is only one of these for the entire 46 * The global quota manager. There is only one of these for the entire
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ca28a4ba4b54..a69e0b4750a9 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -38,6 +38,7 @@
38#include "xfs_utils.h" 38#include "xfs_utils.h"
39#include "xfs_trace.h" 39#include "xfs_trace.h"
40#include "xfs_buf.h" 40#include "xfs_buf.h"
41#include "xfs_icache.h"
41 42
42 43
43/* 44/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 14928564f106..2ee1f49da0aa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -47,6 +47,7 @@
47#include "xfs_filestream.h" 47#include "xfs_filestream.h"
48#include "xfs_vnodeops.h" 48#include "xfs_vnodeops.h"
49#include "xfs_trace.h" 49#include "xfs_trace.h"
50#include "xfs_icache.h"
50 51
51/* 52/*
52 * The maximum pathlen is 1024 bytes. Since the minimum file system 53 * The maximum pathlen is 1024 bytes. Since the minimum file system