aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2007-08-28 00:00:13 -0400
committerTim Shimmin <tes@chook.melbourne.sgi.com>2007-10-15 02:50:50 -0400
commitda353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch)
tree84454023d649df67cc6b125c73746ddb341ac34e /fs/xfs
parent39cd9f877e63ce7e02cdc7f5dbf1b908451c9532 (diff)
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing its incore inodes. We currently use hashes and the default hash sizes chosen can only ever be a tradeoff between memory consumption and the maximum realistic size of the cache. As a result, anyone who has millions of inodes cached on a filesystem needs to tune the size of the cache via the ihashsize mount option to allow decent scalability with inode cache operations. A further problem is the separate inode cluster hash, whose size is based on the ihashsize but is smaller, and so under certain conditions (sparse cluster cache population) this can become a limitation long before the inode hash is causing issues. The following patchset removes the inode hash and cluster hash and replaces them with radix trees to avoid the scalability limitations of the hashes. It also reduces the size of the inodes by 3 pointers.... SGI-PV: 969561 SGI-Modid: xfs-linux-melb:xfs-kern:29481a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/xfs_ag.h4
-rw-r--r--fs/xfs/xfs_buf_item.c1
-rw-r--r--fs/xfs/xfs_clnt.h1
-rw-r--r--fs/xfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/xfs_dir2_data.c1
-rw-r--r--fs/xfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/xfs_dir2_sf.c1
-rw-r--r--fs/xfs/xfs_error.c1
-rw-r--r--fs/xfs/xfs_extfree_item.c1
-rw-r--r--fs/xfs/xfs_iget.c585
-rw-r--r--fs/xfs/xfs_inode.c42
-rw-r--r--fs/xfs/xfs_inode.h56
-rw-r--r--fs/xfs/xfs_mount.c27
-rw-r--r--fs/xfs/xfs_mount.h22
-rw-r--r--fs/xfs/xfs_rename.c1
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_extfree.c1
-rw-r--r--fs/xfs/xfs_vfsops.c27
-rw-r--r--fs/xfs/xfs_vnodeops.c9
20 files changed, 284 insertions, 503 deletions
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index e3a5fedac1ba..f6e99fa7a683 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -17,10 +17,12 @@
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_types.h" 19#include "xfs_types.h"
20#include "xfs_dmapi.h" 20#include "xfs_inum.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_trans.h" 22#include "xfs_trans.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h"
25#include "xfs_dmapi.h"
24#include "xfs_mount.h" 26#include "xfs_mount.h"
25#include "xfs_export.h" 27#include "xfs_export.h"
26 28
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 51c09c114a20..9381b0360c4b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -197,6 +197,10 @@ typedef struct xfs_perag
197#endif 197#endif
198 xfs_perag_busy_t *pagb_list; /* unstable blocks */ 198 xfs_perag_busy_t *pagb_list; /* unstable blocks */
199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 199 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
200
201 int pag_ici_init; /* incore inode cache initialised */
202 rwlock_t pag_ici_lock; /* incore inode lock */
203 struct radix_tree_root pag_ici_root; /* incore inode cache root */
200} xfs_perag_t; 204} xfs_perag_t;
201 205
202#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) 206#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index b0667cb27d66..c8f2c2886fe4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -23,6 +23,7 @@
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
28#include "xfs_buf_item.h" 29#include "xfs_buf_item.h"
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index f89196cb08d2..d16c1b971074 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -89,7 +89,6 @@ struct xfs_mount_args {
89#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */ 89#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */
90#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width 90#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
91 * allocation */ 91 * allocation */
92#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */
93#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename 92#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
94 * symlink,mkdir,rmdir,mknod */ 93 * symlink,mkdir,rmdir,mknod */
95#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */ 94#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index f6b919af7b82..c171767e242a 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dir2.h" 26#include "xfs_dir2.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 7ebe295bd6d3..d2452699e9b1 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dir2.h" 26#include "xfs_dir2.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 91c61d9632c8..eb18e399e836 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dir2.h" 26#include "xfs_dir2.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c67d73572905..182c70315ad1 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dir2.h" 26#include "xfs_dir2.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 8c4331631337..339f9d4a49cd 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dir2.h" 26#include "xfs_dir2.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 3b14427ee123..f938a51be81b 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -23,6 +23,7 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_buf_item.h" 24#include "xfs_buf_item.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
28#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 114433a22baa..e07dcc1b70a6 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -40,131 +40,13 @@
40#include "xfs_utils.h" 40#include "xfs_utils.h"
41 41
42/* 42/*
43 * Initialize the inode hash table for the newly mounted file system.
44 * Choose an initial table size based on user specified value, else
45 * use a simple algorithm using the maximum number of inodes as an
46 * indicator for table size, and clamp it between one and some large
47 * number of pages.
48 */
49void
50xfs_ihash_init(xfs_mount_t *mp)
51{
52 __uint64_t icount;
53 uint i;
54
55 if (!mp->m_ihsize) {
56 icount = mp->m_maxicount ? mp->m_maxicount :
57 (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
58 mp->m_ihsize = 1 << max_t(uint, 8,
59 (xfs_highbit64(icount) + 1) / 2);
60 mp->m_ihsize = min_t(uint, mp->m_ihsize,
61 (64 * NBPP) / sizeof(xfs_ihash_t));
62 }
63
64 mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize,
65 NBPC * sizeof(xfs_ihash_t),
66 mp->m_ihsize * sizeof(xfs_ihash_t),
67 KM_SLEEP | KM_MAYFAIL | KM_LARGE);
68 mp->m_ihsize /= sizeof(xfs_ihash_t);
69 for (i = 0; i < mp->m_ihsize; i++)
70 rwlock_init(&(mp->m_ihash[i].ih_lock));
71}
72
73/*
74 * Free up structures allocated by xfs_ihash_init, at unmount time.
75 */
76void
77xfs_ihash_free(xfs_mount_t *mp)
78{
79 kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t));
80 mp->m_ihash = NULL;
81}
82
83/*
84 * Initialize the inode cluster hash table for the newly mounted file system.
85 * Its size is derived from the ihash table size.
86 */
87void
88xfs_chash_init(xfs_mount_t *mp)
89{
90 uint i;
91
92 mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
93 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
94 mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
95 mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
96 * sizeof(xfs_chash_t),
97 KM_SLEEP | KM_LARGE);
98 for (i = 0; i < mp->m_chsize; i++) {
99 spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
100 }
101}
102
103/*
104 * Free up structures allocated by xfs_chash_init, at unmount time.
105 */
106void
107xfs_chash_free(xfs_mount_t *mp)
108{
109 int i;
110
111 for (i = 0; i < mp->m_chsize; i++) {
112 spinlock_destroy(&mp->m_chash[i].ch_lock);
113 }
114
115 kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t));
116 mp->m_chash = NULL;
117}
118
119/*
120 * Try to move an inode to the front of its hash list if possible
121 * (and if its not there already). Called right after obtaining
122 * the list version number and then dropping the read_lock on the
123 * hash list in question (which is done right after looking up the
124 * inode in question...).
125 */
126STATIC void
127xfs_ihash_promote(
128 xfs_ihash_t *ih,
129 xfs_inode_t *ip,
130 ulong version)
131{
132 xfs_inode_t *iq;
133
134 if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) {
135 if (likely(version == ih->ih_version)) {
136 /* remove from list */
137 if ((iq = ip->i_next)) {
138 iq->i_prevp = ip->i_prevp;
139 }
140 *ip->i_prevp = iq;
141
142 /* insert at list head */
143 iq = ih->ih_next;
144 iq->i_prevp = &ip->i_next;
145 ip->i_next = iq;
146 ip->i_prevp = &ih->ih_next;
147 ih->ih_next = ip;
148 }
149 write_unlock(&ih->ih_lock);
150 }
151}
152
153/*
154 * Look up an inode by number in the given file system. 43 * Look up an inode by number in the given file system.
155 * The inode is looked up in the hash table for the file system 44 * The inode is looked up in the cache held in each AG.
156 * represented by the mount point parameter mp. Each bucket of 45 * If the inode is found in the cache, attach it to the provided
157 * the hash table is guarded by an individual semaphore. 46 * vnode.
158 *
159 * If the inode is found in the hash table, its corresponding vnode
160 * is obtained with a call to vn_get(). This call takes care of
161 * coordination with the reclamation of the inode and vnode. Note
162 * that the vmap structure is filled in while holding the hash lock.
163 * This gives us the state of the inode/vnode when we found it and
164 * is used for coordination in vn_get().
165 * 47 *
166 * If it is not in core, read it in from the file system's device and 48 * If it is not in core, read it in from the file system's device,
167 * add the inode into the hash table. 49 * add it to the cache and attach the provided vnode.
168 * 50 *
169 * The inode is locked according to the value of the lock_flags parameter. 51 * The inode is locked according to the value of the lock_flags parameter.
170 * This flag parameter indicates how and if the inode's IO lock and inode lock 52 * This flag parameter indicates how and if the inode's IO lock and inode lock
@@ -192,274 +74,241 @@ xfs_iget_core(
192 xfs_inode_t **ipp, 74 xfs_inode_t **ipp,
193 xfs_daddr_t bno) 75 xfs_daddr_t bno)
194{ 76{
195 xfs_ihash_t *ih;
196 xfs_inode_t *ip; 77 xfs_inode_t *ip;
197 xfs_inode_t *iq; 78 xfs_inode_t *iq;
198 bhv_vnode_t *inode_vp; 79 bhv_vnode_t *inode_vp;
199 ulong version;
200 int error; 80 int error;
201 /* REFERENCED */ 81 xfs_icluster_t *icl, *new_icl = NULL;
202 xfs_chash_t *ch; 82 unsigned long first_index, mask;
203 xfs_chashlist_t *chl, *chlnew; 83 xfs_perag_t *pag;
204 SPLDECL(s); 84 xfs_agino_t agino;
85
86 /* the radix tree exists only in inode capable AGs */
87 if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
88 return EINVAL;
89
90 /* get the perag structure and ensure that it's inode capable */
91 pag = xfs_get_perag(mp, ino);
92 if (!pag->pagi_inodeok)
93 return EINVAL;
94 ASSERT(pag->pag_ici_init);
95 agino = XFS_INO_TO_AGINO(mp, ino);
205 96
97again:
98 read_lock(&pag->pag_ici_lock);
99 ip = radix_tree_lookup(&pag->pag_ici_root, agino);
206 100
207 ih = XFS_IHASH(mp, ino); 101 if (ip != NULL) {
102 /*
103 * If INEW is set this inode is being set up
104 * we need to pause and try again.
105 */
106 if (xfs_iflags_test(ip, XFS_INEW)) {
107 read_unlock(&pag->pag_ici_lock);
108 delay(1);
109 XFS_STATS_INC(xs_ig_frecycle);
208 110
209again: 111 goto again;
210 read_lock(&ih->ih_lock); 112 }
211 113
212 for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { 114 inode_vp = XFS_ITOV_NULL(ip);
213 if (ip->i_ino == ino) { 115 if (inode_vp == NULL) {
214 /* 116 /*
215 * If INEW is set this inode is being set up 117 * If IRECLAIM is set this inode is
118 * on its way out of the system,
216 * we need to pause and try again. 119 * we need to pause and try again.
217 */ 120 */
218 if (xfs_iflags_test(ip, XFS_INEW)) { 121 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
219 read_unlock(&ih->ih_lock); 122 read_unlock(&pag->pag_ici_lock);
220 delay(1); 123 delay(1);
221 XFS_STATS_INC(xs_ig_frecycle); 124 XFS_STATS_INC(xs_ig_frecycle);
222 125
223 goto again; 126 goto again;
224 } 127 }
128 ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
225 129
226 inode_vp = XFS_ITOV_NULL(ip); 130 /*
227 if (inode_vp == NULL) { 131 * If lookup is racing with unlink, then we
228 /* 132 * should return an error immediately so we
229 * If IRECLAIM is set this inode is 133 * don't remove it from the reclaim list and
230 * on its way out of the system, 134 * potentially leak the inode.
231 * we need to pause and try again. 135 */
232 */ 136 if ((ip->i_d.di_mode == 0) &&
233 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 137 !(flags & XFS_IGET_CREATE)) {
234 read_unlock(&ih->ih_lock); 138 read_unlock(&pag->pag_ici_lock);
235 delay(1); 139 xfs_put_perag(mp, pag);
236 XFS_STATS_INC(xs_ig_frecycle); 140 return ENOENT;
237 141 }
238 goto again;
239 }
240 ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
241
242 /*
243 * If lookup is racing with unlink, then we
244 * should return an error immediately so we
245 * don't remove it from the reclaim list and
246 * potentially leak the inode.
247 */
248 if ((ip->i_d.di_mode == 0) &&
249 !(flags & XFS_IGET_CREATE)) {
250 read_unlock(&ih->ih_lock);
251 return ENOENT;
252 }
253
254 /*
255 * There may be transactions sitting in the
256 * incore log buffers or being flushed to disk
257 * at this time. We can't clear the
258 * XFS_IRECLAIMABLE flag until these
259 * transactions have hit the disk, otherwise we
260 * will void the guarantee the flag provides
261 * xfs_iunpin()
262 */
263 if (xfs_ipincount(ip)) {
264 read_unlock(&ih->ih_lock);
265 xfs_log_force(mp, 0,
266 XFS_LOG_FORCE|XFS_LOG_SYNC);
267 XFS_STATS_INC(xs_ig_frecycle);
268 goto again;
269 }
270
271 vn_trace_exit(vp, "xfs_iget.alloc",
272 (inst_t *)__return_address);
273 142
274 XFS_STATS_INC(xs_ig_found); 143 /*
144 * There may be transactions sitting in the
145 * incore log buffers or being flushed to disk
146 * at this time. We can't clear the
147 * XFS_IRECLAIMABLE flag until these
148 * transactions have hit the disk, otherwise we
149 * will void the guarantee the flag provides
150 * xfs_iunpin()
151 */
152 if (xfs_ipincount(ip)) {
153 read_unlock(&pag->pag_ici_lock);
154 xfs_log_force(mp, 0,
155 XFS_LOG_FORCE|XFS_LOG_SYNC);
156 XFS_STATS_INC(xs_ig_frecycle);
157 goto again;
158 }
275 159
276 xfs_iflags_clear(ip, XFS_IRECLAIMABLE); 160 vn_trace_exit(vp, "xfs_iget.alloc",
277 version = ih->ih_version; 161 (inst_t *)__return_address);
278 read_unlock(&ih->ih_lock);
279 xfs_ihash_promote(ih, ip, version);
280 162
281 XFS_MOUNT_ILOCK(mp); 163 XFS_STATS_INC(xs_ig_found);
282 list_del_init(&ip->i_reclaim);
283 XFS_MOUNT_IUNLOCK(mp);
284 164
285 goto finish_inode; 165 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
166 read_unlock(&pag->pag_ici_lock);
286 167
287 } else if (vp != inode_vp) { 168 XFS_MOUNT_ILOCK(mp);
288 struct inode *inode = vn_to_inode(inode_vp); 169 list_del_init(&ip->i_reclaim);
170 XFS_MOUNT_IUNLOCK(mp);
289 171
290 /* The inode is being torn down, pause and 172 goto finish_inode;
291 * try again.
292 */
293 if (inode->i_state & (I_FREEING | I_CLEAR)) {
294 read_unlock(&ih->ih_lock);
295 delay(1);
296 XFS_STATS_INC(xs_ig_frecycle);
297 173
298 goto again; 174 } else if (vp != inode_vp) {
299 } 175 struct inode *inode = vn_to_inode(inode_vp);
300/* Chances are the other vnode (the one in the inode) is being torn
301 * down right now, and we landed on top of it. Question is, what do
302 * we do? Unhook the old inode and hook up the new one?
303 */
304 cmn_err(CE_PANIC,
305 "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
306 inode_vp, vp);
307 }
308 176
309 /* 177 /* The inode is being torn down, pause and
310 * Inode cache hit: if ip is not at the front of 178 * try again.
311 * its hash chain, move it there now.
312 * Do this with the lock held for update, but
313 * do statistics after releasing the lock.
314 */ 179 */
315 version = ih->ih_version; 180 if (inode->i_state & (I_FREEING | I_CLEAR)) {
316 read_unlock(&ih->ih_lock); 181 read_unlock(&pag->pag_ici_lock);
317 xfs_ihash_promote(ih, ip, version); 182 delay(1);
318 XFS_STATS_INC(xs_ig_found); 183 XFS_STATS_INC(xs_ig_frecycle);
319 184
320finish_inode: 185 goto again;
321 if (ip->i_d.di_mode == 0) {
322 if (!(flags & XFS_IGET_CREATE))
323 return ENOENT;
324 xfs_iocore_inode_reinit(ip);
325 } 186 }
187/* Chances are the other vnode (the one in the inode) is being torn
188* down right now, and we landed on top of it. Question is, what do
189* we do? Unhook the old inode and hook up the new one?
190*/
191 cmn_err(CE_PANIC,
192 "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
193 inode_vp, vp);
194 }
326 195
327 if (lock_flags != 0) 196 /*
328 xfs_ilock(ip, lock_flags); 197 * Inode cache hit
198 */
199 read_unlock(&pag->pag_ici_lock);
200 XFS_STATS_INC(xs_ig_found);
329 201
330 xfs_iflags_clear(ip, XFS_ISTALE); 202finish_inode:
331 vn_trace_exit(vp, "xfs_iget.found", 203 if (ip->i_d.di_mode == 0) {
332 (inst_t *)__return_address); 204 if (!(flags & XFS_IGET_CREATE)) {
333 goto return_ip; 205 xfs_put_perag(mp, pag);
206 return ENOENT;
207 }
208 xfs_iocore_inode_reinit(ip);
334 } 209 }
210
211 if (lock_flags != 0)
212 xfs_ilock(ip, lock_flags);
213
214 xfs_iflags_clear(ip, XFS_ISTALE);
215 vn_trace_exit(vp, "xfs_iget.found",
216 (inst_t *)__return_address);
217 goto return_ip;
335 } 218 }
336 219
337 /* 220 /*
338 * Inode cache miss: save the hash chain version stamp and unlock 221 * Inode cache miss
339 * the chain, so we don't deadlock in vn_alloc.
340 */ 222 */
223 read_unlock(&pag->pag_ici_lock);
341 XFS_STATS_INC(xs_ig_missed); 224 XFS_STATS_INC(xs_ig_missed);
342 225
343 version = ih->ih_version;
344
345 read_unlock(&ih->ih_lock);
346
347 /* 226 /*
348 * Read the disk inode attributes into a new inode structure and get 227 * Read the disk inode attributes into a new inode structure and get
349 * a new vnode for it. This should also initialize i_ino and i_mount. 228 * a new vnode for it. This should also initialize i_ino and i_mount.
350 */ 229 */
351 error = xfs_iread(mp, tp, ino, &ip, bno, 230 error = xfs_iread(mp, tp, ino, &ip, bno,
352 (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); 231 (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
353 if (error) 232 if (error) {
233 xfs_put_perag(mp, pag);
354 return error; 234 return error;
235 }
355 236
356 vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); 237 vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
357 238
358 xfs_inode_lock_init(ip, vp); 239 xfs_inode_lock_init(ip, vp);
359 xfs_iocore_inode_init(ip); 240 xfs_iocore_inode_init(ip);
360
361 if (lock_flags) 241 if (lock_flags)
362 xfs_ilock(ip, lock_flags); 242 xfs_ilock(ip, lock_flags);
363 243
364 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 244 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
365 xfs_idestroy(ip); 245 xfs_idestroy(ip);
246 xfs_put_perag(mp, pag);
366 return ENOENT; 247 return ENOENT;
367 } 248 }
368 249
369 /* 250 /*
370 * Put ip on its hash chain, unless someone else hashed a duplicate 251 * This is a bit messy - we preallocate everything we _might_
371 * after we released the hash lock. 252 * need before we pick up the ici lock. That way we don't have to
253 * juggle locks and go all the way back to the start.
372 */ 254 */
373 write_lock(&ih->ih_lock); 255 new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
256 if (radix_tree_preload(GFP_KERNEL)) {
257 delay(1);
258 goto again;
259 }
260 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
261 first_index = agino & mask;
262 write_lock(&pag->pag_ici_lock);
374 263
375 if (ih->ih_version != version) { 264 /*
376 for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) { 265 * Find the cluster if it exists
377 if (iq->i_ino == ino) { 266 */
378 write_unlock(&ih->ih_lock); 267 icl = NULL;
379 xfs_idestroy(ip); 268 if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
269 first_index, 1)) {
270 if ((iq->i_ino & mask) == first_index)
271 icl = iq->i_cluster;
272 }
380 273
381 XFS_STATS_INC(xs_ig_dup); 274 /*
382 goto again; 275 * insert the new inode
383 } 276 */
384 } 277 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
278 if (unlikely(error)) {
279 BUG_ON(error != -EEXIST);
280 write_unlock(&pag->pag_ici_lock);
281 radix_tree_preload_end();
282 xfs_idestroy(ip);
283 XFS_STATS_INC(xs_ig_dup);
284 goto again;
385 } 285 }
386 286
387 /* 287 /*
388 * These values _must_ be set before releasing ihlock! 288 * These values _must_ be set before releasing ihlock!
389 */ 289 */
390 ip->i_hash = ih;
391 if ((iq = ih->ih_next)) {
392 iq->i_prevp = &ip->i_next;
393 }
394 ip->i_next = iq;
395 ip->i_prevp = &ih->ih_next;
396 ih->ih_next = ip;
397 ip->i_udquot = ip->i_gdquot = NULL; 290 ip->i_udquot = ip->i_gdquot = NULL;
398 ih->ih_version++;
399 xfs_iflags_set(ip, XFS_INEW); 291 xfs_iflags_set(ip, XFS_INEW);
400 write_unlock(&ih->ih_lock);
401 292
402 /* 293 ASSERT(ip->i_cluster == NULL);
403 * put ip on its cluster's hash chain
404 */
405 ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
406 ip->i_cnext == NULL);
407
408 chlnew = NULL;
409 ch = XFS_CHASH(mp, ip->i_blkno);
410 chlredo:
411 s = mutex_spinlock(&ch->ch_lock);
412 for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
413 if (chl->chl_blkno == ip->i_blkno) {
414
415 /* insert this inode into the doubly-linked list
416 * where chl points */
417 if ((iq = chl->chl_ip)) {
418 ip->i_cprev = iq->i_cprev;
419 iq->i_cprev->i_cnext = ip;
420 iq->i_cprev = ip;
421 ip->i_cnext = iq;
422 } else {
423 ip->i_cnext = ip;
424 ip->i_cprev = ip;
425 }
426 chl->chl_ip = ip;
427 ip->i_chash = chl;
428 break;
429 }
430 }
431 294
432 /* no hash list found for this block; add a new hash list */ 295 if (!icl) {
433 if (chl == NULL) { 296 spin_lock_init(&new_icl->icl_lock);
434 if (chlnew == NULL) { 297 INIT_HLIST_HEAD(&new_icl->icl_inodes);
435 mutex_spinunlock(&ch->ch_lock, s); 298 icl = new_icl;
436 ASSERT(xfs_chashlist_zone != NULL); 299 new_icl = NULL;
437 chlnew = (xfs_chashlist_t *)
438 kmem_zone_alloc(xfs_chashlist_zone,
439 KM_SLEEP);
440 ASSERT(chlnew != NULL);
441 goto chlredo;
442 } else {
443 ip->i_cnext = ip;
444 ip->i_cprev = ip;
445 ip->i_chash = chlnew;
446 chlnew->chl_ip = ip;
447 chlnew->chl_blkno = ip->i_blkno;
448 if (ch->ch_list)
449 ch->ch_list->chl_prev = chlnew;
450 chlnew->chl_next = ch->ch_list;
451 chlnew->chl_prev = NULL;
452 ch->ch_list = chlnew;
453 chlnew = NULL;
454 }
455 } else { 300 } else {
456 if (chlnew != NULL) { 301 ASSERT(!hlist_empty(&icl->icl_inodes));
457 kmem_zone_free(xfs_chashlist_zone, chlnew);
458 }
459 } 302 }
303 spin_lock(&icl->icl_lock);
304 hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
305 ip->i_cluster = icl;
306 spin_unlock(&icl->icl_lock);
460 307
461 mutex_spinunlock(&ch->ch_lock, s); 308 write_unlock(&pag->pag_ici_lock);
462 309 radix_tree_preload_end();
310 if (new_icl)
311 kmem_zone_free(xfs_icluster_zone, new_icl);
463 312
464 /* 313 /*
465 * Link ip to its mount and thread it on the mount's inode list. 314 * Link ip to its mount and thread it on the mount's inode list.
@@ -478,6 +327,7 @@ finish_inode:
478 mp->m_inodes = ip; 327 mp->m_inodes = ip;
479 328
480 XFS_MOUNT_IUNLOCK(mp); 329 XFS_MOUNT_IUNLOCK(mp);
330 xfs_put_perag(mp, pag);
481 331
482 return_ip: 332 return_ip:
483 ASSERT(ip->i_df.if_ext_max == 333 ASSERT(ip->i_df.if_ext_max ==
@@ -587,32 +437,19 @@ xfs_inode_incore(xfs_mount_t *mp,
587 xfs_ino_t ino, 437 xfs_ino_t ino,
588 xfs_trans_t *tp) 438 xfs_trans_t *tp)
589{ 439{
590 xfs_ihash_t *ih;
591 xfs_inode_t *ip; 440 xfs_inode_t *ip;
592 ulong version; 441 xfs_perag_t *pag;
593 442
594 ih = XFS_IHASH(mp, ino); 443 pag = xfs_get_perag(mp, ino);
595 read_lock(&ih->ih_lock); 444 read_lock(&pag->pag_ici_lock);
596 for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { 445 ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
597 if (ip->i_ino == ino) { 446 read_unlock(&pag->pag_ici_lock);
598 /* 447 xfs_put_perag(mp, pag);
599 * If we find it and tp matches, return it. 448
600 * Also move it to the front of the hash list 449 /* the returned inode must match the transaction */
601 * if we find it and it is not already there. 450 if (ip && (ip->i_transp != tp))
602 * Otherwise break from the loop and return 451 return NULL;
603 * NULL. 452 return ip;
604 */
605 if (ip->i_transp == tp) {
606 version = ih->ih_version;
607 read_unlock(&ih->ih_lock);
608 xfs_ihash_promote(ih, ip, version);
609 return (ip);
610 }
611 break;
612 }
613 }
614 read_unlock(&ih->ih_lock);
615 return (NULL);
616} 453}
617 454
618/* 455/*
@@ -718,58 +555,26 @@ void
718xfs_iextract( 555xfs_iextract(
719 xfs_inode_t *ip) 556 xfs_inode_t *ip)
720{ 557{
721 xfs_ihash_t *ih; 558 xfs_mount_t *mp = ip->i_mount;
559 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
722 xfs_inode_t *iq; 560 xfs_inode_t *iq;
723 xfs_mount_t *mp; 561
724 xfs_chash_t *ch; 562 write_lock(&pag->pag_ici_lock);
725 xfs_chashlist_t *chl, *chm; 563 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
726 SPLDECL(s); 564 write_unlock(&pag->pag_ici_lock);
727 565 xfs_put_perag(mp, pag);
728 ih = ip->i_hash;
729 write_lock(&ih->ih_lock);
730 if ((iq = ip->i_next)) {
731 iq->i_prevp = ip->i_prevp;
732 }
733 *ip->i_prevp = iq;
734 ih->ih_version++;
735 write_unlock(&ih->ih_lock);
736 566
737 /* 567 /*
738 * Remove from cluster hash list 568 * Remove from cluster list
739 * 1) delete the chashlist if this is the last inode on the chashlist
740 * 2) unchain from list of inodes
741 * 3) point chashlist->chl_ip to 'chl_next' if to this inode.
742 */ 569 */
743 mp = ip->i_mount; 570 mp = ip->i_mount;
744 ch = XFS_CHASH(mp, ip->i_blkno); 571 spin_lock(&ip->i_cluster->icl_lock);
745 s = mutex_spinlock(&ch->ch_lock); 572 hlist_del(&ip->i_cnode);
746 573 spin_unlock(&ip->i_cluster->icl_lock);
747 if (ip->i_cnext == ip) { 574
748 /* Last inode on chashlist */ 575 /* was last inode in cluster? */
749 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); 576 if (hlist_empty(&ip->i_cluster->icl_inodes))
750 ASSERT(ip->i_chash != NULL); 577 kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
751 chm=NULL;
752 chl = ip->i_chash;
753 if (chl->chl_prev)
754 chl->chl_prev->chl_next = chl->chl_next;
755 else
756 ch->ch_list = chl->chl_next;
757 if (chl->chl_next)
758 chl->chl_next->chl_prev = chl->chl_prev;
759 kmem_zone_free(xfs_chashlist_zone, chl);
760 } else {
761 /* delete one inode from a non-empty list */
762 iq = ip->i_cnext;
763 iq->i_cprev = ip->i_cprev;
764 ip->i_cprev->i_cnext = iq;
765 if (ip->i_chash->chl_ip == ip) {
766 ip->i_chash->chl_ip = iq;
767 }
768 ip->i_chash = __return_address;
769 ip->i_cprev = __return_address;
770 ip->i_cnext = __return_address;
771 }
772 mutex_spinunlock(&ch->ch_lock, s);
773 578
774 /* 579 /*
775 * Remove from mount's inode list. 580 * Remove from mount's inode list.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 41a0c73b601a..c1b917bd5951 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -52,7 +52,7 @@
52 52
53kmem_zone_t *xfs_ifork_zone; 53kmem_zone_t *xfs_ifork_zone;
54kmem_zone_t *xfs_inode_zone; 54kmem_zone_t *xfs_inode_zone;
55kmem_zone_t *xfs_chashlist_zone; 55kmem_zone_t *xfs_icluster_zone;
56 56
57/* 57/*
58 * Used in xfs_itruncate(). This is the maximum number of extents 58 * Used in xfs_itruncate(). This is the maximum number of extents
@@ -2182,10 +2182,10 @@ xfs_ifree_cluster(
2182 int i, j, found, pre_flushed; 2182 int i, j, found, pre_flushed;
2183 xfs_daddr_t blkno; 2183 xfs_daddr_t blkno;
2184 xfs_buf_t *bp; 2184 xfs_buf_t *bp;
2185 xfs_ihash_t *ih;
2186 xfs_inode_t *ip, **ip_found; 2185 xfs_inode_t *ip, **ip_found;
2187 xfs_inode_log_item_t *iip; 2186 xfs_inode_log_item_t *iip;
2188 xfs_log_item_t *lip; 2187 xfs_log_item_t *lip;
2188 xfs_perag_t *pag = xfs_get_perag(mp, inum);
2189 SPLDECL(s); 2189 SPLDECL(s);
2190 2190
2191 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 2191 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
@@ -2220,23 +2220,20 @@ xfs_ifree_cluster(
2220 */ 2220 */
2221 found = 0; 2221 found = 0;
2222 for (i = 0; i < ninodes; i++) { 2222 for (i = 0; i < ninodes; i++) {
2223 ih = XFS_IHASH(mp, inum + i); 2223 read_lock(&pag->pag_ici_lock);
2224 read_lock(&ih->ih_lock); 2224 ip = radix_tree_lookup(&pag->pag_ici_root,
2225 for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { 2225 XFS_INO_TO_AGINO(mp, (inum + i)));
2226 if (ip->i_ino == inum + i)
2227 break;
2228 }
2229 2226
2230 /* Inode not in memory or we found it already, 2227 /* Inode not in memory or we found it already,
2231 * nothing to do 2228 * nothing to do
2232 */ 2229 */
2233 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2230 if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
2234 read_unlock(&ih->ih_lock); 2231 read_unlock(&pag->pag_ici_lock);
2235 continue; 2232 continue;
2236 } 2233 }
2237 2234
2238 if (xfs_inode_clean(ip)) { 2235 if (xfs_inode_clean(ip)) {
2239 read_unlock(&ih->ih_lock); 2236 read_unlock(&pag->pag_ici_lock);
2240 continue; 2237 continue;
2241 } 2238 }
2242 2239
@@ -2259,7 +2256,7 @@ xfs_ifree_cluster(
2259 ip_found[found++] = ip; 2256 ip_found[found++] = ip;
2260 } 2257 }
2261 } 2258 }
2262 read_unlock(&ih->ih_lock); 2259 read_unlock(&pag->pag_ici_lock);
2263 continue; 2260 continue;
2264 } 2261 }
2265 2262
@@ -2277,8 +2274,7 @@ xfs_ifree_cluster(
2277 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2274 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2278 } 2275 }
2279 } 2276 }
2280 2277 read_unlock(&pag->pag_ici_lock);
2281 read_unlock(&ih->ih_lock);
2282 } 2278 }
2283 2279
2284 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2280 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
@@ -2333,6 +2329,7 @@ xfs_ifree_cluster(
2333 } 2329 }
2334 2330
2335 kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); 2331 kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
2332 xfs_put_perag(mp, pag);
2336} 2333}
2337 2334
2338/* 2335/*
@@ -3050,12 +3047,11 @@ xfs_iflush(
3050 xfs_mount_t *mp; 3047 xfs_mount_t *mp;
3051 int error; 3048 int error;
3052 /* REFERENCED */ 3049 /* REFERENCED */
3053 xfs_chash_t *ch;
3054 xfs_inode_t *iq; 3050 xfs_inode_t *iq;
3055 int clcount; /* count of inodes clustered */ 3051 int clcount; /* count of inodes clustered */
3056 int bufwasdelwri; 3052 int bufwasdelwri;
3053 struct hlist_node *entry;
3057 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; 3054 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
3058 SPLDECL(s);
3059 3055
3060 XFS_STATS_INC(xs_iflush_count); 3056 XFS_STATS_INC(xs_iflush_count);
3061 3057
@@ -3169,14 +3165,14 @@ xfs_iflush(
3169 * inode clustering: 3165 * inode clustering:
3170 * see if other inodes can be gathered into this write 3166 * see if other inodes can be gathered into this write
3171 */ 3167 */
3172 3168 spin_lock(&ip->i_cluster->icl_lock);
3173 ip->i_chash->chl_buf = bp; 3169 ip->i_cluster->icl_buf = bp;
3174
3175 ch = XFS_CHASH(mp, ip->i_blkno);
3176 s = mutex_spinlock(&ch->ch_lock);
3177 3170
3178 clcount = 0; 3171 clcount = 0;
3179 for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) { 3172 hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
3173 if (iq == ip)
3174 continue;
3175
3180 /* 3176 /*
3181 * Do an un-protected check to see if the inode is dirty and 3177 * Do an un-protected check to see if the inode is dirty and
3182 * is a candidate for flushing. These checks will be repeated 3178 * is a candidate for flushing. These checks will be repeated
@@ -3227,7 +3223,7 @@ xfs_iflush(
3227 xfs_iunlock(iq, XFS_ILOCK_SHARED); 3223 xfs_iunlock(iq, XFS_ILOCK_SHARED);
3228 } 3224 }
3229 } 3225 }
3230 mutex_spinunlock(&ch->ch_lock, s); 3226 spin_unlock(&ip->i_cluster->icl_lock);
3231 3227
3232 if (clcount) { 3228 if (clcount) {
3233 XFS_STATS_INC(xs_icluster_flushcnt); 3229 XFS_STATS_INC(xs_icluster_flushcnt);
@@ -3264,7 +3260,7 @@ cluster_corrupt_out:
3264 /* Corruption detected in the clustering loop. Invalidate the 3260 /* Corruption detected in the clustering loop. Invalidate the
3265 * inode buffer and shut down the filesystem. 3261 * inode buffer and shut down the filesystem.
3266 */ 3262 */
3267 mutex_spinunlock(&ch->ch_lock, s); 3263 spin_unlock(&ip->i_cluster->icl_lock);
3268 3264
3269 /* 3265 /*
3270 * Clean up the buffer. If it was B_DELWRI, just release it -- 3266 * Clean up the buffer. If it was B_DELWRI, just release it --
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 873b9f783d29..b6dd23d9b3d6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -172,41 +172,18 @@ typedef struct xfs_iocore {
172extern void xfs_iocore_inode_init(struct xfs_inode *); 172extern void xfs_iocore_inode_init(struct xfs_inode *);
173extern void xfs_iocore_inode_reinit(struct xfs_inode *); 173extern void xfs_iocore_inode_reinit(struct xfs_inode *);
174 174
175
176/*
177 * This is the type used in the xfs inode hash table.
178 * An array of these is allocated for each mounted
179 * file system to hash the inodes for that file system.
180 */
181typedef struct xfs_ihash {
182 struct xfs_inode *ih_next;
183 rwlock_t ih_lock;
184 uint ih_version;
185} xfs_ihash_t;
186
187#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
188
189/* 175/*
190 * This is the xfs inode cluster hash. This hash is used by xfs_iflush to 176 * This is the xfs inode cluster structure. This structure is used by
191 * find inodes that share a cluster and can be flushed to disk at the same 177 * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
192 * time. 178 * the same time.
193 */ 179 */
194typedef struct xfs_chashlist { 180typedef struct xfs_icluster {
195 struct xfs_chashlist *chl_next; 181 struct hlist_head icl_inodes; /* list of inodes on cluster */
196 struct xfs_chashlist *chl_prev; 182 xfs_daddr_t icl_blkno; /* starting block number of
197 struct xfs_inode *chl_ip;
198 xfs_daddr_t chl_blkno; /* starting block number of
199 * the cluster */ 183 * the cluster */
200 struct xfs_buf *chl_buf; /* the inode buffer */ 184 struct xfs_buf *icl_buf; /* the inode buffer */
201} xfs_chashlist_t; 185 lock_t icl_lock; /* inode list lock */
202 186} xfs_icluster_t;
203typedef struct xfs_chash {
204 xfs_chashlist_t *ch_list;
205 lock_t ch_lock;
206} xfs_chash_t;
207
208#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize))
209
210 187
211/* 188/*
212 * This is the xfs in-core inode structure. 189 * This is the xfs in-core inode structure.
@@ -269,21 +246,15 @@ typedef struct xfs_icdinode {
269} xfs_icdinode_t; 246} xfs_icdinode_t;
270 247
271typedef struct { 248typedef struct {
272 struct xfs_ihash *ip_hash; /* pointer to hash header */
273 struct xfs_inode *ip_next; /* inode hash link forw */
274 struct xfs_inode *ip_mnext; /* next inode in mount list */ 249 struct xfs_inode *ip_mnext; /* next inode in mount list */
275 struct xfs_inode *ip_mprev; /* ptr to prev inode */ 250 struct xfs_inode *ip_mprev; /* ptr to prev inode */
276 struct xfs_inode **ip_prevp; /* ptr to prev i_next */
277 struct xfs_mount *ip_mount; /* fs mount struct ptr */ 251 struct xfs_mount *ip_mount; /* fs mount struct ptr */
278} xfs_iptr_t; 252} xfs_iptr_t;
279 253
280typedef struct xfs_inode { 254typedef struct xfs_inode {
281 /* Inode linking and identification information. */ 255 /* Inode linking and identification information. */
282 struct xfs_ihash *i_hash; /* pointer to hash header */
283 struct xfs_inode *i_next; /* inode hash link forw */
284 struct xfs_inode *i_mnext; /* next inode in mount list */ 256 struct xfs_inode *i_mnext; /* next inode in mount list */
285 struct xfs_inode *i_mprev; /* ptr to prev inode */ 257 struct xfs_inode *i_mprev; /* ptr to prev inode */
286 struct xfs_inode **i_prevp; /* ptr to prev i_next */
287 struct xfs_mount *i_mount; /* fs mount struct ptr */ 258 struct xfs_mount *i_mount; /* fs mount struct ptr */
288 struct list_head i_reclaim; /* reclaim list */ 259 struct list_head i_reclaim; /* reclaim list */
289 struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ 260 struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/
@@ -324,9 +295,8 @@ typedef struct xfs_inode {
324 unsigned int i_delayed_blks; /* count of delay alloc blks */ 295 unsigned int i_delayed_blks; /* count of delay alloc blks */
325 296
326 xfs_icdinode_t i_d; /* most of ondisk inode */ 297 xfs_icdinode_t i_d; /* most of ondisk inode */
327 xfs_chashlist_t *i_chash; /* cluster hash list header */ 298 xfs_icluster_t *i_cluster; /* cluster list header */
328 struct xfs_inode *i_cnext; /* cluster hash link forward */ 299 struct hlist_node i_cnode; /* cluster link node */
329 struct xfs_inode *i_cprev; /* cluster hash link backward */
330 300
331 xfs_fsize_t i_size; /* in-memory size */ 301 xfs_fsize_t i_size; /* in-memory size */
332 /* Trace buffers per inode. */ 302 /* Trace buffers per inode. */
@@ -521,8 +491,6 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
521 */ 491 */
522void xfs_ihash_init(struct xfs_mount *); 492void xfs_ihash_init(struct xfs_mount *);
523void xfs_ihash_free(struct xfs_mount *); 493void xfs_ihash_free(struct xfs_mount *);
524void xfs_chash_init(struct xfs_mount *);
525void xfs_chash_free(struct xfs_mount *);
526xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, 494xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
527 struct xfs_trans *); 495 struct xfs_trans *);
528void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); 496void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *);
@@ -633,7 +601,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
633#define xfs_inobp_check(mp, bp) 601#define xfs_inobp_check(mp, bp)
634#endif /* DEBUG */ 602#endif /* DEBUG */
635 603
636extern struct kmem_zone *xfs_chashlist_zone; 604extern struct kmem_zone *xfs_icluster_zone;
637extern struct kmem_zone *xfs_ifork_zone; 605extern struct kmem_zone *xfs_ifork_zone;
638extern struct kmem_zone *xfs_inode_zone; 606extern struct kmem_zone *xfs_inode_zone;
639extern struct kmem_zone *xfs_ili_zone; 607extern struct kmem_zone *xfs_ili_zone;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f4daf1ec9931..71f25947251d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -160,11 +160,6 @@ xfs_mount_free(
160 xfs_mount_t *mp, 160 xfs_mount_t *mp,
161 int remove_bhv) 161 int remove_bhv)
162{ 162{
163 if (mp->m_ihash)
164 xfs_ihash_free(mp);
165 if (mp->m_chash)
166 xfs_chash_free(mp);
167
168 if (mp->m_perag) { 163 if (mp->m_perag) {
169 int agno; 164 int agno;
170 165
@@ -342,6 +337,17 @@ xfs_mount_validate_sb(
342 return 0; 337 return 0;
343} 338}
344 339
340STATIC void
341xfs_initialize_perag_icache(
342 xfs_perag_t *pag)
343{
344 if (!pag->pag_ici_init) {
345 rwlock_init(&pag->pag_ici_lock);
346 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
347 pag->pag_ici_init = 1;
348 }
349}
350
345xfs_agnumber_t 351xfs_agnumber_t
346xfs_initialize_perag( 352xfs_initialize_perag(
347 bhv_vfs_t *vfs, 353 bhv_vfs_t *vfs,
@@ -396,12 +402,14 @@ xfs_initialize_perag(
396 pag->pagi_inodeok = 1; 402 pag->pagi_inodeok = 1;
397 if (index < max_metadata) 403 if (index < max_metadata)
398 pag->pagf_metadata = 1; 404 pag->pagf_metadata = 1;
405 xfs_initialize_perag_icache(pag);
399 } 406 }
400 } else { 407 } else {
401 /* Setup default behavior for smaller filesystems */ 408 /* Setup default behavior for smaller filesystems */
402 for (index = 0; index < agcount; index++) { 409 for (index = 0; index < agcount; index++) {
403 pag = &mp->m_perag[index]; 410 pag = &mp->m_perag[index];
404 pag->pagi_inodeok = 1; 411 pag->pagi_inodeok = 1;
412 xfs_initialize_perag_icache(pag);
405 } 413 }
406 } 414 }
407 return index; 415 return index;
@@ -1033,13 +1041,6 @@ xfs_mountfs(
1033 xfs_trans_init(mp); 1041 xfs_trans_init(mp);
1034 1042
1035 /* 1043 /*
1036 * Allocate and initialize the inode hash table for this
1037 * file system.
1038 */
1039 xfs_ihash_init(mp);
1040 xfs_chash_init(mp);
1041
1042 /*
1043 * Allocate and initialize the per-ag data. 1044 * Allocate and initialize the per-ag data.
1044 */ 1045 */
1045 init_rwsem(&mp->m_peraglock); 1046 init_rwsem(&mp->m_peraglock);
@@ -1190,8 +1191,6 @@ xfs_mountfs(
1190 error3: 1191 error3:
1191 xfs_log_unmount_dealloc(mp); 1192 xfs_log_unmount_dealloc(mp);
1192 error2: 1193 error2:
1193 xfs_ihash_free(mp);
1194 xfs_chash_free(mp);
1195 for (agno = 0; agno < sbp->sb_agcount; agno++) 1194 for (agno = 0; agno < sbp->sb_agcount; agno++)
1196 if (mp->m_perag[agno].pagb_list) 1195 if (mp->m_perag[agno].pagb_list)
1197 kmem_free(mp->m_perag[agno].pagb_list, 1196 kmem_free(mp->m_perag[agno].pagb_list,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9ceff40326d0..bc23cb407701 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -57,10 +57,7 @@ struct log;
57struct bhv_vfs; 57struct bhv_vfs;
58struct bhv_vnode; 58struct bhv_vnode;
59struct xfs_mount_args; 59struct xfs_mount_args;
60struct xfs_ihash;
61struct xfs_chash;
62struct xfs_inode; 60struct xfs_inode;
63struct xfs_perag;
64struct xfs_iocore; 61struct xfs_iocore;
65struct xfs_bmbt_irec; 62struct xfs_bmbt_irec;
66struct xfs_bmap_free; 63struct xfs_bmap_free;
@@ -335,8 +332,6 @@ typedef struct xfs_mount {
335 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ 332 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
336 lock_t m_agirotor_lock;/* .. and lock protecting it */ 333 lock_t m_agirotor_lock;/* .. and lock protecting it */
337 xfs_agnumber_t m_maxagi; /* highest inode alloc group */ 334 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
338 size_t m_ihsize; /* size of next field */
339 struct xfs_ihash *m_ihash; /* fs private inode hash table*/
340 struct xfs_inode *m_inodes; /* active inode list */ 335 struct xfs_inode *m_inodes; /* active inode list */
341 struct list_head m_del_inodes; /* inodes to reclaim */ 336 struct list_head m_del_inodes; /* inodes to reclaim */
342 mutex_t m_ilock; /* inode list mutex */ 337 mutex_t m_ilock; /* inode list mutex */
@@ -458,7 +453,7 @@ typedef struct xfs_mount {
458#define XFS_MOUNT_IDELETE (1ULL << 18) /* delete empty inode clusters*/ 453#define XFS_MOUNT_IDELETE (1ULL << 18) /* delete empty inode clusters*/
459#define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width 454#define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width
460 * allocation */ 455 * allocation */
461#define XFS_MOUNT_IHASHSIZE (1ULL << 20) /* inode hash table size */ 456 /* (1ULL << 20) -- currently unused */
462#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ 457#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */
463#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred 458#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred
464 * I/O size in stat() */ 459 * I/O size in stat() */
@@ -572,6 +567,21 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
572} 567}
573 568
574/* 569/*
570 * perag get/put wrappers for eventual ref counting
571 */
572static inline xfs_perag_t *
573xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino)
574{
575 return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];
576}
577
578static inline void
579xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
580{
581 /* nothing to see here, move along */
582}
583
584/*
575 * Per-cpu superblock locking functions 585 * Per-cpu superblock locking functions
576 */ 586 */
577#ifdef HAVE_PERCPU_SB 587#ifdef HAVE_PERCPU_SB
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 53d9600af4a4..187318e8d259 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dir2.h" 26#include "xfs_dir2.h"
26#include "xfs_dmapi.h" 27#include "xfs_dmapi.h"
27#include "xfs_mount.h" 28#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index ceb4f6e99960..5b2ff59f19cf 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
26#include "xfs_mount.h" 27#include "xfs_mount.h"
27#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index b290270dd4a6..27cce2a9c7e9 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -22,6 +22,7 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h"
25#include "xfs_dmapi.h" 26#include "xfs_dmapi.h"
26#include "xfs_mount.h" 27#include "xfs_mount.h"
27#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 4a27648b5446..1644be14a144 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -117,8 +117,8 @@ xfs_init(void)
117 xfs_ili_zone = 117 xfs_ili_zone =
118 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", 118 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
119 KM_ZONE_SPREAD, NULL); 119 KM_ZONE_SPREAD, NULL);
120 xfs_chashlist_zone = 120 xfs_icluster_zone =
121 kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist", 121 kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
122 KM_ZONE_SPREAD, NULL); 122 KM_ZONE_SPREAD, NULL);
123 123
124 /* 124 /*
@@ -163,7 +163,7 @@ xfs_cleanup(void)
163 extern kmem_zone_t *xfs_efd_zone; 163 extern kmem_zone_t *xfs_efd_zone;
164 extern kmem_zone_t *xfs_efi_zone; 164 extern kmem_zone_t *xfs_efi_zone;
165 extern kmem_zone_t *xfs_buf_item_zone; 165 extern kmem_zone_t *xfs_buf_item_zone;
166 extern kmem_zone_t *xfs_chashlist_zone; 166 extern kmem_zone_t *xfs_icluster_zone;
167 167
168 xfs_cleanup_procfs(); 168 xfs_cleanup_procfs();
169 xfs_sysctl_unregister(); 169 xfs_sysctl_unregister();
@@ -199,7 +199,7 @@ xfs_cleanup(void)
199 kmem_zone_destroy(xfs_efi_zone); 199 kmem_zone_destroy(xfs_efi_zone);
200 kmem_zone_destroy(xfs_ifork_zone); 200 kmem_zone_destroy(xfs_ifork_zone);
201 kmem_zone_destroy(xfs_ili_zone); 201 kmem_zone_destroy(xfs_ili_zone);
202 kmem_zone_destroy(xfs_chashlist_zone); 202 kmem_zone_destroy(xfs_icluster_zone);
203} 203}
204 204
205/* 205/*
@@ -246,7 +246,6 @@ xfs_start_flags(
246 ap->logbufsize); 246 ap->logbufsize);
247 return XFS_ERROR(EINVAL); 247 return XFS_ERROR(EINVAL);
248 } 248 }
249 mp->m_ihsize = ap->ihashsize;
250 mp->m_logbsize = ap->logbufsize; 249 mp->m_logbsize = ap->logbufsize;
251 mp->m_fsname_len = strlen(ap->fsname) + 1; 250 mp->m_fsname_len = strlen(ap->fsname) + 1;
252 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); 251 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
@@ -293,8 +292,6 @@ xfs_start_flags(
293 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; 292 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
294 } 293 }
295 294
296 if (ap->flags & XFSMNT_IHASHSIZE)
297 mp->m_flags |= XFS_MOUNT_IHASHSIZE;
298 if (ap->flags & XFSMNT_IDELETE) 295 if (ap->flags & XFSMNT_IDELETE)
299 mp->m_flags |= XFS_MOUNT_IDELETE; 296 mp->m_flags |= XFS_MOUNT_IDELETE;
300 if (ap->flags & XFSMNT_DIRSYNC) 297 if (ap->flags & XFSMNT_DIRSYNC)
@@ -1673,7 +1670,6 @@ xfs_vget(
1673#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ 1670#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */
1674#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ 1671#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */
1675#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ 1672#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
1676#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
1677#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1673#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
1678#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 1674#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
1679 * unwritten extent conversion */ 1675 * unwritten extent conversion */
@@ -1799,15 +1795,6 @@ xfs_parseargs(
1799 iosize = suffix_strtoul(value, &eov, 10); 1795 iosize = suffix_strtoul(value, &eov, 10);
1800 args->flags |= XFSMNT_IOSIZE; 1796 args->flags |= XFSMNT_IOSIZE;
1801 args->iosizelog = ffs(iosize) - 1; 1797 args->iosizelog = ffs(iosize) - 1;
1802 } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
1803 if (!value || !*value) {
1804 cmn_err(CE_WARN,
1805 "XFS: %s option requires an argument",
1806 this_char);
1807 return EINVAL;
1808 }
1809 args->flags |= XFSMNT_IHASHSIZE;
1810 args->ihashsize = simple_strtoul(value, &eov, 10);
1811 } else if (!strcmp(this_char, MNTOPT_GRPID) || 1798 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
1812 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 1799 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
1813 vfsp->vfs_flag |= VFS_GRPID; 1800 vfsp->vfs_flag |= VFS_GRPID;
@@ -1876,6 +1863,9 @@ xfs_parseargs(
1876 args->flags &= ~XFSMNT_ATTR2; 1863 args->flags &= ~XFSMNT_ATTR2;
1877 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 1864 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
1878 args->flags2 |= XFSMNT2_FILESTREAMS; 1865 args->flags2 |= XFSMNT2_FILESTREAMS;
1866 } else if (!strcmp(this_char, "ihashsize")) {
1867 cmn_err(CE_WARN,
1868 "XFS: ihashsize no longer used, option is deprecated.");
1879 } else if (!strcmp(this_char, "osyncisdsync")) { 1869 } else if (!strcmp(this_char, "osyncisdsync")) {
1880 /* no-op, this is now the default */ 1870 /* no-op, this is now the default */
1881 cmn_err(CE_WARN, 1871 cmn_err(CE_WARN,
@@ -1966,9 +1956,6 @@ xfs_showargs(
1966 seq_puts(m, xfs_infop->str); 1956 seq_puts(m, xfs_infop->str);
1967 } 1957 }
1968 1958
1969 if (mp->m_flags & XFS_MOUNT_IHASHSIZE)
1970 seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize);
1971
1972 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) 1959 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
1973 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", 1960 seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
1974 (int)(1 << mp->m_writeio_log) >> 10); 1961 (int)(1 << mp->m_writeio_log) >> 10);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index bde4a1ad90f2..15bc01b2d6a0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3876,7 +3876,7 @@ xfs_finish_reclaim(
3876 int locked, 3876 int locked,
3877 int sync_mode) 3877 int sync_mode)
3878{ 3878{
3879 xfs_ihash_t *ih = ip->i_hash; 3879 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
3880 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 3880 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
3881 int error; 3881 int error;
3882 3882
@@ -3888,12 +3888,12 @@ xfs_finish_reclaim(
3888 * Once we have the XFS_IRECLAIM flag set it will not touch 3888 * Once we have the XFS_IRECLAIM flag set it will not touch
3889 * us. 3889 * us.
3890 */ 3890 */
3891 write_lock(&ih->ih_lock); 3891 write_lock(&pag->pag_ici_lock);
3892 spin_lock(&ip->i_flags_lock); 3892 spin_lock(&ip->i_flags_lock);
3893 if (__xfs_iflags_test(ip, XFS_IRECLAIM) || 3893 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
3894 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { 3894 (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
3895 spin_unlock(&ip->i_flags_lock); 3895 spin_unlock(&ip->i_flags_lock);
3896 write_unlock(&ih->ih_lock); 3896 write_unlock(&pag->pag_ici_lock);
3897 if (locked) { 3897 if (locked) {
3898 xfs_ifunlock(ip); 3898 xfs_ifunlock(ip);
3899 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3899 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -3902,7 +3902,8 @@ xfs_finish_reclaim(
3902 } 3902 }
3903 __xfs_iflags_set(ip, XFS_IRECLAIM); 3903 __xfs_iflags_set(ip, XFS_IRECLAIM);
3904 spin_unlock(&ip->i_flags_lock); 3904 spin_unlock(&ip->i_flags_lock);
3905 write_unlock(&ih->ih_lock); 3905 write_unlock(&pag->pag_ici_lock);
3906 xfs_put_perag(ip->i_mount, pag);
3906 3907
3907 /* 3908 /*
3908 * If the inode is still dirty, then flush it out. If the inode 3909 * If the inode is still dirty, then flush it out. If the inode