diff options
author | David Chinner <dgc@sgi.com> | 2007-08-28 00:00:13 -0400 |
---|---|---|
committer | Tim Shimmin <tes@chook.melbourne.sgi.com> | 2007-10-15 02:50:50 -0400 |
commit | da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch) | |
tree | 84454023d649df67cc6b125c73746ddb341ac34e /fs | |
parent | 39cd9f877e63ce7e02cdc7f5dbf1b908451c9532 (diff) |
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing its incore
inodes. We currently use hashes and the default hash sizes chosen can
only ever be a tradeoff between memory consumption and the maximum
realistic size of the cache.
As a result, anyone who has millions of inodes cached on a filesystem
needs to tune the size of the cache via the ihashsize mount option to
allow decent scalability with inode cache operations.
A further problem is the separate inode cluster hash, whose size is based
on the ihashsize but is smaller, and so under certain conditions (sparse
cluster cache population) this can become a limitation long before the
inode hash is causing issues.
The following patchset removes the inode hash and cluster hash and
replaces them with radix trees to avoid the scalability limitations of the
hashes. It also reduces the size of the inodes by 3 pointers....
SGI-PV: 969561
SGI-Modid: xfs-linux-melb:xfs-kern:29481a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_export.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_ag.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_clnt.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2_block.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2_data.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2_node.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2_sf.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_error.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 585 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 42 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 56 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_rename.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_ail.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_extfree.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_vfsops.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 9 |
20 files changed, 284 insertions, 503 deletions
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index e3a5fedac1ba..f6e99fa7a683 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -17,10 +17,12 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_types.h" | 19 | #include "xfs_types.h" |
20 | #include "xfs_dmapi.h" | 20 | #include "xfs_inum.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
23 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_dmapi.h" | ||
24 | #include "xfs_mount.h" | 26 | #include "xfs_mount.h" |
25 | #include "xfs_export.h" | 27 | #include "xfs_export.h" |
26 | 28 | ||
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 51c09c114a20..9381b0360c4b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -197,6 +197,10 @@ typedef struct xfs_perag | |||
197 | #endif | 197 | #endif |
198 | xfs_perag_busy_t *pagb_list; /* unstable blocks */ | 198 | xfs_perag_busy_t *pagb_list; /* unstable blocks */ |
199 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 199 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
200 | |||
201 | int pag_ici_init; /* incore inode cache initialised */ | ||
202 | rwlock_t pag_ici_lock; /* incore inode lock */ | ||
203 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | ||
200 | } xfs_perag_t; | 204 | } xfs_perag_t; |
201 | 205 | ||
202 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) | 206 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index b0667cb27d66..c8f2c2886fe4 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
24 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | ||
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
28 | #include "xfs_buf_item.h" | 29 | #include "xfs_buf_item.h" |
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index f89196cb08d2..d16c1b971074 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h | |||
@@ -89,7 +89,6 @@ struct xfs_mount_args { | |||
89 | #define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */ | 89 | #define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */ |
90 | #define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width | 90 | #define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width |
91 | * allocation */ | 91 | * allocation */ |
92 | #define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */ | ||
93 | #define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename | 92 | #define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename |
94 | * symlink,mkdir,rmdir,mknod */ | 93 | * symlink,mkdir,rmdir,mknod */ |
95 | #define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */ | 94 | #define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */ |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index f6b919af7b82..c171767e242a 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 7ebe295bd6d3..d2452699e9b1 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 91c61d9632c8..eb18e399e836 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index c67d73572905..182c70315ad1 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 8c4331631337..339f9d4a49cd 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3b14427ee123..f938a51be81b 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_buf_item.h" | 24 | #include "xfs_buf_item.h" |
25 | #include "xfs_sb.h" | 25 | #include "xfs_sb.h" |
26 | #include "xfs_ag.h" | ||
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
28 | #include "xfs_trans_priv.h" | 29 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 114433a22baa..e07dcc1b70a6 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -40,131 +40,13 @@ | |||
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Initialize the inode hash table for the newly mounted file system. | ||
44 | * Choose an initial table size based on user specified value, else | ||
45 | * use a simple algorithm using the maximum number of inodes as an | ||
46 | * indicator for table size, and clamp it between one and some large | ||
47 | * number of pages. | ||
48 | */ | ||
49 | void | ||
50 | xfs_ihash_init(xfs_mount_t *mp) | ||
51 | { | ||
52 | __uint64_t icount; | ||
53 | uint i; | ||
54 | |||
55 | if (!mp->m_ihsize) { | ||
56 | icount = mp->m_maxicount ? mp->m_maxicount : | ||
57 | (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog); | ||
58 | mp->m_ihsize = 1 << max_t(uint, 8, | ||
59 | (xfs_highbit64(icount) + 1) / 2); | ||
60 | mp->m_ihsize = min_t(uint, mp->m_ihsize, | ||
61 | (64 * NBPP) / sizeof(xfs_ihash_t)); | ||
62 | } | ||
63 | |||
64 | mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize, | ||
65 | NBPC * sizeof(xfs_ihash_t), | ||
66 | mp->m_ihsize * sizeof(xfs_ihash_t), | ||
67 | KM_SLEEP | KM_MAYFAIL | KM_LARGE); | ||
68 | mp->m_ihsize /= sizeof(xfs_ihash_t); | ||
69 | for (i = 0; i < mp->m_ihsize; i++) | ||
70 | rwlock_init(&(mp->m_ihash[i].ih_lock)); | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Free up structures allocated by xfs_ihash_init, at unmount time. | ||
75 | */ | ||
76 | void | ||
77 | xfs_ihash_free(xfs_mount_t *mp) | ||
78 | { | ||
79 | kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t)); | ||
80 | mp->m_ihash = NULL; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Initialize the inode cluster hash table for the newly mounted file system. | ||
85 | * Its size is derived from the ihash table size. | ||
86 | */ | ||
87 | void | ||
88 | xfs_chash_init(xfs_mount_t *mp) | ||
89 | { | ||
90 | uint i; | ||
91 | |||
92 | mp->m_chsize = max_t(uint, 1, mp->m_ihsize / | ||
93 | (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)); | ||
94 | mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize); | ||
95 | mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize | ||
96 | * sizeof(xfs_chash_t), | ||
97 | KM_SLEEP | KM_LARGE); | ||
98 | for (i = 0; i < mp->m_chsize; i++) { | ||
99 | spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Free up structures allocated by xfs_chash_init, at unmount time. | ||
105 | */ | ||
106 | void | ||
107 | xfs_chash_free(xfs_mount_t *mp) | ||
108 | { | ||
109 | int i; | ||
110 | |||
111 | for (i = 0; i < mp->m_chsize; i++) { | ||
112 | spinlock_destroy(&mp->m_chash[i].ch_lock); | ||
113 | } | ||
114 | |||
115 | kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t)); | ||
116 | mp->m_chash = NULL; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Try to move an inode to the front of its hash list if possible | ||
121 | * (and if its not there already). Called right after obtaining | ||
122 | * the list version number and then dropping the read_lock on the | ||
123 | * hash list in question (which is done right after looking up the | ||
124 | * inode in question...). | ||
125 | */ | ||
126 | STATIC void | ||
127 | xfs_ihash_promote( | ||
128 | xfs_ihash_t *ih, | ||
129 | xfs_inode_t *ip, | ||
130 | ulong version) | ||
131 | { | ||
132 | xfs_inode_t *iq; | ||
133 | |||
134 | if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) { | ||
135 | if (likely(version == ih->ih_version)) { | ||
136 | /* remove from list */ | ||
137 | if ((iq = ip->i_next)) { | ||
138 | iq->i_prevp = ip->i_prevp; | ||
139 | } | ||
140 | *ip->i_prevp = iq; | ||
141 | |||
142 | /* insert at list head */ | ||
143 | iq = ih->ih_next; | ||
144 | iq->i_prevp = &ip->i_next; | ||
145 | ip->i_next = iq; | ||
146 | ip->i_prevp = &ih->ih_next; | ||
147 | ih->ih_next = ip; | ||
148 | } | ||
149 | write_unlock(&ih->ih_lock); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Look up an inode by number in the given file system. | 43 | * Look up an inode by number in the given file system. |
155 | * The inode is looked up in the hash table for the file system | 44 | * The inode is looked up in the cache held in each AG. |
156 | * represented by the mount point parameter mp. Each bucket of | 45 | * If the inode is found in the cache, attach it to the provided |
157 | * the hash table is guarded by an individual semaphore. | 46 | * vnode. |
158 | * | ||
159 | * If the inode is found in the hash table, its corresponding vnode | ||
160 | * is obtained with a call to vn_get(). This call takes care of | ||
161 | * coordination with the reclamation of the inode and vnode. Note | ||
162 | * that the vmap structure is filled in while holding the hash lock. | ||
163 | * This gives us the state of the inode/vnode when we found it and | ||
164 | * is used for coordination in vn_get(). | ||
165 | * | 47 | * |
166 | * If it is not in core, read it in from the file system's device and | 48 | * If it is not in core, read it in from the file system's device, |
167 | * add the inode into the hash table. | 49 | * add it to the cache and attach the provided vnode. |
168 | * | 50 | * |
169 | * The inode is locked according to the value of the lock_flags parameter. | 51 | * The inode is locked according to the value of the lock_flags parameter. |
170 | * This flag parameter indicates how and if the inode's IO lock and inode lock | 52 | * This flag parameter indicates how and if the inode's IO lock and inode lock |
@@ -192,274 +74,241 @@ xfs_iget_core( | |||
192 | xfs_inode_t **ipp, | 74 | xfs_inode_t **ipp, |
193 | xfs_daddr_t bno) | 75 | xfs_daddr_t bno) |
194 | { | 76 | { |
195 | xfs_ihash_t *ih; | ||
196 | xfs_inode_t *ip; | 77 | xfs_inode_t *ip; |
197 | xfs_inode_t *iq; | 78 | xfs_inode_t *iq; |
198 | bhv_vnode_t *inode_vp; | 79 | bhv_vnode_t *inode_vp; |
199 | ulong version; | ||
200 | int error; | 80 | int error; |
201 | /* REFERENCED */ | 81 | xfs_icluster_t *icl, *new_icl = NULL; |
202 | xfs_chash_t *ch; | 82 | unsigned long first_index, mask; |
203 | xfs_chashlist_t *chl, *chlnew; | 83 | xfs_perag_t *pag; |
204 | SPLDECL(s); | 84 | xfs_agino_t agino; |
85 | |||
86 | /* the radix tree exists only in inode capable AGs */ | ||
87 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | ||
88 | return EINVAL; | ||
89 | |||
90 | /* get the perag structure and ensure that it's inode capable */ | ||
91 | pag = xfs_get_perag(mp, ino); | ||
92 | if (!pag->pagi_inodeok) | ||
93 | return EINVAL; | ||
94 | ASSERT(pag->pag_ici_init); | ||
95 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
205 | 96 | ||
97 | again: | ||
98 | read_lock(&pag->pag_ici_lock); | ||
99 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | ||
206 | 100 | ||
207 | ih = XFS_IHASH(mp, ino); | 101 | if (ip != NULL) { |
102 | /* | ||
103 | * If INEW is set this inode is being set up | ||
104 | * we need to pause and try again. | ||
105 | */ | ||
106 | if (xfs_iflags_test(ip, XFS_INEW)) { | ||
107 | read_unlock(&pag->pag_ici_lock); | ||
108 | delay(1); | ||
109 | XFS_STATS_INC(xs_ig_frecycle); | ||
208 | 110 | ||
209 | again: | 111 | goto again; |
210 | read_lock(&ih->ih_lock); | 112 | } |
211 | 113 | ||
212 | for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { | 114 | inode_vp = XFS_ITOV_NULL(ip); |
213 | if (ip->i_ino == ino) { | 115 | if (inode_vp == NULL) { |
214 | /* | 116 | /* |
215 | * If INEW is set this inode is being set up | 117 | * If IRECLAIM is set this inode is |
118 | * on its way out of the system, | ||
216 | * we need to pause and try again. | 119 | * we need to pause and try again. |
217 | */ | 120 | */ |
218 | if (xfs_iflags_test(ip, XFS_INEW)) { | 121 | if (xfs_iflags_test(ip, XFS_IRECLAIM)) { |
219 | read_unlock(&ih->ih_lock); | 122 | read_unlock(&pag->pag_ici_lock); |
220 | delay(1); | 123 | delay(1); |
221 | XFS_STATS_INC(xs_ig_frecycle); | 124 | XFS_STATS_INC(xs_ig_frecycle); |
222 | 125 | ||
223 | goto again; | 126 | goto again; |
224 | } | 127 | } |
128 | ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
225 | 129 | ||
226 | inode_vp = XFS_ITOV_NULL(ip); | 130 | /* |
227 | if (inode_vp == NULL) { | 131 | * If lookup is racing with unlink, then we |
228 | /* | 132 | * should return an error immediately so we |
229 | * If IRECLAIM is set this inode is | 133 | * don't remove it from the reclaim list and |
230 | * on its way out of the system, | 134 | * potentially leak the inode. |
231 | * we need to pause and try again. | 135 | */ |
232 | */ | 136 | if ((ip->i_d.di_mode == 0) && |
233 | if (xfs_iflags_test(ip, XFS_IRECLAIM)) { | 137 | !(flags & XFS_IGET_CREATE)) { |
234 | read_unlock(&ih->ih_lock); | 138 | read_unlock(&pag->pag_ici_lock); |
235 | delay(1); | 139 | xfs_put_perag(mp, pag); |
236 | XFS_STATS_INC(xs_ig_frecycle); | 140 | return ENOENT; |
237 | 141 | } | |
238 | goto again; | ||
239 | } | ||
240 | ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
241 | |||
242 | /* | ||
243 | * If lookup is racing with unlink, then we | ||
244 | * should return an error immediately so we | ||
245 | * don't remove it from the reclaim list and | ||
246 | * potentially leak the inode. | ||
247 | */ | ||
248 | if ((ip->i_d.di_mode == 0) && | ||
249 | !(flags & XFS_IGET_CREATE)) { | ||
250 | read_unlock(&ih->ih_lock); | ||
251 | return ENOENT; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * There may be transactions sitting in the | ||
256 | * incore log buffers or being flushed to disk | ||
257 | * at this time. We can't clear the | ||
258 | * XFS_IRECLAIMABLE flag until these | ||
259 | * transactions have hit the disk, otherwise we | ||
260 | * will void the guarantee the flag provides | ||
261 | * xfs_iunpin() | ||
262 | */ | ||
263 | if (xfs_ipincount(ip)) { | ||
264 | read_unlock(&ih->ih_lock); | ||
265 | xfs_log_force(mp, 0, | ||
266 | XFS_LOG_FORCE|XFS_LOG_SYNC); | ||
267 | XFS_STATS_INC(xs_ig_frecycle); | ||
268 | goto again; | ||
269 | } | ||
270 | |||
271 | vn_trace_exit(vp, "xfs_iget.alloc", | ||
272 | (inst_t *)__return_address); | ||
273 | 142 | ||
274 | XFS_STATS_INC(xs_ig_found); | 143 | /* |
144 | * There may be transactions sitting in the | ||
145 | * incore log buffers or being flushed to disk | ||
146 | * at this time. We can't clear the | ||
147 | * XFS_IRECLAIMABLE flag until these | ||
148 | * transactions have hit the disk, otherwise we | ||
149 | * will void the guarantee the flag provides | ||
150 | * xfs_iunpin() | ||
151 | */ | ||
152 | if (xfs_ipincount(ip)) { | ||
153 | read_unlock(&pag->pag_ici_lock); | ||
154 | xfs_log_force(mp, 0, | ||
155 | XFS_LOG_FORCE|XFS_LOG_SYNC); | ||
156 | XFS_STATS_INC(xs_ig_frecycle); | ||
157 | goto again; | ||
158 | } | ||
275 | 159 | ||
276 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | 160 | vn_trace_exit(vp, "xfs_iget.alloc", |
277 | version = ih->ih_version; | 161 | (inst_t *)__return_address); |
278 | read_unlock(&ih->ih_lock); | ||
279 | xfs_ihash_promote(ih, ip, version); | ||
280 | 162 | ||
281 | XFS_MOUNT_ILOCK(mp); | 163 | XFS_STATS_INC(xs_ig_found); |
282 | list_del_init(&ip->i_reclaim); | ||
283 | XFS_MOUNT_IUNLOCK(mp); | ||
284 | 164 | ||
285 | goto finish_inode; | 165 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); |
166 | read_unlock(&pag->pag_ici_lock); | ||
286 | 167 | ||
287 | } else if (vp != inode_vp) { | 168 | XFS_MOUNT_ILOCK(mp); |
288 | struct inode *inode = vn_to_inode(inode_vp); | 169 | list_del_init(&ip->i_reclaim); |
170 | XFS_MOUNT_IUNLOCK(mp); | ||
289 | 171 | ||
290 | /* The inode is being torn down, pause and | 172 | goto finish_inode; |
291 | * try again. | ||
292 | */ | ||
293 | if (inode->i_state & (I_FREEING | I_CLEAR)) { | ||
294 | read_unlock(&ih->ih_lock); | ||
295 | delay(1); | ||
296 | XFS_STATS_INC(xs_ig_frecycle); | ||
297 | 173 | ||
298 | goto again; | 174 | } else if (vp != inode_vp) { |
299 | } | 175 | struct inode *inode = vn_to_inode(inode_vp); |
300 | /* Chances are the other vnode (the one in the inode) is being torn | ||
301 | * down right now, and we landed on top of it. Question is, what do | ||
302 | * we do? Unhook the old inode and hook up the new one? | ||
303 | */ | ||
304 | cmn_err(CE_PANIC, | ||
305 | "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", | ||
306 | inode_vp, vp); | ||
307 | } | ||
308 | 176 | ||
309 | /* | 177 | /* The inode is being torn down, pause and |
310 | * Inode cache hit: if ip is not at the front of | 178 | * try again. |
311 | * its hash chain, move it there now. | ||
312 | * Do this with the lock held for update, but | ||
313 | * do statistics after releasing the lock. | ||
314 | */ | 179 | */ |
315 | version = ih->ih_version; | 180 | if (inode->i_state & (I_FREEING | I_CLEAR)) { |
316 | read_unlock(&ih->ih_lock); | 181 | read_unlock(&pag->pag_ici_lock); |
317 | xfs_ihash_promote(ih, ip, version); | 182 | delay(1); |
318 | XFS_STATS_INC(xs_ig_found); | 183 | XFS_STATS_INC(xs_ig_frecycle); |
319 | 184 | ||
320 | finish_inode: | 185 | goto again; |
321 | if (ip->i_d.di_mode == 0) { | ||
322 | if (!(flags & XFS_IGET_CREATE)) | ||
323 | return ENOENT; | ||
324 | xfs_iocore_inode_reinit(ip); | ||
325 | } | 186 | } |
187 | /* Chances are the other vnode (the one in the inode) is being torn | ||
188 | * down right now, and we landed on top of it. Question is, what do | ||
189 | * we do? Unhook the old inode and hook up the new one? | ||
190 | */ | ||
191 | cmn_err(CE_PANIC, | ||
192 | "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", | ||
193 | inode_vp, vp); | ||
194 | } | ||
326 | 195 | ||
327 | if (lock_flags != 0) | 196 | /* |
328 | xfs_ilock(ip, lock_flags); | 197 | * Inode cache hit |
198 | */ | ||
199 | read_unlock(&pag->pag_ici_lock); | ||
200 | XFS_STATS_INC(xs_ig_found); | ||
329 | 201 | ||
330 | xfs_iflags_clear(ip, XFS_ISTALE); | 202 | finish_inode: |
331 | vn_trace_exit(vp, "xfs_iget.found", | 203 | if (ip->i_d.di_mode == 0) { |
332 | (inst_t *)__return_address); | 204 | if (!(flags & XFS_IGET_CREATE)) { |
333 | goto return_ip; | 205 | xfs_put_perag(mp, pag); |
206 | return ENOENT; | ||
207 | } | ||
208 | xfs_iocore_inode_reinit(ip); | ||
334 | } | 209 | } |
210 | |||
211 | if (lock_flags != 0) | ||
212 | xfs_ilock(ip, lock_flags); | ||
213 | |||
214 | xfs_iflags_clear(ip, XFS_ISTALE); | ||
215 | vn_trace_exit(vp, "xfs_iget.found", | ||
216 | (inst_t *)__return_address); | ||
217 | goto return_ip; | ||
335 | } | 218 | } |
336 | 219 | ||
337 | /* | 220 | /* |
338 | * Inode cache miss: save the hash chain version stamp and unlock | 221 | * Inode cache miss |
339 | * the chain, so we don't deadlock in vn_alloc. | ||
340 | */ | 222 | */ |
223 | read_unlock(&pag->pag_ici_lock); | ||
341 | XFS_STATS_INC(xs_ig_missed); | 224 | XFS_STATS_INC(xs_ig_missed); |
342 | 225 | ||
343 | version = ih->ih_version; | ||
344 | |||
345 | read_unlock(&ih->ih_lock); | ||
346 | |||
347 | /* | 226 | /* |
348 | * Read the disk inode attributes into a new inode structure and get | 227 | * Read the disk inode attributes into a new inode structure and get |
349 | * a new vnode for it. This should also initialize i_ino and i_mount. | 228 | * a new vnode for it. This should also initialize i_ino and i_mount. |
350 | */ | 229 | */ |
351 | error = xfs_iread(mp, tp, ino, &ip, bno, | 230 | error = xfs_iread(mp, tp, ino, &ip, bno, |
352 | (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); | 231 | (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); |
353 | if (error) | 232 | if (error) { |
233 | xfs_put_perag(mp, pag); | ||
354 | return error; | 234 | return error; |
235 | } | ||
355 | 236 | ||
356 | vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); | 237 | vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); |
357 | 238 | ||
358 | xfs_inode_lock_init(ip, vp); | 239 | xfs_inode_lock_init(ip, vp); |
359 | xfs_iocore_inode_init(ip); | 240 | xfs_iocore_inode_init(ip); |
360 | |||
361 | if (lock_flags) | 241 | if (lock_flags) |
362 | xfs_ilock(ip, lock_flags); | 242 | xfs_ilock(ip, lock_flags); |
363 | 243 | ||
364 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | 244 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { |
365 | xfs_idestroy(ip); | 245 | xfs_idestroy(ip); |
246 | xfs_put_perag(mp, pag); | ||
366 | return ENOENT; | 247 | return ENOENT; |
367 | } | 248 | } |
368 | 249 | ||
369 | /* | 250 | /* |
370 | * Put ip on its hash chain, unless someone else hashed a duplicate | 251 | * This is a bit messy - we preallocate everything we _might_ |
371 | * after we released the hash lock. | 252 | * need before we pick up the ici lock. That way we don't have to |
253 | * juggle locks and go all the way back to the start. | ||
372 | */ | 254 | */ |
373 | write_lock(&ih->ih_lock); | 255 | new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP); |
256 | if (radix_tree_preload(GFP_KERNEL)) { | ||
257 | delay(1); | ||
258 | goto again; | ||
259 | } | ||
260 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | ||
261 | first_index = agino & mask; | ||
262 | write_lock(&pag->pag_ici_lock); | ||
374 | 263 | ||
375 | if (ih->ih_version != version) { | 264 | /* |
376 | for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) { | 265 | * Find the cluster if it exists |
377 | if (iq->i_ino == ino) { | 266 | */ |
378 | write_unlock(&ih->ih_lock); | 267 | icl = NULL; |
379 | xfs_idestroy(ip); | 268 | if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, |
269 | first_index, 1)) { | ||
270 | if ((iq->i_ino & mask) == first_index) | ||
271 | icl = iq->i_cluster; | ||
272 | } | ||
380 | 273 | ||
381 | XFS_STATS_INC(xs_ig_dup); | 274 | /* |
382 | goto again; | 275 | * insert the new inode |
383 | } | 276 | */ |
384 | } | 277 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
278 | if (unlikely(error)) { | ||
279 | BUG_ON(error != -EEXIST); | ||
280 | write_unlock(&pag->pag_ici_lock); | ||
281 | radix_tree_preload_end(); | ||
282 | xfs_idestroy(ip); | ||
283 | XFS_STATS_INC(xs_ig_dup); | ||
284 | goto again; | ||
385 | } | 285 | } |
386 | 286 | ||
387 | /* | 287 | /* |
388 | * These values _must_ be set before releasing ihlock! | 288 | * These values _must_ be set before releasing ihlock! |
389 | */ | 289 | */ |
390 | ip->i_hash = ih; | ||
391 | if ((iq = ih->ih_next)) { | ||
392 | iq->i_prevp = &ip->i_next; | ||
393 | } | ||
394 | ip->i_next = iq; | ||
395 | ip->i_prevp = &ih->ih_next; | ||
396 | ih->ih_next = ip; | ||
397 | ip->i_udquot = ip->i_gdquot = NULL; | 290 | ip->i_udquot = ip->i_gdquot = NULL; |
398 | ih->ih_version++; | ||
399 | xfs_iflags_set(ip, XFS_INEW); | 291 | xfs_iflags_set(ip, XFS_INEW); |
400 | write_unlock(&ih->ih_lock); | ||
401 | 292 | ||
402 | /* | 293 | ASSERT(ip->i_cluster == NULL); |
403 | * put ip on its cluster's hash chain | ||
404 | */ | ||
405 | ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL && | ||
406 | ip->i_cnext == NULL); | ||
407 | |||
408 | chlnew = NULL; | ||
409 | ch = XFS_CHASH(mp, ip->i_blkno); | ||
410 | chlredo: | ||
411 | s = mutex_spinlock(&ch->ch_lock); | ||
412 | for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { | ||
413 | if (chl->chl_blkno == ip->i_blkno) { | ||
414 | |||
415 | /* insert this inode into the doubly-linked list | ||
416 | * where chl points */ | ||
417 | if ((iq = chl->chl_ip)) { | ||
418 | ip->i_cprev = iq->i_cprev; | ||
419 | iq->i_cprev->i_cnext = ip; | ||
420 | iq->i_cprev = ip; | ||
421 | ip->i_cnext = iq; | ||
422 | } else { | ||
423 | ip->i_cnext = ip; | ||
424 | ip->i_cprev = ip; | ||
425 | } | ||
426 | chl->chl_ip = ip; | ||
427 | ip->i_chash = chl; | ||
428 | break; | ||
429 | } | ||
430 | } | ||
431 | 294 | ||
432 | /* no hash list found for this block; add a new hash list */ | 295 | if (!icl) { |
433 | if (chl == NULL) { | 296 | spin_lock_init(&new_icl->icl_lock); |
434 | if (chlnew == NULL) { | 297 | INIT_HLIST_HEAD(&new_icl->icl_inodes); |
435 | mutex_spinunlock(&ch->ch_lock, s); | 298 | icl = new_icl; |
436 | ASSERT(xfs_chashlist_zone != NULL); | 299 | new_icl = NULL; |
437 | chlnew = (xfs_chashlist_t *) | ||
438 | kmem_zone_alloc(xfs_chashlist_zone, | ||
439 | KM_SLEEP); | ||
440 | ASSERT(chlnew != NULL); | ||
441 | goto chlredo; | ||
442 | } else { | ||
443 | ip->i_cnext = ip; | ||
444 | ip->i_cprev = ip; | ||
445 | ip->i_chash = chlnew; | ||
446 | chlnew->chl_ip = ip; | ||
447 | chlnew->chl_blkno = ip->i_blkno; | ||
448 | if (ch->ch_list) | ||
449 | ch->ch_list->chl_prev = chlnew; | ||
450 | chlnew->chl_next = ch->ch_list; | ||
451 | chlnew->chl_prev = NULL; | ||
452 | ch->ch_list = chlnew; | ||
453 | chlnew = NULL; | ||
454 | } | ||
455 | } else { | 300 | } else { |
456 | if (chlnew != NULL) { | 301 | ASSERT(!hlist_empty(&icl->icl_inodes)); |
457 | kmem_zone_free(xfs_chashlist_zone, chlnew); | ||
458 | } | ||
459 | } | 302 | } |
303 | spin_lock(&icl->icl_lock); | ||
304 | hlist_add_head(&ip->i_cnode, &icl->icl_inodes); | ||
305 | ip->i_cluster = icl; | ||
306 | spin_unlock(&icl->icl_lock); | ||
460 | 307 | ||
461 | mutex_spinunlock(&ch->ch_lock, s); | 308 | write_unlock(&pag->pag_ici_lock); |
462 | 309 | radix_tree_preload_end(); | |
310 | if (new_icl) | ||
311 | kmem_zone_free(xfs_icluster_zone, new_icl); | ||
463 | 312 | ||
464 | /* | 313 | /* |
465 | * Link ip to its mount and thread it on the mount's inode list. | 314 | * Link ip to its mount and thread it on the mount's inode list. |
@@ -478,6 +327,7 @@ finish_inode: | |||
478 | mp->m_inodes = ip; | 327 | mp->m_inodes = ip; |
479 | 328 | ||
480 | XFS_MOUNT_IUNLOCK(mp); | 329 | XFS_MOUNT_IUNLOCK(mp); |
330 | xfs_put_perag(mp, pag); | ||
481 | 331 | ||
482 | return_ip: | 332 | return_ip: |
483 | ASSERT(ip->i_df.if_ext_max == | 333 | ASSERT(ip->i_df.if_ext_max == |
@@ -587,32 +437,19 @@ xfs_inode_incore(xfs_mount_t *mp, | |||
587 | xfs_ino_t ino, | 437 | xfs_ino_t ino, |
588 | xfs_trans_t *tp) | 438 | xfs_trans_t *tp) |
589 | { | 439 | { |
590 | xfs_ihash_t *ih; | ||
591 | xfs_inode_t *ip; | 440 | xfs_inode_t *ip; |
592 | ulong version; | 441 | xfs_perag_t *pag; |
593 | 442 | ||
594 | ih = XFS_IHASH(mp, ino); | 443 | pag = xfs_get_perag(mp, ino); |
595 | read_lock(&ih->ih_lock); | 444 | read_lock(&pag->pag_ici_lock); |
596 | for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { | 445 | ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); |
597 | if (ip->i_ino == ino) { | 446 | read_unlock(&pag->pag_ici_lock); |
598 | /* | 447 | xfs_put_perag(mp, pag); |
599 | * If we find it and tp matches, return it. | 448 | |
600 | * Also move it to the front of the hash list | 449 | /* the returned inode must match the transaction */ |
601 | * if we find it and it is not already there. | 450 | if (ip && (ip->i_transp != tp)) |
602 | * Otherwise break from the loop and return | 451 | return NULL; |
603 | * NULL. | 452 | return ip; |
604 | */ | ||
605 | if (ip->i_transp == tp) { | ||
606 | version = ih->ih_version; | ||
607 | read_unlock(&ih->ih_lock); | ||
608 | xfs_ihash_promote(ih, ip, version); | ||
609 | return (ip); | ||
610 | } | ||
611 | break; | ||
612 | } | ||
613 | } | ||
614 | read_unlock(&ih->ih_lock); | ||
615 | return (NULL); | ||
616 | } | 453 | } |
617 | 454 | ||
618 | /* | 455 | /* |
@@ -718,58 +555,26 @@ void | |||
718 | xfs_iextract( | 555 | xfs_iextract( |
719 | xfs_inode_t *ip) | 556 | xfs_inode_t *ip) |
720 | { | 557 | { |
721 | xfs_ihash_t *ih; | 558 | xfs_mount_t *mp = ip->i_mount; |
559 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
722 | xfs_inode_t *iq; | 560 | xfs_inode_t *iq; |
723 | xfs_mount_t *mp; | 561 | |
724 | xfs_chash_t *ch; | 562 | write_lock(&pag->pag_ici_lock); |
725 | xfs_chashlist_t *chl, *chm; | 563 | radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); |
726 | SPLDECL(s); | 564 | write_unlock(&pag->pag_ici_lock); |
727 | 565 | xfs_put_perag(mp, pag); | |
728 | ih = ip->i_hash; | ||
729 | write_lock(&ih->ih_lock); | ||
730 | if ((iq = ip->i_next)) { | ||
731 | iq->i_prevp = ip->i_prevp; | ||
732 | } | ||
733 | *ip->i_prevp = iq; | ||
734 | ih->ih_version++; | ||
735 | write_unlock(&ih->ih_lock); | ||
736 | 566 | ||
737 | /* | 567 | /* |
738 | * Remove from cluster hash list | 568 | * Remove from cluster list |
739 | * 1) delete the chashlist if this is the last inode on the chashlist | ||
740 | * 2) unchain from list of inodes | ||
741 | * 3) point chashlist->chl_ip to 'chl_next' if to this inode. | ||
742 | */ | 569 | */ |
743 | mp = ip->i_mount; | 570 | mp = ip->i_mount; |
744 | ch = XFS_CHASH(mp, ip->i_blkno); | 571 | spin_lock(&ip->i_cluster->icl_lock); |
745 | s = mutex_spinlock(&ch->ch_lock); | 572 | hlist_del(&ip->i_cnode); |
746 | 573 | spin_unlock(&ip->i_cluster->icl_lock); | |
747 | if (ip->i_cnext == ip) { | 574 | |
748 | /* Last inode on chashlist */ | 575 | /* was last inode in cluster? */ |
749 | ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); | 576 | if (hlist_empty(&ip->i_cluster->icl_inodes)) |
750 | ASSERT(ip->i_chash != NULL); | 577 | kmem_zone_free(xfs_icluster_zone, ip->i_cluster); |
751 | chm=NULL; | ||
752 | chl = ip->i_chash; | ||
753 | if (chl->chl_prev) | ||
754 | chl->chl_prev->chl_next = chl->chl_next; | ||
755 | else | ||
756 | ch->ch_list = chl->chl_next; | ||
757 | if (chl->chl_next) | ||
758 | chl->chl_next->chl_prev = chl->chl_prev; | ||
759 | kmem_zone_free(xfs_chashlist_zone, chl); | ||
760 | } else { | ||
761 | /* delete one inode from a non-empty list */ | ||
762 | iq = ip->i_cnext; | ||
763 | iq->i_cprev = ip->i_cprev; | ||
764 | ip->i_cprev->i_cnext = iq; | ||
765 | if (ip->i_chash->chl_ip == ip) { | ||
766 | ip->i_chash->chl_ip = iq; | ||
767 | } | ||
768 | ip->i_chash = __return_address; | ||
769 | ip->i_cprev = __return_address; | ||
770 | ip->i_cnext = __return_address; | ||
771 | } | ||
772 | mutex_spinunlock(&ch->ch_lock, s); | ||
773 | 578 | ||
774 | /* | 579 | /* |
775 | * Remove from mount's inode list. | 580 | * Remove from mount's inode list. |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 41a0c73b601a..c1b917bd5951 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -52,7 +52,7 @@ | |||
52 | 52 | ||
53 | kmem_zone_t *xfs_ifork_zone; | 53 | kmem_zone_t *xfs_ifork_zone; |
54 | kmem_zone_t *xfs_inode_zone; | 54 | kmem_zone_t *xfs_inode_zone; |
55 | kmem_zone_t *xfs_chashlist_zone; | 55 | kmem_zone_t *xfs_icluster_zone; |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * Used in xfs_itruncate(). This is the maximum number of extents | 58 | * Used in xfs_itruncate(). This is the maximum number of extents |
@@ -2182,10 +2182,10 @@ xfs_ifree_cluster( | |||
2182 | int i, j, found, pre_flushed; | 2182 | int i, j, found, pre_flushed; |
2183 | xfs_daddr_t blkno; | 2183 | xfs_daddr_t blkno; |
2184 | xfs_buf_t *bp; | 2184 | xfs_buf_t *bp; |
2185 | xfs_ihash_t *ih; | ||
2186 | xfs_inode_t *ip, **ip_found; | 2185 | xfs_inode_t *ip, **ip_found; |
2187 | xfs_inode_log_item_t *iip; | 2186 | xfs_inode_log_item_t *iip; |
2188 | xfs_log_item_t *lip; | 2187 | xfs_log_item_t *lip; |
2188 | xfs_perag_t *pag = xfs_get_perag(mp, inum); | ||
2189 | SPLDECL(s); | 2189 | SPLDECL(s); |
2190 | 2190 | ||
2191 | if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { | 2191 | if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { |
@@ -2220,23 +2220,20 @@ xfs_ifree_cluster( | |||
2220 | */ | 2220 | */ |
2221 | found = 0; | 2221 | found = 0; |
2222 | for (i = 0; i < ninodes; i++) { | 2222 | for (i = 0; i < ninodes; i++) { |
2223 | ih = XFS_IHASH(mp, inum + i); | 2223 | read_lock(&pag->pag_ici_lock); |
2224 | read_lock(&ih->ih_lock); | 2224 | ip = radix_tree_lookup(&pag->pag_ici_root, |
2225 | for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { | 2225 | XFS_INO_TO_AGINO(mp, (inum + i))); |
2226 | if (ip->i_ino == inum + i) | ||
2227 | break; | ||
2228 | } | ||
2229 | 2226 | ||
2230 | /* Inode not in memory or we found it already, | 2227 | /* Inode not in memory or we found it already, |
2231 | * nothing to do | 2228 | * nothing to do |
2232 | */ | 2229 | */ |
2233 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { | 2230 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { |
2234 | read_unlock(&ih->ih_lock); | 2231 | read_unlock(&pag->pag_ici_lock); |
2235 | continue; | 2232 | continue; |
2236 | } | 2233 | } |
2237 | 2234 | ||
2238 | if (xfs_inode_clean(ip)) { | 2235 | if (xfs_inode_clean(ip)) { |
2239 | read_unlock(&ih->ih_lock); | 2236 | read_unlock(&pag->pag_ici_lock); |
2240 | continue; | 2237 | continue; |
2241 | } | 2238 | } |
2242 | 2239 | ||
@@ -2259,7 +2256,7 @@ xfs_ifree_cluster( | |||
2259 | ip_found[found++] = ip; | 2256 | ip_found[found++] = ip; |
2260 | } | 2257 | } |
2261 | } | 2258 | } |
2262 | read_unlock(&ih->ih_lock); | 2259 | read_unlock(&pag->pag_ici_lock); |
2263 | continue; | 2260 | continue; |
2264 | } | 2261 | } |
2265 | 2262 | ||
@@ -2277,8 +2274,7 @@ xfs_ifree_cluster( | |||
2277 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 2274 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
2278 | } | 2275 | } |
2279 | } | 2276 | } |
2280 | 2277 | read_unlock(&pag->pag_ici_lock); | |
2281 | read_unlock(&ih->ih_lock); | ||
2282 | } | 2278 | } |
2283 | 2279 | ||
2284 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, | 2280 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, |
@@ -2333,6 +2329,7 @@ xfs_ifree_cluster( | |||
2333 | } | 2329 | } |
2334 | 2330 | ||
2335 | kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); | 2331 | kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); |
2332 | xfs_put_perag(mp, pag); | ||
2336 | } | 2333 | } |
2337 | 2334 | ||
2338 | /* | 2335 | /* |
@@ -3050,12 +3047,11 @@ xfs_iflush( | |||
3050 | xfs_mount_t *mp; | 3047 | xfs_mount_t *mp; |
3051 | int error; | 3048 | int error; |
3052 | /* REFERENCED */ | 3049 | /* REFERENCED */ |
3053 | xfs_chash_t *ch; | ||
3054 | xfs_inode_t *iq; | 3050 | xfs_inode_t *iq; |
3055 | int clcount; /* count of inodes clustered */ | 3051 | int clcount; /* count of inodes clustered */ |
3056 | int bufwasdelwri; | 3052 | int bufwasdelwri; |
3053 | struct hlist_node *entry; | ||
3057 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | 3054 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; |
3058 | SPLDECL(s); | ||
3059 | 3055 | ||
3060 | XFS_STATS_INC(xs_iflush_count); | 3056 | XFS_STATS_INC(xs_iflush_count); |
3061 | 3057 | ||
@@ -3169,14 +3165,14 @@ xfs_iflush( | |||
3169 | * inode clustering: | 3165 | * inode clustering: |
3170 | * see if other inodes can be gathered into this write | 3166 | * see if other inodes can be gathered into this write |
3171 | */ | 3167 | */ |
3172 | 3168 | spin_lock(&ip->i_cluster->icl_lock); | |
3173 | ip->i_chash->chl_buf = bp; | 3169 | ip->i_cluster->icl_buf = bp; |
3174 | |||
3175 | ch = XFS_CHASH(mp, ip->i_blkno); | ||
3176 | s = mutex_spinlock(&ch->ch_lock); | ||
3177 | 3170 | ||
3178 | clcount = 0; | 3171 | clcount = 0; |
3179 | for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) { | 3172 | hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { |
3173 | if (iq == ip) | ||
3174 | continue; | ||
3175 | |||
3180 | /* | 3176 | /* |
3181 | * Do an un-protected check to see if the inode is dirty and | 3177 | * Do an un-protected check to see if the inode is dirty and |
3182 | * is a candidate for flushing. These checks will be repeated | 3178 | * is a candidate for flushing. These checks will be repeated |
@@ -3227,7 +3223,7 @@ xfs_iflush( | |||
3227 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | 3223 | xfs_iunlock(iq, XFS_ILOCK_SHARED); |
3228 | } | 3224 | } |
3229 | } | 3225 | } |
3230 | mutex_spinunlock(&ch->ch_lock, s); | 3226 | spin_unlock(&ip->i_cluster->icl_lock); |
3231 | 3227 | ||
3232 | if (clcount) { | 3228 | if (clcount) { |
3233 | XFS_STATS_INC(xs_icluster_flushcnt); | 3229 | XFS_STATS_INC(xs_icluster_flushcnt); |
@@ -3264,7 +3260,7 @@ cluster_corrupt_out: | |||
3264 | /* Corruption detected in the clustering loop. Invalidate the | 3260 | /* Corruption detected in the clustering loop. Invalidate the |
3265 | * inode buffer and shut down the filesystem. | 3261 | * inode buffer and shut down the filesystem. |
3266 | */ | 3262 | */ |
3267 | mutex_spinunlock(&ch->ch_lock, s); | 3263 | spin_unlock(&ip->i_cluster->icl_lock); |
3268 | 3264 | ||
3269 | /* | 3265 | /* |
3270 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 3266 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 873b9f783d29..b6dd23d9b3d6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -172,41 +172,18 @@ typedef struct xfs_iocore { | |||
172 | extern void xfs_iocore_inode_init(struct xfs_inode *); | 172 | extern void xfs_iocore_inode_init(struct xfs_inode *); |
173 | extern void xfs_iocore_inode_reinit(struct xfs_inode *); | 173 | extern void xfs_iocore_inode_reinit(struct xfs_inode *); |
174 | 174 | ||
175 | |||
176 | /* | ||
177 | * This is the type used in the xfs inode hash table. | ||
178 | * An array of these is allocated for each mounted | ||
179 | * file system to hash the inodes for that file system. | ||
180 | */ | ||
181 | typedef struct xfs_ihash { | ||
182 | struct xfs_inode *ih_next; | ||
183 | rwlock_t ih_lock; | ||
184 | uint ih_version; | ||
185 | } xfs_ihash_t; | ||
186 | |||
187 | #define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize)) | ||
188 | |||
189 | /* | 175 | /* |
190 | * This is the xfs inode cluster hash. This hash is used by xfs_iflush to | 176 | * This is the xfs inode cluster structure. This structure is used by |
191 | * find inodes that share a cluster and can be flushed to disk at the same | 177 | * xfs_iflush to find inodes that share a cluster and can be flushed to disk at |
192 | * time. | 178 | * the same time. |
193 | */ | 179 | */ |
194 | typedef struct xfs_chashlist { | 180 | typedef struct xfs_icluster { |
195 | struct xfs_chashlist *chl_next; | 181 | struct hlist_head icl_inodes; /* list of inodes on cluster */ |
196 | struct xfs_chashlist *chl_prev; | 182 | xfs_daddr_t icl_blkno; /* starting block number of |
197 | struct xfs_inode *chl_ip; | ||
198 | xfs_daddr_t chl_blkno; /* starting block number of | ||
199 | * the cluster */ | 183 | * the cluster */ |
200 | struct xfs_buf *chl_buf; /* the inode buffer */ | 184 | struct xfs_buf *icl_buf; /* the inode buffer */ |
201 | } xfs_chashlist_t; | 185 | lock_t icl_lock; /* inode list lock */ |
202 | 186 | } xfs_icluster_t; | |
203 | typedef struct xfs_chash { | ||
204 | xfs_chashlist_t *ch_list; | ||
205 | lock_t ch_lock; | ||
206 | } xfs_chash_t; | ||
207 | |||
208 | #define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize)) | ||
209 | |||
210 | 187 | ||
211 | /* | 188 | /* |
212 | * This is the xfs in-core inode structure. | 189 | * This is the xfs in-core inode structure. |
@@ -269,21 +246,15 @@ typedef struct xfs_icdinode { | |||
269 | } xfs_icdinode_t; | 246 | } xfs_icdinode_t; |
270 | 247 | ||
271 | typedef struct { | 248 | typedef struct { |
272 | struct xfs_ihash *ip_hash; /* pointer to hash header */ | ||
273 | struct xfs_inode *ip_next; /* inode hash link forw */ | ||
274 | struct xfs_inode *ip_mnext; /* next inode in mount list */ | 249 | struct xfs_inode *ip_mnext; /* next inode in mount list */ |
275 | struct xfs_inode *ip_mprev; /* ptr to prev inode */ | 250 | struct xfs_inode *ip_mprev; /* ptr to prev inode */ |
276 | struct xfs_inode **ip_prevp; /* ptr to prev i_next */ | ||
277 | struct xfs_mount *ip_mount; /* fs mount struct ptr */ | 251 | struct xfs_mount *ip_mount; /* fs mount struct ptr */ |
278 | } xfs_iptr_t; | 252 | } xfs_iptr_t; |
279 | 253 | ||
280 | typedef struct xfs_inode { | 254 | typedef struct xfs_inode { |
281 | /* Inode linking and identification information. */ | 255 | /* Inode linking and identification information. */ |
282 | struct xfs_ihash *i_hash; /* pointer to hash header */ | ||
283 | struct xfs_inode *i_next; /* inode hash link forw */ | ||
284 | struct xfs_inode *i_mnext; /* next inode in mount list */ | 256 | struct xfs_inode *i_mnext; /* next inode in mount list */ |
285 | struct xfs_inode *i_mprev; /* ptr to prev inode */ | 257 | struct xfs_inode *i_mprev; /* ptr to prev inode */ |
286 | struct xfs_inode **i_prevp; /* ptr to prev i_next */ | ||
287 | struct xfs_mount *i_mount; /* fs mount struct ptr */ | 258 | struct xfs_mount *i_mount; /* fs mount struct ptr */ |
288 | struct list_head i_reclaim; /* reclaim list */ | 259 | struct list_head i_reclaim; /* reclaim list */ |
289 | struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ | 260 | struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ |
@@ -324,9 +295,8 @@ typedef struct xfs_inode { | |||
324 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 295 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
325 | 296 | ||
326 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 297 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
327 | xfs_chashlist_t *i_chash; /* cluster hash list header */ | 298 | xfs_icluster_t *i_cluster; /* cluster list header */ |
328 | struct xfs_inode *i_cnext; /* cluster hash link forward */ | 299 | struct hlist_node i_cnode; /* cluster link node */ |
329 | struct xfs_inode *i_cprev; /* cluster hash link backward */ | ||
330 | 300 | ||
331 | xfs_fsize_t i_size; /* in-memory size */ | 301 | xfs_fsize_t i_size; /* in-memory size */ |
332 | /* Trace buffers per inode. */ | 302 | /* Trace buffers per inode. */ |
@@ -521,8 +491,6 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) | |||
521 | */ | 491 | */ |
522 | void xfs_ihash_init(struct xfs_mount *); | 492 | void xfs_ihash_init(struct xfs_mount *); |
523 | void xfs_ihash_free(struct xfs_mount *); | 493 | void xfs_ihash_free(struct xfs_mount *); |
524 | void xfs_chash_init(struct xfs_mount *); | ||
525 | void xfs_chash_free(struct xfs_mount *); | ||
526 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, | 494 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, |
527 | struct xfs_trans *); | 495 | struct xfs_trans *); |
528 | void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); | 496 | void xfs_inode_lock_init(xfs_inode_t *, struct bhv_vnode *); |
@@ -633,7 +601,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | |||
633 | #define xfs_inobp_check(mp, bp) | 601 | #define xfs_inobp_check(mp, bp) |
634 | #endif /* DEBUG */ | 602 | #endif /* DEBUG */ |
635 | 603 | ||
636 | extern struct kmem_zone *xfs_chashlist_zone; | 604 | extern struct kmem_zone *xfs_icluster_zone; |
637 | extern struct kmem_zone *xfs_ifork_zone; | 605 | extern struct kmem_zone *xfs_ifork_zone; |
638 | extern struct kmem_zone *xfs_inode_zone; | 606 | extern struct kmem_zone *xfs_inode_zone; |
639 | extern struct kmem_zone *xfs_ili_zone; | 607 | extern struct kmem_zone *xfs_ili_zone; |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f4daf1ec9931..71f25947251d 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -160,11 +160,6 @@ xfs_mount_free( | |||
160 | xfs_mount_t *mp, | 160 | xfs_mount_t *mp, |
161 | int remove_bhv) | 161 | int remove_bhv) |
162 | { | 162 | { |
163 | if (mp->m_ihash) | ||
164 | xfs_ihash_free(mp); | ||
165 | if (mp->m_chash) | ||
166 | xfs_chash_free(mp); | ||
167 | |||
168 | if (mp->m_perag) { | 163 | if (mp->m_perag) { |
169 | int agno; | 164 | int agno; |
170 | 165 | ||
@@ -342,6 +337,17 @@ xfs_mount_validate_sb( | |||
342 | return 0; | 337 | return 0; |
343 | } | 338 | } |
344 | 339 | ||
340 | STATIC void | ||
341 | xfs_initialize_perag_icache( | ||
342 | xfs_perag_t *pag) | ||
343 | { | ||
344 | if (!pag->pag_ici_init) { | ||
345 | rwlock_init(&pag->pag_ici_lock); | ||
346 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | ||
347 | pag->pag_ici_init = 1; | ||
348 | } | ||
349 | } | ||
350 | |||
345 | xfs_agnumber_t | 351 | xfs_agnumber_t |
346 | xfs_initialize_perag( | 352 | xfs_initialize_perag( |
347 | bhv_vfs_t *vfs, | 353 | bhv_vfs_t *vfs, |
@@ -396,12 +402,14 @@ xfs_initialize_perag( | |||
396 | pag->pagi_inodeok = 1; | 402 | pag->pagi_inodeok = 1; |
397 | if (index < max_metadata) | 403 | if (index < max_metadata) |
398 | pag->pagf_metadata = 1; | 404 | pag->pagf_metadata = 1; |
405 | xfs_initialize_perag_icache(pag); | ||
399 | } | 406 | } |
400 | } else { | 407 | } else { |
401 | /* Setup default behavior for smaller filesystems */ | 408 | /* Setup default behavior for smaller filesystems */ |
402 | for (index = 0; index < agcount; index++) { | 409 | for (index = 0; index < agcount; index++) { |
403 | pag = &mp->m_perag[index]; | 410 | pag = &mp->m_perag[index]; |
404 | pag->pagi_inodeok = 1; | 411 | pag->pagi_inodeok = 1; |
412 | xfs_initialize_perag_icache(pag); | ||
405 | } | 413 | } |
406 | } | 414 | } |
407 | return index; | 415 | return index; |
@@ -1033,13 +1041,6 @@ xfs_mountfs( | |||
1033 | xfs_trans_init(mp); | 1041 | xfs_trans_init(mp); |
1034 | 1042 | ||
1035 | /* | 1043 | /* |
1036 | * Allocate and initialize the inode hash table for this | ||
1037 | * file system. | ||
1038 | */ | ||
1039 | xfs_ihash_init(mp); | ||
1040 | xfs_chash_init(mp); | ||
1041 | |||
1042 | /* | ||
1043 | * Allocate and initialize the per-ag data. | 1044 | * Allocate and initialize the per-ag data. |
1044 | */ | 1045 | */ |
1045 | init_rwsem(&mp->m_peraglock); | 1046 | init_rwsem(&mp->m_peraglock); |
@@ -1190,8 +1191,6 @@ xfs_mountfs( | |||
1190 | error3: | 1191 | error3: |
1191 | xfs_log_unmount_dealloc(mp); | 1192 | xfs_log_unmount_dealloc(mp); |
1192 | error2: | 1193 | error2: |
1193 | xfs_ihash_free(mp); | ||
1194 | xfs_chash_free(mp); | ||
1195 | for (agno = 0; agno < sbp->sb_agcount; agno++) | 1194 | for (agno = 0; agno < sbp->sb_agcount; agno++) |
1196 | if (mp->m_perag[agno].pagb_list) | 1195 | if (mp->m_perag[agno].pagb_list) |
1197 | kmem_free(mp->m_perag[agno].pagb_list, | 1196 | kmem_free(mp->m_perag[agno].pagb_list, |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 9ceff40326d0..bc23cb407701 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -57,10 +57,7 @@ struct log; | |||
57 | struct bhv_vfs; | 57 | struct bhv_vfs; |
58 | struct bhv_vnode; | 58 | struct bhv_vnode; |
59 | struct xfs_mount_args; | 59 | struct xfs_mount_args; |
60 | struct xfs_ihash; | ||
61 | struct xfs_chash; | ||
62 | struct xfs_inode; | 60 | struct xfs_inode; |
63 | struct xfs_perag; | ||
64 | struct xfs_iocore; | 61 | struct xfs_iocore; |
65 | struct xfs_bmbt_irec; | 62 | struct xfs_bmbt_irec; |
66 | struct xfs_bmap_free; | 63 | struct xfs_bmap_free; |
@@ -335,8 +332,6 @@ typedef struct xfs_mount { | |||
335 | xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ | 332 | xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ |
336 | lock_t m_agirotor_lock;/* .. and lock protecting it */ | 333 | lock_t m_agirotor_lock;/* .. and lock protecting it */ |
337 | xfs_agnumber_t m_maxagi; /* highest inode alloc group */ | 334 | xfs_agnumber_t m_maxagi; /* highest inode alloc group */ |
338 | size_t m_ihsize; /* size of next field */ | ||
339 | struct xfs_ihash *m_ihash; /* fs private inode hash table*/ | ||
340 | struct xfs_inode *m_inodes; /* active inode list */ | 335 | struct xfs_inode *m_inodes; /* active inode list */ |
341 | struct list_head m_del_inodes; /* inodes to reclaim */ | 336 | struct list_head m_del_inodes; /* inodes to reclaim */ |
342 | mutex_t m_ilock; /* inode list mutex */ | 337 | mutex_t m_ilock; /* inode list mutex */ |
@@ -458,7 +453,7 @@ typedef struct xfs_mount { | |||
458 | #define XFS_MOUNT_IDELETE (1ULL << 18) /* delete empty inode clusters*/ | 453 | #define XFS_MOUNT_IDELETE (1ULL << 18) /* delete empty inode clusters*/ |
459 | #define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width | 454 | #define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width |
460 | * allocation */ | 455 | * allocation */ |
461 | #define XFS_MOUNT_IHASHSIZE (1ULL << 20) /* inode hash table size */ | 456 | /* (1ULL << 20) -- currently unused */ |
462 | #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ | 457 | #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ |
463 | #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred | 458 | #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred |
464 | * I/O size in stat() */ | 459 | * I/O size in stat() */ |
@@ -572,6 +567,21 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
572 | } | 567 | } |
573 | 568 | ||
574 | /* | 569 | /* |
570 | * perag get/put wrappers for eventual ref counting | ||
571 | */ | ||
572 | static inline xfs_perag_t * | ||
573 | xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino) | ||
574 | { | ||
575 | return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)]; | ||
576 | } | ||
577 | |||
578 | static inline void | ||
579 | xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag) | ||
580 | { | ||
581 | /* nothing to see here, move along */ | ||
582 | } | ||
583 | |||
584 | /* | ||
575 | * Per-cpu superblock locking functions | 585 | * Per-cpu superblock locking functions |
576 | */ | 586 | */ |
577 | #ifdef HAVE_PERCPU_SB | 587 | #ifdef HAVE_PERCPU_SB |
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 53d9600af4a4..187318e8d259 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dir2.h" | 26 | #include "xfs_dir2.h" |
26 | #include "xfs_dmapi.h" | 27 | #include "xfs_dmapi.h" |
27 | #include "xfs_mount.h" | 28 | #include "xfs_mount.h" |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index ceb4f6e99960..5b2ff59f19cf 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dmapi.h" | 26 | #include "xfs_dmapi.h" |
26 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
27 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index b290270dd4a6..27cce2a9c7e9 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "xfs_inum.h" | 22 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 23 | #include "xfs_trans.h" |
24 | #include "xfs_sb.h" | 24 | #include "xfs_sb.h" |
25 | #include "xfs_ag.h" | ||
25 | #include "xfs_dmapi.h" | 26 | #include "xfs_dmapi.h" |
26 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
27 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 4a27648b5446..1644be14a144 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -117,8 +117,8 @@ xfs_init(void) | |||
117 | xfs_ili_zone = | 117 | xfs_ili_zone = |
118 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", | 118 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", |
119 | KM_ZONE_SPREAD, NULL); | 119 | KM_ZONE_SPREAD, NULL); |
120 | xfs_chashlist_zone = | 120 | xfs_icluster_zone = |
121 | kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist", | 121 | kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster", |
122 | KM_ZONE_SPREAD, NULL); | 122 | KM_ZONE_SPREAD, NULL); |
123 | 123 | ||
124 | /* | 124 | /* |
@@ -163,7 +163,7 @@ xfs_cleanup(void) | |||
163 | extern kmem_zone_t *xfs_efd_zone; | 163 | extern kmem_zone_t *xfs_efd_zone; |
164 | extern kmem_zone_t *xfs_efi_zone; | 164 | extern kmem_zone_t *xfs_efi_zone; |
165 | extern kmem_zone_t *xfs_buf_item_zone; | 165 | extern kmem_zone_t *xfs_buf_item_zone; |
166 | extern kmem_zone_t *xfs_chashlist_zone; | 166 | extern kmem_zone_t *xfs_icluster_zone; |
167 | 167 | ||
168 | xfs_cleanup_procfs(); | 168 | xfs_cleanup_procfs(); |
169 | xfs_sysctl_unregister(); | 169 | xfs_sysctl_unregister(); |
@@ -199,7 +199,7 @@ xfs_cleanup(void) | |||
199 | kmem_zone_destroy(xfs_efi_zone); | 199 | kmem_zone_destroy(xfs_efi_zone); |
200 | kmem_zone_destroy(xfs_ifork_zone); | 200 | kmem_zone_destroy(xfs_ifork_zone); |
201 | kmem_zone_destroy(xfs_ili_zone); | 201 | kmem_zone_destroy(xfs_ili_zone); |
202 | kmem_zone_destroy(xfs_chashlist_zone); | 202 | kmem_zone_destroy(xfs_icluster_zone); |
203 | } | 203 | } |
204 | 204 | ||
205 | /* | 205 | /* |
@@ -246,7 +246,6 @@ xfs_start_flags( | |||
246 | ap->logbufsize); | 246 | ap->logbufsize); |
247 | return XFS_ERROR(EINVAL); | 247 | return XFS_ERROR(EINVAL); |
248 | } | 248 | } |
249 | mp->m_ihsize = ap->ihashsize; | ||
250 | mp->m_logbsize = ap->logbufsize; | 249 | mp->m_logbsize = ap->logbufsize; |
251 | mp->m_fsname_len = strlen(ap->fsname) + 1; | 250 | mp->m_fsname_len = strlen(ap->fsname) + 1; |
252 | mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); | 251 | mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); |
@@ -293,8 +292,6 @@ xfs_start_flags( | |||
293 | mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; | 292 | mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; |
294 | } | 293 | } |
295 | 294 | ||
296 | if (ap->flags & XFSMNT_IHASHSIZE) | ||
297 | mp->m_flags |= XFS_MOUNT_IHASHSIZE; | ||
298 | if (ap->flags & XFSMNT_IDELETE) | 295 | if (ap->flags & XFSMNT_IDELETE) |
299 | mp->m_flags |= XFS_MOUNT_IDELETE; | 296 | mp->m_flags |= XFS_MOUNT_IDELETE; |
300 | if (ap->flags & XFSMNT_DIRSYNC) | 297 | if (ap->flags & XFSMNT_DIRSYNC) |
@@ -1673,7 +1670,6 @@ xfs_vget( | |||
1673 | #define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ | 1670 | #define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ |
1674 | #define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ | 1671 | #define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ |
1675 | #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ | 1672 | #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ |
1676 | #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ | ||
1677 | #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ | 1673 | #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ |
1678 | #define MNTOPT_BARRIER "barrier" /* use write barriers for log write and | 1674 | #define MNTOPT_BARRIER "barrier" /* use write barriers for log write and |
1679 | * unwritten extent conversion */ | 1675 | * unwritten extent conversion */ |
@@ -1799,15 +1795,6 @@ xfs_parseargs( | |||
1799 | iosize = suffix_strtoul(value, &eov, 10); | 1795 | iosize = suffix_strtoul(value, &eov, 10); |
1800 | args->flags |= XFSMNT_IOSIZE; | 1796 | args->flags |= XFSMNT_IOSIZE; |
1801 | args->iosizelog = ffs(iosize) - 1; | 1797 | args->iosizelog = ffs(iosize) - 1; |
1802 | } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) { | ||
1803 | if (!value || !*value) { | ||
1804 | cmn_err(CE_WARN, | ||
1805 | "XFS: %s option requires an argument", | ||
1806 | this_char); | ||
1807 | return EINVAL; | ||
1808 | } | ||
1809 | args->flags |= XFSMNT_IHASHSIZE; | ||
1810 | args->ihashsize = simple_strtoul(value, &eov, 10); | ||
1811 | } else if (!strcmp(this_char, MNTOPT_GRPID) || | 1798 | } else if (!strcmp(this_char, MNTOPT_GRPID) || |
1812 | !strcmp(this_char, MNTOPT_BSDGROUPS)) { | 1799 | !strcmp(this_char, MNTOPT_BSDGROUPS)) { |
1813 | vfsp->vfs_flag |= VFS_GRPID; | 1800 | vfsp->vfs_flag |= VFS_GRPID; |
@@ -1876,6 +1863,9 @@ xfs_parseargs( | |||
1876 | args->flags &= ~XFSMNT_ATTR2; | 1863 | args->flags &= ~XFSMNT_ATTR2; |
1877 | } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { | 1864 | } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { |
1878 | args->flags2 |= XFSMNT2_FILESTREAMS; | 1865 | args->flags2 |= XFSMNT2_FILESTREAMS; |
1866 | } else if (!strcmp(this_char, "ihashsize")) { | ||
1867 | cmn_err(CE_WARN, | ||
1868 | "XFS: ihashsize no longer used, option is deprecated."); | ||
1879 | } else if (!strcmp(this_char, "osyncisdsync")) { | 1869 | } else if (!strcmp(this_char, "osyncisdsync")) { |
1880 | /* no-op, this is now the default */ | 1870 | /* no-op, this is now the default */ |
1881 | cmn_err(CE_WARN, | 1871 | cmn_err(CE_WARN, |
@@ -1966,9 +1956,6 @@ xfs_showargs( | |||
1966 | seq_puts(m, xfs_infop->str); | 1956 | seq_puts(m, xfs_infop->str); |
1967 | } | 1957 | } |
1968 | 1958 | ||
1969 | if (mp->m_flags & XFS_MOUNT_IHASHSIZE) | ||
1970 | seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize); | ||
1971 | |||
1972 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) | 1959 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) |
1973 | seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", | 1960 | seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", |
1974 | (int)(1 << mp->m_writeio_log) >> 10); | 1961 | (int)(1 << mp->m_writeio_log) >> 10); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index bde4a1ad90f2..15bc01b2d6a0 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -3876,7 +3876,7 @@ xfs_finish_reclaim( | |||
3876 | int locked, | 3876 | int locked, |
3877 | int sync_mode) | 3877 | int sync_mode) |
3878 | { | 3878 | { |
3879 | xfs_ihash_t *ih = ip->i_hash; | 3879 | xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); |
3880 | bhv_vnode_t *vp = XFS_ITOV_NULL(ip); | 3880 | bhv_vnode_t *vp = XFS_ITOV_NULL(ip); |
3881 | int error; | 3881 | int error; |
3882 | 3882 | ||
@@ -3888,12 +3888,12 @@ xfs_finish_reclaim( | |||
3888 | * Once we have the XFS_IRECLAIM flag set it will not touch | 3888 | * Once we have the XFS_IRECLAIM flag set it will not touch |
3889 | * us. | 3889 | * us. |
3890 | */ | 3890 | */ |
3891 | write_lock(&ih->ih_lock); | 3891 | write_lock(&pag->pag_ici_lock); |
3892 | spin_lock(&ip->i_flags_lock); | 3892 | spin_lock(&ip->i_flags_lock); |
3893 | if (__xfs_iflags_test(ip, XFS_IRECLAIM) || | 3893 | if (__xfs_iflags_test(ip, XFS_IRECLAIM) || |
3894 | (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { | 3894 | (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { |
3895 | spin_unlock(&ip->i_flags_lock); | 3895 | spin_unlock(&ip->i_flags_lock); |
3896 | write_unlock(&ih->ih_lock); | 3896 | write_unlock(&pag->pag_ici_lock); |
3897 | if (locked) { | 3897 | if (locked) { |
3898 | xfs_ifunlock(ip); | 3898 | xfs_ifunlock(ip); |
3899 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 3899 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
@@ -3902,7 +3902,8 @@ xfs_finish_reclaim( | |||
3902 | } | 3902 | } |
3903 | __xfs_iflags_set(ip, XFS_IRECLAIM); | 3903 | __xfs_iflags_set(ip, XFS_IRECLAIM); |
3904 | spin_unlock(&ip->i_flags_lock); | 3904 | spin_unlock(&ip->i_flags_lock); |
3905 | write_unlock(&ih->ih_lock); | 3905 | write_unlock(&pag->pag_ici_lock); |
3906 | xfs_put_perag(ip->i_mount, pag); | ||
3906 | 3907 | ||
3907 | /* | 3908 | /* |
3908 | * If the inode is still dirty, then flush it out. If the inode | 3909 | * If the inode is still dirty, then flush it out. If the inode |