diff options
-rw-r--r-- | fs/xfs/xfs_iget.c | 348 |
1 files changed, 191 insertions, 157 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 58865fe47806..b2539b17c954 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -40,161 +40,119 @@ | |||
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Look up an inode by number in the given file system. | 43 | * Check the validity of the inode we just found in the cache |
44 | * The inode is looked up in the cache held in each AG. | ||
45 | * If the inode is found in the cache, attach it to the provided | ||
46 | * vnode. | ||
47 | * | ||
48 | * If it is not in core, read it in from the file system's device, | ||
49 | * add it to the cache and attach the provided vnode. | ||
50 | * | ||
51 | * The inode is locked according to the value of the lock_flags parameter. | ||
52 | * This flag parameter indicates how and if the inode's IO lock and inode lock | ||
53 | * should be taken. | ||
54 | * | ||
55 | * mp -- the mount point structure for the current file system. It points | ||
56 | * to the inode hash table. | ||
57 | * tp -- a pointer to the current transaction if there is one. This is | ||
58 | * simply passed through to the xfs_iread() call. | ||
59 | * ino -- the number of the inode desired. This is the unique identifier | ||
60 | * within the file system for the inode being requested. | ||
61 | * lock_flags -- flags indicating how to lock the inode. See the comment | ||
62 | * for xfs_ilock() for a list of valid values. | ||
63 | * bno -- the block number starting the buffer containing the inode, | ||
64 | * if known (as by bulkstat), else 0. | ||
65 | */ | 44 | */ |
66 | STATIC int | 45 | static int |
67 | xfs_iget_core( | 46 | xfs_iget_cache_hit( |
68 | struct inode *inode, | 47 | struct inode *inode, |
69 | xfs_mount_t *mp, | 48 | struct xfs_perag *pag, |
70 | xfs_trans_t *tp, | 49 | struct xfs_inode *ip, |
71 | xfs_ino_t ino, | 50 | int flags, |
72 | uint flags, | 51 | int lock_flags) __releases(pag->pag_ici_lock) |
73 | uint lock_flags, | ||
74 | xfs_inode_t **ipp, | ||
75 | xfs_daddr_t bno) | ||
76 | { | 52 | { |
77 | struct inode *old_inode; | 53 | struct xfs_mount *mp = ip->i_mount; |
78 | xfs_inode_t *ip; | 54 | struct inode *old_inode; |
79 | int error; | 55 | int error = 0; |
80 | unsigned long first_index, mask; | ||
81 | xfs_perag_t *pag; | ||
82 | xfs_agino_t agino; | ||
83 | 56 | ||
84 | /* the radix tree exists only in inode capable AGs */ | 57 | /* |
85 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | 58 | * If INEW is set this inode is being set up |
86 | return EINVAL; | 59 | * Pause and try again. |
87 | 60 | */ | |
88 | /* get the perag structure and ensure that it's inode capable */ | 61 | if (xfs_iflags_test(ip, XFS_INEW)) { |
89 | pag = xfs_get_perag(mp, ino); | 62 | error = EAGAIN; |
90 | if (!pag->pagi_inodeok) | 63 | XFS_STATS_INC(xs_ig_frecycle); |
91 | return EINVAL; | 64 | goto out_error; |
92 | ASSERT(pag->pag_ici_init); | 65 | } |
93 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
94 | |||
95 | again: | ||
96 | read_lock(&pag->pag_ici_lock); | ||
97 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | ||
98 | 66 | ||
99 | if (ip != NULL) { | 67 | old_inode = ip->i_vnode; |
68 | if (old_inode == NULL) { | ||
100 | /* | 69 | /* |
101 | * If INEW is set this inode is being set up | 70 | * If IRECLAIM is set this inode is |
71 | * on its way out of the system, | ||
102 | * we need to pause and try again. | 72 | * we need to pause and try again. |
103 | */ | 73 | */ |
104 | if (xfs_iflags_test(ip, XFS_INEW)) { | 74 | if (xfs_iflags_test(ip, XFS_IRECLAIM)) { |
105 | read_unlock(&pag->pag_ici_lock); | 75 | error = EAGAIN; |
106 | delay(1); | ||
107 | XFS_STATS_INC(xs_ig_frecycle); | 76 | XFS_STATS_INC(xs_ig_frecycle); |
77 | goto out_error; | ||
78 | } | ||
79 | ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
108 | 80 | ||
109 | goto again; | 81 | /* |
82 | * If lookup is racing with unlink, then we | ||
83 | * should return an error immediately so we | ||
84 | * don't remove it from the reclaim list and | ||
85 | * potentially leak the inode. | ||
86 | */ | ||
87 | if ((ip->i_d.di_mode == 0) && | ||
88 | !(flags & XFS_IGET_CREATE)) { | ||
89 | error = ENOENT; | ||
90 | goto out_error; | ||
110 | } | 91 | } |
92 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | ||
111 | 93 | ||
112 | old_inode = ip->i_vnode; | 94 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); |
113 | if (old_inode == NULL) { | 95 | read_unlock(&pag->pag_ici_lock); |
114 | /* | 96 | |
115 | * If IRECLAIM is set this inode is | 97 | XFS_MOUNT_ILOCK(mp); |
116 | * on its way out of the system, | 98 | list_del_init(&ip->i_reclaim); |
117 | * we need to pause and try again. | 99 | XFS_MOUNT_IUNLOCK(mp); |
118 | */ | 100 | |
119 | if (xfs_iflags_test(ip, XFS_IRECLAIM)) { | 101 | } else if (inode != old_inode) { |
120 | read_unlock(&pag->pag_ici_lock); | 102 | /* The inode is being torn down, pause and |
121 | delay(1); | 103 | * try again. |
122 | XFS_STATS_INC(xs_ig_frecycle); | 104 | */ |
123 | 105 | if (old_inode->i_state & (I_FREEING | I_CLEAR)) { | |
124 | goto again; | 106 | error = EAGAIN; |
125 | } | 107 | XFS_STATS_INC(xs_ig_frecycle); |
126 | ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 108 | goto out_error; |
127 | 109 | } | |
128 | /* | ||
129 | * If lookup is racing with unlink, then we | ||
130 | * should return an error immediately so we | ||
131 | * don't remove it from the reclaim list and | ||
132 | * potentially leak the inode. | ||
133 | */ | ||
134 | if ((ip->i_d.di_mode == 0) && | ||
135 | !(flags & XFS_IGET_CREATE)) { | ||
136 | read_unlock(&pag->pag_ici_lock); | ||
137 | xfs_put_perag(mp, pag); | ||
138 | return ENOENT; | ||
139 | } | ||
140 | |||
141 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | ||
142 | |||
143 | XFS_STATS_INC(xs_ig_found); | ||
144 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
145 | read_unlock(&pag->pag_ici_lock); | ||
146 | |||
147 | XFS_MOUNT_ILOCK(mp); | ||
148 | list_del_init(&ip->i_reclaim); | ||
149 | XFS_MOUNT_IUNLOCK(mp); | ||
150 | |||
151 | goto finish_inode; | ||
152 | |||
153 | } else if (inode != old_inode) { | ||
154 | /* The inode is being torn down, pause and | ||
155 | * try again. | ||
156 | */ | ||
157 | if (old_inode->i_state & (I_FREEING | I_CLEAR)) { | ||
158 | read_unlock(&pag->pag_ici_lock); | ||
159 | delay(1); | ||
160 | XFS_STATS_INC(xs_ig_frecycle); | ||
161 | |||
162 | goto again; | ||
163 | } | ||
164 | /* Chances are the other vnode (the one in the inode) is being torn | 110 | /* Chances are the other vnode (the one in the inode) is being torn |
165 | * down right now, and we landed on top of it. Question is, what do | 111 | * down right now, and we landed on top of it. Question is, what do |
166 | * we do? Unhook the old inode and hook up the new one? | 112 | * we do? Unhook the old inode and hook up the new one? |
167 | */ | 113 | */ |
168 | cmn_err(CE_PANIC, | 114 | cmn_err(CE_PANIC, |
169 | "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", | 115 | "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", |
170 | old_inode, inode); | 116 | old_inode, inode); |
171 | } | 117 | } else { |
172 | |||
173 | /* | ||
174 | * Inode cache hit | ||
175 | */ | ||
176 | read_unlock(&pag->pag_ici_lock); | 118 | read_unlock(&pag->pag_ici_lock); |
177 | XFS_STATS_INC(xs_ig_found); | 119 | } |
178 | 120 | ||
179 | finish_inode: | 121 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
180 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 122 | error = ENOENT; |
181 | xfs_put_perag(mp, pag); | 123 | goto out; |
182 | return ENOENT; | 124 | } |
183 | } | ||
184 | 125 | ||
185 | if (lock_flags != 0) | 126 | if (lock_flags != 0) |
186 | xfs_ilock(ip, lock_flags); | 127 | xfs_ilock(ip, lock_flags); |
187 | 128 | ||
188 | xfs_iflags_clear(ip, XFS_ISTALE); | 129 | xfs_iflags_clear(ip, XFS_ISTALE); |
189 | xfs_itrace_exit_tag(ip, "xfs_iget.found"); | 130 | xfs_itrace_exit_tag(ip, "xfs_iget.found"); |
190 | goto return_ip; | 131 | XFS_STATS_INC(xs_ig_found); |
191 | } | 132 | return 0; |
192 | 133 | ||
193 | /* | 134 | out_error: |
194 | * Inode cache miss | ||
195 | */ | ||
196 | read_unlock(&pag->pag_ici_lock); | 135 | read_unlock(&pag->pag_ici_lock); |
197 | XFS_STATS_INC(xs_ig_missed); | 136 | out: |
137 | return error; | ||
138 | } | ||
139 | |||
140 | |||
141 | static int | ||
142 | xfs_iget_cache_miss( | ||
143 | struct xfs_mount *mp, | ||
144 | struct xfs_perag *pag, | ||
145 | xfs_trans_t *tp, | ||
146 | xfs_ino_t ino, | ||
147 | struct xfs_inode **ipp, | ||
148 | xfs_daddr_t bno, | ||
149 | int flags, | ||
150 | int lock_flags) __releases(pag->pag_ici_lock) | ||
151 | { | ||
152 | struct xfs_inode *ip; | ||
153 | int error; | ||
154 | unsigned long first_index, mask; | ||
155 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); | ||
198 | 156 | ||
199 | /* | 157 | /* |
200 | * Read the disk inode attributes into a new inode structure and get | 158 | * Read the disk inode attributes into a new inode structure and get |
@@ -202,17 +160,14 @@ finish_inode: | |||
202 | */ | 160 | */ |
203 | error = xfs_iread(mp, tp, ino, &ip, bno, | 161 | error = xfs_iread(mp, tp, ino, &ip, bno, |
204 | (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); | 162 | (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); |
205 | if (error) { | 163 | if (error) |
206 | xfs_put_perag(mp, pag); | ||
207 | return error; | 164 | return error; |
208 | } | ||
209 | 165 | ||
210 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 166 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
211 | 167 | ||
212 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | 168 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { |
213 | xfs_idestroy(ip); | 169 | error = ENOENT; |
214 | xfs_put_perag(mp, pag); | 170 | goto out_destroy; |
215 | return ENOENT; | ||
216 | } | 171 | } |
217 | 172 | ||
218 | /* | 173 | /* |
@@ -220,9 +175,8 @@ finish_inode: | |||
220 | * write spinlock. | 175 | * write spinlock. |
221 | */ | 176 | */ |
222 | if (radix_tree_preload(GFP_KERNEL)) { | 177 | if (radix_tree_preload(GFP_KERNEL)) { |
223 | xfs_idestroy(ip); | 178 | error = EAGAIN; |
224 | delay(1); | 179 | goto out_destroy; |
225 | goto again; | ||
226 | } | 180 | } |
227 | 181 | ||
228 | if (lock_flags) | 182 | if (lock_flags) |
@@ -231,32 +185,104 @@ finish_inode: | |||
231 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 185 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
232 | first_index = agino & mask; | 186 | first_index = agino & mask; |
233 | write_lock(&pag->pag_ici_lock); | 187 | write_lock(&pag->pag_ici_lock); |
234 | /* | 188 | |
235 | * insert the new inode | 189 | /* insert the new inode */ |
236 | */ | ||
237 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 190 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
238 | if (unlikely(error)) { | 191 | if (unlikely(error)) { |
239 | BUG_ON(error != -EEXIST); | 192 | WARN_ON(error != -EEXIST); |
240 | write_unlock(&pag->pag_ici_lock); | ||
241 | radix_tree_preload_end(); | ||
242 | if (lock_flags) | ||
243 | xfs_iunlock(ip, lock_flags); | ||
244 | xfs_idestroy(ip); | ||
245 | XFS_STATS_INC(xs_ig_dup); | 193 | XFS_STATS_INC(xs_ig_dup); |
246 | goto again; | 194 | error = EAGAIN; |
195 | goto out_unlock; | ||
247 | } | 196 | } |
248 | 197 | ||
249 | /* | 198 | /* These values _must_ be set before releasing the radix tree lock! */ |
250 | * These values _must_ be set before releasing the radix tree lock! | ||
251 | */ | ||
252 | ip->i_udquot = ip->i_gdquot = NULL; | 199 | ip->i_udquot = ip->i_gdquot = NULL; |
253 | xfs_iflags_set(ip, XFS_INEW); | 200 | xfs_iflags_set(ip, XFS_INEW); |
254 | 201 | ||
255 | write_unlock(&pag->pag_ici_lock); | 202 | write_unlock(&pag->pag_ici_lock); |
256 | radix_tree_preload_end(); | 203 | radix_tree_preload_end(); |
204 | *ipp = ip; | ||
205 | return 0; | ||
206 | |||
207 | out_unlock: | ||
208 | write_unlock(&pag->pag_ici_lock); | ||
209 | radix_tree_preload_end(); | ||
210 | out_destroy: | ||
211 | xfs_idestroy(ip); | ||
212 | return error; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Look up an inode by number in the given file system. | ||
217 | * The inode is looked up in the cache held in each AG. | ||
218 | * If the inode is found in the cache, attach it to the provided | ||
219 | * vnode. | ||
220 | * | ||
221 | * If it is not in core, read it in from the file system's device, | ||
222 | * add it to the cache and attach the provided vnode. | ||
223 | * | ||
224 | * The inode is locked according to the value of the lock_flags parameter. | ||
225 | * This flag parameter indicates how and if the inode's IO lock and inode lock | ||
226 | * should be taken. | ||
227 | * | ||
228 | * mp -- the mount point structure for the current file system. It points | ||
229 | * to the inode hash table. | ||
230 | * tp -- a pointer to the current transaction if there is one. This is | ||
231 | * simply passed through to the xfs_iread() call. | ||
232 | * ino -- the number of the inode desired. This is the unique identifier | ||
233 | * within the file system for the inode being requested. | ||
234 | * lock_flags -- flags indicating how to lock the inode. See the comment | ||
235 | * for xfs_ilock() for a list of valid values. | ||
236 | * bno -- the block number starting the buffer containing the inode, | ||
237 | * if known (as by bulkstat), else 0. | ||
238 | */ | ||
239 | STATIC int | ||
240 | xfs_iget_core( | ||
241 | struct inode *inode, | ||
242 | xfs_mount_t *mp, | ||
243 | xfs_trans_t *tp, | ||
244 | xfs_ino_t ino, | ||
245 | uint flags, | ||
246 | uint lock_flags, | ||
247 | xfs_inode_t **ipp, | ||
248 | xfs_daddr_t bno) | ||
249 | { | ||
250 | xfs_inode_t *ip; | ||
251 | int error; | ||
252 | xfs_perag_t *pag; | ||
253 | xfs_agino_t agino; | ||
254 | |||
255 | /* the radix tree exists only in inode capable AGs */ | ||
256 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | ||
257 | return EINVAL; | ||
258 | |||
259 | /* get the perag structure and ensure that it's inode capable */ | ||
260 | pag = xfs_get_perag(mp, ino); | ||
261 | if (!pag->pagi_inodeok) | ||
262 | return EINVAL; | ||
263 | ASSERT(pag->pag_ici_init); | ||
264 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
265 | |||
266 | again: | ||
267 | error = 0; | ||
268 | read_lock(&pag->pag_ici_lock); | ||
269 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | ||
270 | |||
271 | if (ip) { | ||
272 | error = xfs_iget_cache_hit(inode, pag, ip, flags, lock_flags); | ||
273 | if (error) | ||
274 | goto out_error_or_again; | ||
275 | } else { | ||
276 | read_unlock(&pag->pag_ici_lock); | ||
277 | XFS_STATS_INC(xs_ig_missed); | ||
278 | |||
279 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, | ||
280 | flags, lock_flags); | ||
281 | if (error) | ||
282 | goto out_error_or_again; | ||
283 | } | ||
257 | xfs_put_perag(mp, pag); | 284 | xfs_put_perag(mp, pag); |
258 | 285 | ||
259 | return_ip: | ||
260 | ASSERT(ip->i_df.if_ext_max == | 286 | ASSERT(ip->i_df.if_ext_max == |
261 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); | 287 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); |
262 | 288 | ||
@@ -276,6 +302,14 @@ finish_inode: | |||
276 | if (ip->i_d.di_mode != 0) | 302 | if (ip->i_d.di_mode != 0) |
277 | xfs_setup_inode(ip); | 303 | xfs_setup_inode(ip); |
278 | return 0; | 304 | return 0; |
305 | |||
306 | out_error_or_again: | ||
307 | if (error == EAGAIN) { | ||
308 | delay(1); | ||
309 | goto again; | ||
310 | } | ||
311 | xfs_put_perag(mp, pag); | ||
312 | return error; | ||
279 | } | 313 | } |
280 | 314 | ||
281 | 315 | ||