diff options
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r-- | fs/xfs/xfs_iget.c | 735 |
1 files changed, 399 insertions, 336 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index e229e9e001c2..e2fb6210d4c5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -38,281 +38,283 @@ | |||
38 | #include "xfs_ialloc.h" | 38 | #include "xfs_ialloc.h" |
39 | #include "xfs_quota.h" | 39 | #include "xfs_quota.h" |
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | #include "xfs_trans_priv.h" | ||
42 | #include "xfs_inode_item.h" | ||
43 | #include "xfs_bmap.h" | ||
44 | #include "xfs_btree_trace.h" | ||
45 | #include "xfs_dir2_trace.h" | ||
46 | |||
41 | 47 | ||
42 | /* | 48 | /* |
43 | * Look up an inode by number in the given file system. | 49 | * Allocate and initialise an xfs_inode. |
44 | * The inode is looked up in the cache held in each AG. | ||
45 | * If the inode is found in the cache, attach it to the provided | ||
46 | * vnode. | ||
47 | * | ||
48 | * If it is not in core, read it in from the file system's device, | ||
49 | * add it to the cache and attach the provided vnode. | ||
50 | * | ||
51 | * The inode is locked according to the value of the lock_flags parameter. | ||
52 | * This flag parameter indicates how and if the inode's IO lock and inode lock | ||
53 | * should be taken. | ||
54 | * | ||
55 | * mp -- the mount point structure for the current file system. It points | ||
56 | * to the inode hash table. | ||
57 | * tp -- a pointer to the current transaction if there is one. This is | ||
58 | * simply passed through to the xfs_iread() call. | ||
59 | * ino -- the number of the inode desired. This is the unique identifier | ||
60 | * within the file system for the inode being requested. | ||
61 | * lock_flags -- flags indicating how to lock the inode. See the comment | ||
62 | * for xfs_ilock() for a list of valid values. | ||
63 | * bno -- the block number starting the buffer containing the inode, | ||
64 | * if known (as by bulkstat), else 0. | ||
65 | */ | 50 | */ |
66 | STATIC int | 51 | STATIC struct xfs_inode * |
67 | xfs_iget_core( | 52 | xfs_inode_alloc( |
68 | struct inode *inode, | 53 | struct xfs_mount *mp, |
69 | xfs_mount_t *mp, | 54 | xfs_ino_t ino) |
70 | xfs_trans_t *tp, | ||
71 | xfs_ino_t ino, | ||
72 | uint flags, | ||
73 | uint lock_flags, | ||
74 | xfs_inode_t **ipp, | ||
75 | xfs_daddr_t bno) | ||
76 | { | 55 | { |
77 | struct inode *old_inode; | 56 | struct xfs_inode *ip; |
78 | xfs_inode_t *ip; | ||
79 | xfs_inode_t *iq; | ||
80 | int error; | ||
81 | unsigned long first_index, mask; | ||
82 | xfs_perag_t *pag; | ||
83 | xfs_agino_t agino; | ||
84 | 57 | ||
85 | /* the radix tree exists only in inode capable AGs */ | 58 | /* |
86 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | 59 | * if this didn't occur in transactions, we could use |
87 | return EINVAL; | 60 | * KM_MAYFAIL and return NULL here on ENOMEM. Set the |
61 | * code up to do this anyway. | ||
62 | */ | ||
63 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | ||
64 | if (!ip) | ||
65 | return NULL; | ||
88 | 66 | ||
89 | /* get the perag structure and ensure that it's inode capable */ | 67 | ASSERT(atomic_read(&ip->i_iocount) == 0); |
90 | pag = xfs_get_perag(mp, ino); | 68 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
91 | if (!pag->pagi_inodeok) | 69 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
92 | return EINVAL; | 70 | ASSERT(completion_done(&ip->i_flush)); |
93 | ASSERT(pag->pag_ici_init); | ||
94 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
95 | 71 | ||
96 | again: | 72 | /* |
97 | read_lock(&pag->pag_ici_lock); | 73 | * initialise the VFS inode here to get failures |
98 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 74 | * out of the way early. |
75 | */ | ||
76 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | ||
77 | kmem_zone_free(xfs_inode_zone, ip); | ||
78 | return NULL; | ||
79 | } | ||
80 | |||
81 | /* initialise the xfs inode */ | ||
82 | ip->i_ino = ino; | ||
83 | ip->i_mount = mp; | ||
84 | memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); | ||
85 | ip->i_afp = NULL; | ||
86 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | ||
87 | ip->i_flags = 0; | ||
88 | ip->i_update_core = 0; | ||
89 | ip->i_update_size = 0; | ||
90 | ip->i_delayed_blks = 0; | ||
91 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | ||
92 | ip->i_size = 0; | ||
93 | ip->i_new_size = 0; | ||
94 | |||
95 | /* | ||
96 | * Initialize inode's trace buffers. | ||
97 | */ | ||
98 | #ifdef XFS_INODE_TRACE | ||
99 | ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); | ||
100 | #endif | ||
101 | #ifdef XFS_BMAP_TRACE | ||
102 | ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); | ||
103 | #endif | ||
104 | #ifdef XFS_BTREE_TRACE | ||
105 | ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); | ||
106 | #endif | ||
107 | #ifdef XFS_RW_TRACE | ||
108 | ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); | ||
109 | #endif | ||
110 | #ifdef XFS_ILOCK_TRACE | ||
111 | ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); | ||
112 | #endif | ||
113 | #ifdef XFS_DIR2_TRACE | ||
114 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); | ||
115 | #endif | ||
116 | |||
117 | return ip; | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * Check the validity of the inode we just found in the cache | ||
122 | */ | ||
123 | static int | ||
124 | xfs_iget_cache_hit( | ||
125 | struct xfs_perag *pag, | ||
126 | struct xfs_inode *ip, | ||
127 | int flags, | ||
128 | int lock_flags) __releases(pag->pag_ici_lock) | ||
129 | { | ||
130 | struct xfs_mount *mp = ip->i_mount; | ||
131 | int error = EAGAIN; | ||
132 | |||
133 | /* | ||
134 | * If INEW is set this inode is being set up | ||
135 | * If IRECLAIM is set this inode is being torn down | ||
136 | * Pause and try again. | ||
137 | */ | ||
138 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | ||
139 | XFS_STATS_INC(xs_ig_frecycle); | ||
140 | goto out_error; | ||
141 | } | ||
142 | |||
143 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | ||
144 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | ||
99 | 145 | ||
100 | if (ip != NULL) { | ||
101 | /* | 146 | /* |
102 | * If INEW is set this inode is being set up | 147 | * If lookup is racing with unlink, then we should return an |
103 | * we need to pause and try again. | 148 | * error immediately so we don't remove it from the reclaim |
149 | * list and potentially leak the inode. | ||
104 | */ | 150 | */ |
105 | if (xfs_iflags_test(ip, XFS_INEW)) { | 151 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { |
106 | read_unlock(&pag->pag_ici_lock); | 152 | error = ENOENT; |
107 | delay(1); | 153 | goto out_error; |
108 | XFS_STATS_INC(xs_ig_frecycle); | ||
109 | |||
110 | goto again; | ||
111 | } | 154 | } |
112 | 155 | ||
113 | old_inode = ip->i_vnode; | 156 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
114 | if (old_inode == NULL) { | ||
115 | /* | ||
116 | * If IRECLAIM is set this inode is | ||
117 | * on its way out of the system, | ||
118 | * we need to pause and try again. | ||
119 | */ | ||
120 | if (xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
121 | read_unlock(&pag->pag_ici_lock); | ||
122 | delay(1); | ||
123 | XFS_STATS_INC(xs_ig_frecycle); | ||
124 | |||
125 | goto again; | ||
126 | } | ||
127 | ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
128 | |||
129 | /* | ||
130 | * If lookup is racing with unlink, then we | ||
131 | * should return an error immediately so we | ||
132 | * don't remove it from the reclaim list and | ||
133 | * potentially leak the inode. | ||
134 | */ | ||
135 | if ((ip->i_d.di_mode == 0) && | ||
136 | !(flags & XFS_IGET_CREATE)) { | ||
137 | read_unlock(&pag->pag_ici_lock); | ||
138 | xfs_put_perag(mp, pag); | ||
139 | return ENOENT; | ||
140 | } | ||
141 | |||
142 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | ||
143 | |||
144 | XFS_STATS_INC(xs_ig_found); | ||
145 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
146 | read_unlock(&pag->pag_ici_lock); | ||
147 | |||
148 | XFS_MOUNT_ILOCK(mp); | ||
149 | list_del_init(&ip->i_reclaim); | ||
150 | XFS_MOUNT_IUNLOCK(mp); | ||
151 | |||
152 | goto finish_inode; | ||
153 | |||
154 | } else if (inode != old_inode) { | ||
155 | /* The inode is being torn down, pause and | ||
156 | * try again. | ||
157 | */ | ||
158 | if (old_inode->i_state & (I_FREEING | I_CLEAR)) { | ||
159 | read_unlock(&pag->pag_ici_lock); | ||
160 | delay(1); | ||
161 | XFS_STATS_INC(xs_ig_frecycle); | ||
162 | |||
163 | goto again; | ||
164 | } | ||
165 | /* Chances are the other vnode (the one in the inode) is being torn | ||
166 | * down right now, and we landed on top of it. Question is, what do | ||
167 | * we do? Unhook the old inode and hook up the new one? | ||
168 | */ | ||
169 | cmn_err(CE_PANIC, | ||
170 | "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", | ||
171 | old_inode, inode); | ||
172 | } | ||
173 | 157 | ||
174 | /* | 158 | /* |
175 | * Inode cache hit | 159 | * We need to re-initialise the VFS inode as it has been |
160 | * 'freed' by the VFS. Do this here so we can deal with | ||
161 | * errors cleanly, then tag it so it can be set up correctly | ||
162 | * later. | ||
176 | */ | 163 | */ |
177 | read_unlock(&pag->pag_ici_lock); | 164 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { |
178 | XFS_STATS_INC(xs_ig_found); | 165 | error = ENOMEM; |
179 | 166 | goto out_error; | |
180 | finish_inode: | ||
181 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | ||
182 | xfs_put_perag(mp, pag); | ||
183 | return ENOENT; | ||
184 | } | 167 | } |
185 | 168 | ||
186 | if (lock_flags != 0) | 169 | /* |
187 | xfs_ilock(ip, lock_flags); | 170 | * We must set the XFS_INEW flag before clearing the |
171 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
172 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
173 | * below succeed we can safely check XFS_INEW to detect | ||
174 | * that this inode is still being initialised. | ||
175 | */ | ||
176 | xfs_iflags_set(ip, XFS_INEW); | ||
177 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
178 | |||
179 | /* clear the radix tree reclaim flag as well. */ | ||
180 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | ||
181 | } else if (!igrab(VFS_I(ip))) { | ||
182 | /* If the VFS inode is being torn down, pause and try again. */ | ||
183 | XFS_STATS_INC(xs_ig_frecycle); | ||
184 | goto out_error; | ||
185 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | ||
186 | /* | ||
187 | * We are racing with another cache hit that is | ||
188 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
189 | * state. Wait for the initialisation to complete before | ||
190 | * continuing. | ||
191 | */ | ||
192 | wait_on_inode(VFS_I(ip)); | ||
193 | } | ||
188 | 194 | ||
189 | xfs_iflags_clear(ip, XFS_ISTALE); | 195 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
190 | xfs_itrace_exit_tag(ip, "xfs_iget.found"); | 196 | error = ENOENT; |
191 | goto return_ip; | 197 | iput(VFS_I(ip)); |
198 | goto out_error; | ||
192 | } | 199 | } |
193 | 200 | ||
194 | /* | 201 | /* We've got a live one. */ |
195 | * Inode cache miss | ||
196 | */ | ||
197 | read_unlock(&pag->pag_ici_lock); | 202 | read_unlock(&pag->pag_ici_lock); |
198 | XFS_STATS_INC(xs_ig_missed); | ||
199 | 203 | ||
200 | /* | 204 | if (lock_flags != 0) |
201 | * Read the disk inode attributes into a new inode structure and get | 205 | xfs_ilock(ip, lock_flags); |
202 | * a new vnode for it. This should also initialize i_ino and i_mount. | ||
203 | */ | ||
204 | error = xfs_iread(mp, tp, ino, &ip, bno, | ||
205 | (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); | ||
206 | if (error) { | ||
207 | xfs_put_perag(mp, pag); | ||
208 | return error; | ||
209 | } | ||
210 | 206 | ||
211 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 207 | xfs_iflags_clear(ip, XFS_ISTALE); |
208 | xfs_itrace_exit_tag(ip, "xfs_iget.found"); | ||
209 | XFS_STATS_INC(xs_ig_found); | ||
210 | return 0; | ||
211 | |||
212 | out_error: | ||
213 | read_unlock(&pag->pag_ici_lock); | ||
214 | return error; | ||
215 | } | ||
212 | 216 | ||
213 | 217 | ||
214 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, | 218 | static int |
215 | "xfsino", ip->i_ino); | 219 | xfs_iget_cache_miss( |
216 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 220 | struct xfs_mount *mp, |
217 | init_waitqueue_head(&ip->i_ipin_wait); | 221 | struct xfs_perag *pag, |
218 | atomic_set(&ip->i_pincount, 0); | 222 | xfs_trans_t *tp, |
223 | xfs_ino_t ino, | ||
224 | struct xfs_inode **ipp, | ||
225 | xfs_daddr_t bno, | ||
226 | int flags, | ||
227 | int lock_flags) __releases(pag->pag_ici_lock) | ||
228 | { | ||
229 | struct xfs_inode *ip; | ||
230 | int error; | ||
231 | unsigned long first_index, mask; | ||
232 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); | ||
219 | 233 | ||
220 | /* | 234 | ip = xfs_inode_alloc(mp, ino); |
221 | * Because we want to use a counting completion, complete | 235 | if (!ip) |
222 | * the flush completion once to allow a single access to | 236 | return ENOMEM; |
223 | * the flush completion without blocking. | ||
224 | */ | ||
225 | init_completion(&ip->i_flush); | ||
226 | complete(&ip->i_flush); | ||
227 | 237 | ||
228 | if (lock_flags) | 238 | error = xfs_iread(mp, tp, ip, bno, flags); |
229 | xfs_ilock(ip, lock_flags); | 239 | if (error) |
240 | goto out_destroy; | ||
241 | |||
242 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | ||
230 | 243 | ||
231 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | 244 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { |
232 | xfs_idestroy(ip); | 245 | error = ENOENT; |
233 | xfs_put_perag(mp, pag); | 246 | goto out_destroy; |
234 | return ENOENT; | ||
235 | } | 247 | } |
236 | 248 | ||
249 | if (lock_flags) | ||
250 | xfs_ilock(ip, lock_flags); | ||
251 | |||
237 | /* | 252 | /* |
238 | * Preload the radix tree so we can insert safely under the | 253 | * Preload the radix tree so we can insert safely under the |
239 | * write spinlock. | 254 | * write spinlock. Note that we cannot sleep inside the preload |
255 | * region. | ||
240 | */ | 256 | */ |
241 | if (radix_tree_preload(GFP_KERNEL)) { | 257 | if (radix_tree_preload(GFP_KERNEL)) { |
242 | xfs_idestroy(ip); | 258 | error = EAGAIN; |
243 | delay(1); | 259 | goto out_unlock; |
244 | goto again; | ||
245 | } | 260 | } |
261 | |||
246 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 262 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
247 | first_index = agino & mask; | 263 | first_index = agino & mask; |
248 | write_lock(&pag->pag_ici_lock); | 264 | write_lock(&pag->pag_ici_lock); |
249 | /* | 265 | |
250 | * insert the new inode | 266 | /* insert the new inode */ |
251 | */ | ||
252 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 267 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
253 | if (unlikely(error)) { | 268 | if (unlikely(error)) { |
254 | BUG_ON(error != -EEXIST); | 269 | WARN_ON(error != -EEXIST); |
255 | write_unlock(&pag->pag_ici_lock); | ||
256 | radix_tree_preload_end(); | ||
257 | xfs_idestroy(ip); | ||
258 | XFS_STATS_INC(xs_ig_dup); | 270 | XFS_STATS_INC(xs_ig_dup); |
259 | goto again; | 271 | error = EAGAIN; |
272 | goto out_preload_end; | ||
260 | } | 273 | } |
261 | 274 | ||
262 | /* | 275 | /* These values _must_ be set before releasing the radix tree lock! */ |
263 | * These values _must_ be set before releasing the radix tree lock! | ||
264 | */ | ||
265 | ip->i_udquot = ip->i_gdquot = NULL; | 276 | ip->i_udquot = ip->i_gdquot = NULL; |
266 | xfs_iflags_set(ip, XFS_INEW); | 277 | xfs_iflags_set(ip, XFS_INEW); |
267 | 278 | ||
268 | write_unlock(&pag->pag_ici_lock); | 279 | write_unlock(&pag->pag_ici_lock); |
269 | radix_tree_preload_end(); | 280 | radix_tree_preload_end(); |
270 | |||
271 | /* | ||
272 | * Link ip to its mount and thread it on the mount's inode list. | ||
273 | */ | ||
274 | XFS_MOUNT_ILOCK(mp); | ||
275 | if ((iq = mp->m_inodes)) { | ||
276 | ASSERT(iq->i_mprev->i_mnext == iq); | ||
277 | ip->i_mprev = iq->i_mprev; | ||
278 | iq->i_mprev->i_mnext = ip; | ||
279 | iq->i_mprev = ip; | ||
280 | ip->i_mnext = iq; | ||
281 | } else { | ||
282 | ip->i_mnext = ip; | ||
283 | ip->i_mprev = ip; | ||
284 | } | ||
285 | mp->m_inodes = ip; | ||
286 | |||
287 | XFS_MOUNT_IUNLOCK(mp); | ||
288 | xfs_put_perag(mp, pag); | ||
289 | |||
290 | return_ip: | ||
291 | ASSERT(ip->i_df.if_ext_max == | ||
292 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); | ||
293 | |||
294 | xfs_iflags_set(ip, XFS_IMODIFIED); | ||
295 | *ipp = ip; | 281 | *ipp = ip; |
296 | |||
297 | /* | ||
298 | * Set up the Linux with the Linux inode. | ||
299 | */ | ||
300 | ip->i_vnode = inode; | ||
301 | inode->i_private = ip; | ||
302 | |||
303 | /* | ||
304 | * If we have a real type for an on-disk inode, we can set ops(&unlock) | ||
305 | * now. If it's a new inode being created, xfs_ialloc will handle it. | ||
306 | */ | ||
307 | if (ip->i_d.di_mode != 0) | ||
308 | xfs_setup_inode(ip); | ||
309 | return 0; | 282 | return 0; |
310 | } | ||
311 | 283 | ||
284 | out_preload_end: | ||
285 | write_unlock(&pag->pag_ici_lock); | ||
286 | radix_tree_preload_end(); | ||
287 | out_unlock: | ||
288 | if (lock_flags) | ||
289 | xfs_iunlock(ip, lock_flags); | ||
290 | out_destroy: | ||
291 | xfs_destroy_inode(ip); | ||
292 | return error; | ||
293 | } | ||
312 | 294 | ||
313 | /* | 295 | /* |
314 | * The 'normal' internal xfs_iget, if needed it will | 296 | * Look up an inode by number in the given file system. |
315 | * 'allocate', or 'get', the vnode. | 297 | * The inode is looked up in the cache held in each AG. |
298 | * If the inode is found in the cache, initialise the vfs inode | ||
299 | * if necessary. | ||
300 | * | ||
301 | * If it is not in core, read it in from the file system's device, | ||
302 | * add it to the cache and initialise the vfs inode. | ||
303 | * | ||
304 | * The inode is locked according to the value of the lock_flags parameter. | ||
305 | * This flag parameter indicates how and if the inode's IO lock and inode lock | ||
306 | * should be taken. | ||
307 | * | ||
308 | * mp -- the mount point structure for the current file system. It points | ||
309 | * to the inode hash table. | ||
310 | * tp -- a pointer to the current transaction if there is one. This is | ||
311 | * simply passed through to the xfs_iread() call. | ||
312 | * ino -- the number of the inode desired. This is the unique identifier | ||
313 | * within the file system for the inode being requested. | ||
314 | * lock_flags -- flags indicating how to lock the inode. See the comment | ||
315 | * for xfs_ilock() for a list of valid values. | ||
316 | * bno -- the block number starting the buffer containing the inode, | ||
317 | * if known (as by bulkstat), else 0. | ||
316 | */ | 318 | */ |
317 | int | 319 | int |
318 | xfs_iget( | 320 | xfs_iget( |
@@ -324,61 +326,64 @@ xfs_iget( | |||
324 | xfs_inode_t **ipp, | 326 | xfs_inode_t **ipp, |
325 | xfs_daddr_t bno) | 327 | xfs_daddr_t bno) |
326 | { | 328 | { |
327 | struct inode *inode; | ||
328 | xfs_inode_t *ip; | 329 | xfs_inode_t *ip; |
329 | int error; | 330 | int error; |
331 | xfs_perag_t *pag; | ||
332 | xfs_agino_t agino; | ||
330 | 333 | ||
331 | XFS_STATS_INC(xs_ig_attempts); | 334 | /* the radix tree exists only in inode capable AGs */ |
335 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | ||
336 | return EINVAL; | ||
332 | 337 | ||
333 | retry: | 338 | /* get the perag structure and ensure that it's inode capable */ |
334 | inode = iget_locked(mp->m_super, ino); | 339 | pag = xfs_get_perag(mp, ino); |
335 | if (!inode) | 340 | if (!pag->pagi_inodeok) |
336 | /* If we got no inode we are out of memory */ | 341 | return EINVAL; |
337 | return ENOMEM; | 342 | ASSERT(pag->pag_ici_init); |
343 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
338 | 344 | ||
339 | if (inode->i_state & I_NEW) { | 345 | again: |
340 | XFS_STATS_INC(vn_active); | 346 | error = 0; |
341 | XFS_STATS_INC(vn_alloc); | 347 | read_lock(&pag->pag_ici_lock); |
342 | 348 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | |
343 | error = xfs_iget_core(inode, mp, tp, ino, flags, | 349 | |
344 | lock_flags, ipp, bno); | 350 | if (ip) { |
345 | if (error) { | 351 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); |
346 | make_bad_inode(inode); | 352 | if (error) |
347 | if (inode->i_state & I_NEW) | 353 | goto out_error_or_again; |
348 | unlock_new_inode(inode); | 354 | } else { |
349 | iput(inode); | 355 | read_unlock(&pag->pag_ici_lock); |
350 | } | 356 | XFS_STATS_INC(xs_ig_missed); |
351 | return error; | 357 | |
358 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, | ||
359 | flags, lock_flags); | ||
360 | if (error) | ||
361 | goto out_error_or_again; | ||
352 | } | 362 | } |
363 | xfs_put_perag(mp, pag); | ||
353 | 364 | ||
365 | *ipp = ip; | ||
366 | |||
367 | ASSERT(ip->i_df.if_ext_max == | ||
368 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); | ||
354 | /* | 369 | /* |
355 | * If the inode is not fully constructed due to | 370 | * If we have a real type for an on-disk inode, we can set ops(&unlock) |
356 | * filehandle mismatches wait for the inode to go | 371 | * now. If it's a new inode being created, xfs_ialloc will handle it. |
357 | * away and try again. | ||
358 | * | ||
359 | * iget_locked will call __wait_on_freeing_inode | ||
360 | * to wait for the inode to go away. | ||
361 | */ | 372 | */ |
362 | if (is_bad_inode(inode)) { | 373 | if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) |
363 | iput(inode); | 374 | xfs_setup_inode(ip); |
364 | delay(1); | 375 | return 0; |
365 | goto retry; | ||
366 | } | ||
367 | 376 | ||
368 | ip = XFS_I(inode); | 377 | out_error_or_again: |
369 | if (!ip) { | 378 | if (error == EAGAIN) { |
370 | iput(inode); | ||
371 | delay(1); | 379 | delay(1); |
372 | goto retry; | 380 | goto again; |
373 | } | 381 | } |
374 | 382 | xfs_put_perag(mp, pag); | |
375 | if (lock_flags != 0) | 383 | return error; |
376 | xfs_ilock(ip, lock_flags); | ||
377 | XFS_STATS_INC(xs_ig_found); | ||
378 | *ipp = ip; | ||
379 | return 0; | ||
380 | } | 384 | } |
381 | 385 | ||
386 | |||
382 | /* | 387 | /* |
383 | * Look for the inode corresponding to the given ino in the hash table. | 388 | * Look for the inode corresponding to the given ino in the hash table. |
384 | * If it is there and its i_transp pointer matches tp, return it. | 389 | * If it is there and its i_transp pointer matches tp, return it. |
@@ -444,99 +449,109 @@ xfs_iput_new( | |||
444 | IRELE(ip); | 449 | IRELE(ip); |
445 | } | 450 | } |
446 | 451 | ||
447 | |||
448 | /* | 452 | /* |
449 | * This routine embodies the part of the reclaim code that pulls | 453 | * This is called to free all the memory associated with an inode. |
450 | * the inode from the inode hash table and the mount structure's | 454 | * It must free the inode itself and any buffers allocated for |
451 | * inode list. | 455 | * if_extents/if_data and if_broot. It must also free the lock |
452 | * This should only be called from xfs_reclaim(). | 456 | * associated with the inode. |
457 | * | ||
458 | * Note: because we don't initialise everything on reallocation out | ||
459 | * of the zone, we must ensure we nullify everything correctly before | ||
460 | * freeing the structure. | ||
453 | */ | 461 | */ |
454 | void | 462 | void |
455 | xfs_ireclaim(xfs_inode_t *ip) | 463 | xfs_ireclaim( |
464 | struct xfs_inode *ip) | ||
456 | { | 465 | { |
457 | /* | 466 | struct xfs_mount *mp = ip->i_mount; |
458 | * Remove from old hash list and mount list. | 467 | struct xfs_perag *pag; |
459 | */ | ||
460 | XFS_STATS_INC(xs_ig_reclaims); | ||
461 | 468 | ||
462 | xfs_iextract(ip); | 469 | XFS_STATS_INC(xs_ig_reclaims); |
463 | |||
464 | /* | ||
465 | * Here we do a spurious inode lock in order to coordinate with | ||
466 | * xfs_sync(). This is because xfs_sync() references the inodes | ||
467 | * in the mount list without taking references on the corresponding | ||
468 | * vnodes. We make that OK here by ensuring that we wait until | ||
469 | * the inode is unlocked in xfs_sync() before we go ahead and | ||
470 | * free it. We get both the regular lock and the io lock because | ||
471 | * the xfs_sync() code may need to drop the regular one but will | ||
472 | * still hold the io lock. | ||
473 | */ | ||
474 | xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
475 | |||
476 | /* | ||
477 | * Release dquots (and their references) if any. An inode may escape | ||
478 | * xfs_inactive and get here via vn_alloc->vn_reclaim path. | ||
479 | */ | ||
480 | XFS_QM_DQDETACH(ip->i_mount, ip); | ||
481 | |||
482 | /* | ||
483 | * Pull our behavior descriptor from the vnode chain. | ||
484 | */ | ||
485 | if (ip->i_vnode) { | ||
486 | ip->i_vnode->i_private = NULL; | ||
487 | ip->i_vnode = NULL; | ||
488 | } | ||
489 | 470 | ||
490 | /* | 471 | /* |
491 | * Free all memory associated with the inode. | 472 | * Remove the inode from the per-AG radix tree. It doesn't matter |
473 | * if it was never added to it because radix_tree_delete can deal | ||
474 | * with that case just fine. | ||
492 | */ | 475 | */ |
493 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 476 | pag = xfs_get_perag(mp, ip->i_ino); |
494 | xfs_idestroy(ip); | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * This routine removes an about-to-be-destroyed inode from | ||
499 | * all of the lists in which it is located with the exception | ||
500 | * of the behavior chain. | ||
501 | */ | ||
502 | void | ||
503 | xfs_iextract( | ||
504 | xfs_inode_t *ip) | ||
505 | { | ||
506 | xfs_mount_t *mp = ip->i_mount; | ||
507 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
508 | xfs_inode_t *iq; | ||
509 | |||
510 | write_lock(&pag->pag_ici_lock); | 477 | write_lock(&pag->pag_ici_lock); |
511 | radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); | 478 | radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); |
512 | write_unlock(&pag->pag_ici_lock); | 479 | write_unlock(&pag->pag_ici_lock); |
513 | xfs_put_perag(mp, pag); | 480 | xfs_put_perag(mp, pag); |
514 | 481 | ||
515 | /* | 482 | /* |
516 | * Remove from mount's inode list. | 483 | * Here we do an (almost) spurious inode lock in order to coordinate |
484 | * with inode cache radix tree lookups. This is because the lookup | ||
485 | * can reference the inodes in the cache without taking references. | ||
486 | * | ||
487 | * We make that OK here by ensuring that we wait until the inode is | ||
488 | * unlocked after the lookup before we go ahead and free it. We get | ||
489 | * both the ilock and the iolock because the code may need to drop the | ||
490 | * ilock one but will still hold the iolock. | ||
517 | */ | 491 | */ |
518 | XFS_MOUNT_ILOCK(mp); | 492 | xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
519 | ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL)); | ||
520 | iq = ip->i_mnext; | ||
521 | iq->i_mprev = ip->i_mprev; | ||
522 | ip->i_mprev->i_mnext = iq; | ||
523 | |||
524 | /* | 493 | /* |
525 | * Fix up the head pointer if it points to the inode being deleted. | 494 | * Release dquots (and their references) if any. |
526 | */ | 495 | */ |
527 | if (mp->m_inodes == ip) { | 496 | XFS_QM_DQDETACH(ip->i_mount, ip); |
528 | if (ip == iq) { | 497 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
529 | mp->m_inodes = NULL; | 498 | |
530 | } else { | 499 | switch (ip->i_d.di_mode & S_IFMT) { |
531 | mp->m_inodes = iq; | 500 | case S_IFREG: |
532 | } | 501 | case S_IFDIR: |
502 | case S_IFLNK: | ||
503 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
504 | break; | ||
533 | } | 505 | } |
534 | 506 | ||
535 | /* Deal with the deleted inodes list */ | 507 | if (ip->i_afp) |
536 | list_del_init(&ip->i_reclaim); | 508 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); |
537 | 509 | ||
538 | mp->m_ireclaims++; | 510 | #ifdef XFS_INODE_TRACE |
539 | XFS_MOUNT_IUNLOCK(mp); | 511 | ktrace_free(ip->i_trace); |
512 | #endif | ||
513 | #ifdef XFS_BMAP_TRACE | ||
514 | ktrace_free(ip->i_xtrace); | ||
515 | #endif | ||
516 | #ifdef XFS_BTREE_TRACE | ||
517 | ktrace_free(ip->i_btrace); | ||
518 | #endif | ||
519 | #ifdef XFS_RW_TRACE | ||
520 | ktrace_free(ip->i_rwtrace); | ||
521 | #endif | ||
522 | #ifdef XFS_ILOCK_TRACE | ||
523 | ktrace_free(ip->i_lock_trace); | ||
524 | #endif | ||
525 | #ifdef XFS_DIR2_TRACE | ||
526 | ktrace_free(ip->i_dir_trace); | ||
527 | #endif | ||
528 | if (ip->i_itemp) { | ||
529 | /* | ||
530 | * Only if we are shutting down the fs will we see an | ||
531 | * inode still in the AIL. If it is there, we should remove | ||
532 | * it to prevent a use-after-free from occurring. | ||
533 | */ | ||
534 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
535 | struct xfs_ail *ailp = lip->li_ailp; | ||
536 | |||
537 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
538 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
539 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
540 | spin_lock(&ailp->xa_lock); | ||
541 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
542 | xfs_trans_ail_delete(ailp, lip); | ||
543 | else | ||
544 | spin_unlock(&ailp->xa_lock); | ||
545 | } | ||
546 | xfs_inode_item_destroy(ip); | ||
547 | ip->i_itemp = NULL; | ||
548 | } | ||
549 | /* asserts to verify all state is correct here */ | ||
550 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
551 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
552 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
553 | ASSERT(completion_done(&ip->i_flush)); | ||
554 | kmem_zone_free(xfs_inode_zone, ip); | ||
540 | } | 555 | } |
541 | 556 | ||
542 | /* | 557 | /* |
@@ -737,7 +752,7 @@ xfs_iunlock( | |||
737 | * it is in the AIL and anyone is waiting on it. Don't do | 752 | * it is in the AIL and anyone is waiting on it. Don't do |
738 | * this if the caller has asked us not to. | 753 | * this if the caller has asked us not to. |
739 | */ | 754 | */ |
740 | xfs_trans_unlocked_item(ip->i_mount, | 755 | xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, |
741 | (xfs_log_item_t*)(ip->i_itemp)); | 756 | (xfs_log_item_t*)(ip->i_itemp)); |
742 | } | 757 | } |
743 | xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); | 758 | xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); |
@@ -790,3 +805,51 @@ xfs_isilocked( | |||
790 | } | 805 | } |
791 | #endif | 806 | #endif |
792 | 807 | ||
808 | #ifdef XFS_INODE_TRACE | ||
809 | |||
810 | #define KTRACE_ENTER(ip, vk, s, line, ra) \ | ||
811 | ktrace_enter((ip)->i_trace, \ | ||
812 | /* 0 */ (void *)(__psint_t)(vk), \ | ||
813 | /* 1 */ (void *)(s), \ | ||
814 | /* 2 */ (void *)(__psint_t) line, \ | ||
815 | /* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ | ||
816 | /* 4 */ (void *)(ra), \ | ||
817 | /* 5 */ NULL, \ | ||
818 | /* 6 */ (void *)(__psint_t)current_cpu(), \ | ||
819 | /* 7 */ (void *)(__psint_t)current_pid(), \ | ||
820 | /* 8 */ (void *)__return_address, \ | ||
821 | /* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) | ||
822 | |||
823 | /* | ||
824 | * Vnode tracing code. | ||
825 | */ | ||
826 | void | ||
827 | _xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) | ||
828 | { | ||
829 | KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); | ||
830 | } | ||
831 | |||
832 | void | ||
833 | _xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) | ||
834 | { | ||
835 | KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); | ||
836 | } | ||
837 | |||
838 | void | ||
839 | xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) | ||
840 | { | ||
841 | KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); | ||
842 | } | ||
843 | |||
844 | void | ||
845 | _xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) | ||
846 | { | ||
847 | KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); | ||
848 | } | ||
849 | |||
850 | void | ||
851 | xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) | ||
852 | { | ||
853 | KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); | ||
854 | } | ||
855 | #endif /* XFS_INODE_TRACE */ | ||