aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2/inode.c
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2010-11-03 16:01:07 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2010-11-15 07:44:42 -0500
commit044b9414c7caf9a26192c73a5b88fa1a8a32a1c1 (patch)
tree9596bb669a68b04eebc40864c3b3fd71d3d1e273 /fs/gfs2/inode.c
parent0143832cc96d0bf78486297aad5c8fb2c2ead02a (diff)
GFS2: Fix inode deallocation race
This area of the code has always been a bit delicate due to the subtleties of lock ordering. The problem is that for "normal" alloc/dealloc, we always grab the inode locks first and the rgrp lock later. In order to ensure no races in looking up the unlinked, but still allocated inodes, we need to hold the rgrp lock when we do the lookup, which means that we can't take the inode glock. The solution is to borrow the technique already used by NFS to solve what is essentially the same problem (given an inode number, look up the inode carefully, checking that it really is in the expected state). We cannot do that directly from the allocation code (lock ordering again) so we give the job to the pre-existing delete workqueue and carry on with the allocation as normal. If we find there is no space, we do a journal flush (required anyway if space from a deallocation is to be released) which should block against the pending deallocations, so we should always get the space back. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/inode.c')
-rw-r--r--fs/gfs2/inode.c152
1 files changed, 35 insertions, 117 deletions
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06370f8bd8cf..e1213f7f9217 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
74} 74}
75 75
76struct gfs2_skip_data {
77 u64 no_addr;
78 int skipped;
79};
80
81static int iget_skip_test(struct inode *inode, void *opaque)
82{
83 struct gfs2_inode *ip = GFS2_I(inode);
84 struct gfs2_skip_data *data = opaque;
85
86 if (ip->i_no_addr == data->no_addr) {
87 if (inode->i_state & (I_FREEING|I_WILL_FREE)){
88 data->skipped = 1;
89 return 0;
90 }
91 return 1;
92 }
93 return 0;
94}
95
96static int iget_skip_set(struct inode *inode, void *opaque)
97{
98 struct gfs2_inode *ip = GFS2_I(inode);
99 struct gfs2_skip_data *data = opaque;
100
101 if (data->skipped)
102 return 1;
103 inode->i_ino = (unsigned long)(data->no_addr);
104 ip->i_no_addr = data->no_addr;
105 return 0;
106}
107
108static struct inode *gfs2_iget_skip(struct super_block *sb,
109 u64 no_addr)
110{
111 struct gfs2_skip_data data;
112 unsigned long hash = (unsigned long)no_addr;
113
114 data.no_addr = no_addr;
115 data.skipped = 0;
116 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
117}
118
119/** 76/**
120 * GFS2 lookup code fills in vfs inode contents based on info obtained 77 * GFS2 lookup code fills in vfs inode contents based on info obtained
121 * from directory entry inside gfs2_inode_lookup(). This has caused issues 78 * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -243,93 +200,54 @@ fail:
243 return ERR_PTR(error); 200 return ERR_PTR(error);
244} 201}
245 202
246/** 203struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
247 * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation 204 u64 *no_formal_ino, unsigned int blktype)
248 * and try to reclaim it by doing iput.
249 *
250 * This function assumes no rgrp locks are currently held.
251 *
252 * @sb: The super block
253 * no_addr: The inode number
254 *
255 */
256
257void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
258{ 205{
259 struct gfs2_sbd *sdp; 206 struct super_block *sb = sdp->sd_vfs;
260 struct gfs2_inode *ip; 207 struct gfs2_holder i_gh;
261 struct gfs2_glock *io_gl = NULL;
262 int error;
263 struct gfs2_holder gh;
264 struct inode *inode; 208 struct inode *inode;
209 int error;
265 210
266 inode = gfs2_iget_skip(sb, no_addr); 211 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
267 212 LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
268 if (!inode) 213 if (error)
269 return; 214 return ERR_PTR(error);
270
271 /* If it's not a new inode, someone's using it, so leave it alone. */
272 if (!(inode->i_state & I_NEW)) {
273 iput(inode);
274 return;
275 }
276
277 ip = GFS2_I(inode);
278 sdp = GFS2_SB(inode);
279 ip->i_no_formal_ino = -1;
280 215
281 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 216 error = gfs2_check_blk_type(sdp, no_addr, blktype);
282 if (unlikely(error)) 217 if (error)
283 goto fail; 218 goto fail;
284 ip->i_gl->gl_object = ip;
285 219
286 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 220 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
287 if (unlikely(error)) 221 if (IS_ERR(inode))
288 goto fail_put; 222 goto fail;
289
290 set_bit(GIF_INVALID, &ip->i_flags);
291 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
292 &ip->i_iopen_gh);
293 if (unlikely(error))
294 goto fail_iopen;
295 223
296 ip->i_iopen_gh.gh_gl->gl_object = ip; 224 error = gfs2_inode_refresh(GFS2_I(inode));
297 gfs2_glock_put(io_gl); 225 if (error)
298 io_gl = NULL; 226 goto fail_iput;
299 227
300 inode->i_mode = DT2IF(DT_UNKNOWN); 228 /* Pick up the works we bypass in gfs2_inode_lookup */
229 if (inode->i_state & I_NEW)
230 gfs2_set_iop(inode);
301 231
302 /* 232 /* Two extra checks for NFS only */
303 * We must read the inode in order to work out its type in 233 if (no_formal_ino) {
304 * this case. Note that this doesn't happen often as we normally 234 error = -ESTALE;
305 * know the type beforehand. This code path only occurs during 235 if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
306 * unlinked inode recovery (where it is safe to do this glock, 236 goto fail_iput;
307 * which is not true in the general case).
308 */
309 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
310 &gh);
311 if (unlikely(error))
312 goto fail_glock;
313 237
314 /* Inode is now uptodate */ 238 error = -EIO;
315 gfs2_glock_dq_uninit(&gh); 239 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
316 gfs2_set_iop(inode); 240 goto fail_iput;
317 241
318 /* The iput will cause it to be deleted. */ 242 error = 0;
319 iput(inode); 243 }
320 return;
321 244
322fail_glock:
323 gfs2_glock_dq(&ip->i_iopen_gh);
324fail_iopen:
325 if (io_gl)
326 gfs2_glock_put(io_gl);
327fail_put:
328 ip->i_gl->gl_object = NULL;
329 gfs2_glock_put(ip->i_gl);
330fail: 245fail:
331 iget_failed(inode); 246 gfs2_glock_dq_uninit(&i_gh);
332 return; 247 return error ? ERR_PTR(error) : inode;
248fail_iput:
249 iput(inode);
250 goto fail;
333} 251}
334 252
335static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 253static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)