diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2010-11-03 16:01:07 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2010-11-15 07:44:42 -0500 |
commit | 044b9414c7caf9a26192c73a5b88fa1a8a32a1c1 (patch) | |
tree | 9596bb669a68b04eebc40864c3b3fd71d3d1e273 /fs/gfs2/inode.c | |
parent | 0143832cc96d0bf78486297aad5c8fb2c2ead02a (diff) |
GFS2: Fix inode deallocation race
This area of the code has always been a bit delicate due to the
subtleties of lock ordering. The problem is that for "normal"
alloc/dealloc, we always grab the inode locks first and the rgrp lock
later.
In order to ensure no races in looking up the unlinked, but still
allocated inodes, we need to hold the rgrp lock when we do the lookup,
which means that we can't take the inode glock.
The solution is to borrow the technique already used by NFS to solve
what is essentially the same problem (given an inode number, look up
the inode carefully, checking that it really is in the expected
state).
We cannot do that directly from the allocation code (lock ordering
again) so we give the job to the pre-existing delete workqueue and
carry on with the allocation as normal.
If we find there is no space, we do a journal flush (required anyway
if space from a deallocation is to be released) which should block
against the pending deallocations, so we should always get the space
back.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/inode.c')
-rw-r--r-- | fs/gfs2/inode.c | 152 |
1 files changed, 35 insertions, 117 deletions
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 06370f8bd8cf..e1213f7f9217 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) | |||
73 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); | 73 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); |
74 | } | 74 | } |
75 | 75 | ||
76 | struct gfs2_skip_data { | ||
77 | u64 no_addr; | ||
78 | int skipped; | ||
79 | }; | ||
80 | |||
81 | static int iget_skip_test(struct inode *inode, void *opaque) | ||
82 | { | ||
83 | struct gfs2_inode *ip = GFS2_I(inode); | ||
84 | struct gfs2_skip_data *data = opaque; | ||
85 | |||
86 | if (ip->i_no_addr == data->no_addr) { | ||
87 | if (inode->i_state & (I_FREEING|I_WILL_FREE)){ | ||
88 | data->skipped = 1; | ||
89 | return 0; | ||
90 | } | ||
91 | return 1; | ||
92 | } | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static int iget_skip_set(struct inode *inode, void *opaque) | ||
97 | { | ||
98 | struct gfs2_inode *ip = GFS2_I(inode); | ||
99 | struct gfs2_skip_data *data = opaque; | ||
100 | |||
101 | if (data->skipped) | ||
102 | return 1; | ||
103 | inode->i_ino = (unsigned long)(data->no_addr); | ||
104 | ip->i_no_addr = data->no_addr; | ||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | static struct inode *gfs2_iget_skip(struct super_block *sb, | ||
109 | u64 no_addr) | ||
110 | { | ||
111 | struct gfs2_skip_data data; | ||
112 | unsigned long hash = (unsigned long)no_addr; | ||
113 | |||
114 | data.no_addr = no_addr; | ||
115 | data.skipped = 0; | ||
116 | return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); | ||
117 | } | ||
118 | |||
119 | /** | 76 | /** |
120 | * GFS2 lookup code fills in vfs inode contents based on info obtained | 77 | * GFS2 lookup code fills in vfs inode contents based on info obtained |
121 | * from directory entry inside gfs2_inode_lookup(). This has caused issues | 78 | * from directory entry inside gfs2_inode_lookup(). This has caused issues |
@@ -243,93 +200,54 @@ fail: | |||
243 | return ERR_PTR(error); | 200 | return ERR_PTR(error); |
244 | } | 201 | } |
245 | 202 | ||
246 | /** | 203 | struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, |
247 | * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation | 204 | u64 *no_formal_ino, unsigned int blktype) |
248 | * and try to reclaim it by doing iput. | ||
249 | * | ||
250 | * This function assumes no rgrp locks are currently held. | ||
251 | * | ||
252 | * @sb: The super block | ||
253 | * no_addr: The inode number | ||
254 | * | ||
255 | */ | ||
256 | |||
257 | void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) | ||
258 | { | 205 | { |
259 | struct gfs2_sbd *sdp; | 206 | struct super_block *sb = sdp->sd_vfs; |
260 | struct gfs2_inode *ip; | 207 | struct gfs2_holder i_gh; |
261 | struct gfs2_glock *io_gl = NULL; | ||
262 | int error; | ||
263 | struct gfs2_holder gh; | ||
264 | struct inode *inode; | 208 | struct inode *inode; |
209 | int error; | ||
265 | 210 | ||
266 | inode = gfs2_iget_skip(sb, no_addr); | 211 | error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, |
267 | 212 | LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | |
268 | if (!inode) | 213 | if (error) |
269 | return; | 214 | return ERR_PTR(error); |
270 | |||
271 | /* If it's not a new inode, someone's using it, so leave it alone. */ | ||
272 | if (!(inode->i_state & I_NEW)) { | ||
273 | iput(inode); | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | ip = GFS2_I(inode); | ||
278 | sdp = GFS2_SB(inode); | ||
279 | ip->i_no_formal_ino = -1; | ||
280 | 215 | ||
281 | error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); | 216 | error = gfs2_check_blk_type(sdp, no_addr, blktype); |
282 | if (unlikely(error)) | 217 | if (error) |
283 | goto fail; | 218 | goto fail; |
284 | ip->i_gl->gl_object = ip; | ||
285 | 219 | ||
286 | error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); | 220 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); |
287 | if (unlikely(error)) | 221 | if (IS_ERR(inode)) |
288 | goto fail_put; | 222 | goto fail; |
289 | |||
290 | set_bit(GIF_INVALID, &ip->i_flags); | ||
291 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, | ||
292 | &ip->i_iopen_gh); | ||
293 | if (unlikely(error)) | ||
294 | goto fail_iopen; | ||
295 | 223 | ||
296 | ip->i_iopen_gh.gh_gl->gl_object = ip; | 224 | error = gfs2_inode_refresh(GFS2_I(inode)); |
297 | gfs2_glock_put(io_gl); | 225 | if (error) |
298 | io_gl = NULL; | 226 | goto fail_iput; |
299 | 227 | ||
300 | inode->i_mode = DT2IF(DT_UNKNOWN); | 228 | /* Pick up the works we bypass in gfs2_inode_lookup */ |
229 | if (inode->i_state & I_NEW) | ||
230 | gfs2_set_iop(inode); | ||
301 | 231 | ||
302 | /* | 232 | /* Two extra checks for NFS only */ |
303 | * We must read the inode in order to work out its type in | 233 | if (no_formal_ino) { |
304 | * this case. Note that this doesn't happen often as we normally | 234 | error = -ESTALE; |
305 | * know the type beforehand. This code path only occurs during | 235 | if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) |
306 | * unlinked inode recovery (where it is safe to do this glock, | 236 | goto fail_iput; |
307 | * which is not true in the general case). | ||
308 | */ | ||
309 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, | ||
310 | &gh); | ||
311 | if (unlikely(error)) | ||
312 | goto fail_glock; | ||
313 | 237 | ||
314 | /* Inode is now uptodate */ | 238 | error = -EIO; |
315 | gfs2_glock_dq_uninit(&gh); | 239 | if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) |
316 | gfs2_set_iop(inode); | 240 | goto fail_iput; |
317 | 241 | ||
318 | /* The iput will cause it to be deleted. */ | 242 | error = 0; |
319 | iput(inode); | 243 | } |
320 | return; | ||
321 | 244 | ||
322 | fail_glock: | ||
323 | gfs2_glock_dq(&ip->i_iopen_gh); | ||
324 | fail_iopen: | ||
325 | if (io_gl) | ||
326 | gfs2_glock_put(io_gl); | ||
327 | fail_put: | ||
328 | ip->i_gl->gl_object = NULL; | ||
329 | gfs2_glock_put(ip->i_gl); | ||
330 | fail: | 245 | fail: |
331 | iget_failed(inode); | 246 | gfs2_glock_dq_uninit(&i_gh); |
332 | return; | 247 | return error ? ERR_PTR(error) : inode; |
248 | fail_iput: | ||
249 | iput(inode); | ||
250 | goto fail; | ||
333 | } | 251 | } |
334 | 252 | ||
335 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | 253 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) |