diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-02-10 01:20:05 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-02-10 01:20:05 -0500 |
commit | 8009f9fb3067fef6c2ca0c16f6bac786ae28639d (patch) | |
tree | acf8bbfd2833a63baa9d2194ed30bf7e7dcb3075 | |
parent | 0040d9875dcccfcb2131417b10fbd9841bc5f05b (diff) |
ext4: Fix circular locking dependency with migrate and rm.
In order to prevent a circular locking dependency when an unlink
operation is racing with an ext4 migration, we delay taking i_data_sem
until just before switching the inode format, and use i_mutex to prevent
writes and truncates during the first part of the migration operation.
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/migrate.c | 117 |
1 files changed, 74 insertions, 43 deletions
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 9ee1f7cfb2c5..8c6c685b9d22 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
61 | retval = ext4_journal_restart(handle, needed); | 61 | retval = ext4_journal_restart(handle, needed); |
62 | if (retval) | 62 | if (retval) |
63 | goto err_out; | 63 | goto err_out; |
64 | } | 64 | } else if (needed) { |
65 | if (needed) { | ||
66 | retval = ext4_journal_extend(handle, needed); | 65 | retval = ext4_journal_extend(handle, needed); |
67 | if (retval != 0) { | 66 | if (retval) { |
68 | /* | 67 | /* |
69 | * IF not able to extend the journal restart the journal | 68 | * IF not able to extend the journal restart the journal |
70 | */ | 69 | */ |
@@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, | |||
220 | 219 | ||
221 | } | 220 | } |
222 | 221 | ||
222 | static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) | ||
223 | { | ||
224 | int retval = 0, needed; | ||
225 | |||
226 | if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) | ||
227 | return 0; | ||
228 | /* | ||
229 | * We are freeing a blocks. During this we touch | ||
230 | * superblock, group descriptor and block bitmap. | ||
231 | * So allocate a credit of 3. We may update | ||
232 | * quota (user and group). | ||
233 | */ | ||
234 | needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
235 | |||
236 | if (ext4_journal_extend(handle, needed) != 0) | ||
237 | retval = ext4_journal_restart(handle, needed); | ||
238 | |||
239 | return retval; | ||
240 | } | ||
241 | |||
223 | static int free_dind_blocks(handle_t *handle, | 242 | static int free_dind_blocks(handle_t *handle, |
224 | struct inode *inode, __le32 i_data) | 243 | struct inode *inode, __le32 i_data) |
225 | { | 244 | { |
@@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle, | |||
234 | 253 | ||
235 | tmp_idata = (__le32 *)bh->b_data; | 254 | tmp_idata = (__le32 *)bh->b_data; |
236 | for (i = 0; i < max_entries; i++) { | 255 | for (i = 0; i < max_entries; i++) { |
237 | if (tmp_idata[i]) | 256 | if (tmp_idata[i]) { |
257 | extend_credit_for_blkdel(handle, inode); | ||
238 | ext4_free_blocks(handle, inode, | 258 | ext4_free_blocks(handle, inode, |
239 | le32_to_cpu(tmp_idata[i]), 1, 1); | 259 | le32_to_cpu(tmp_idata[i]), 1, 1); |
260 | } | ||
240 | } | 261 | } |
241 | put_bh(bh); | 262 | put_bh(bh); |
263 | extend_credit_for_blkdel(handle, inode); | ||
242 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | 264 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); |
243 | return 0; | 265 | return 0; |
244 | } | 266 | } |
@@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle, | |||
267 | } | 289 | } |
268 | } | 290 | } |
269 | put_bh(bh); | 291 | put_bh(bh); |
292 | extend_credit_for_blkdel(handle, inode); | ||
270 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | 293 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); |
271 | return 0; | 294 | return 0; |
272 | } | 295 | } |
273 | 296 | ||
274 | static int free_ind_block(handle_t *handle, struct inode *inode) | 297 | static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) |
275 | { | 298 | { |
276 | int retval; | 299 | int retval; |
277 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
278 | 300 | ||
279 | if (ei->i_data[EXT4_IND_BLOCK]) | 301 | /* ei->i_data[EXT4_IND_BLOCK] */ |
302 | if (i_data[0]) { | ||
303 | extend_credit_for_blkdel(handle, inode); | ||
280 | ext4_free_blocks(handle, inode, | 304 | ext4_free_blocks(handle, inode, |
281 | le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); | 305 | le32_to_cpu(i_data[0]), 1, 1); |
306 | } | ||
282 | 307 | ||
283 | if (ei->i_data[EXT4_DIND_BLOCK]) { | 308 | /* ei->i_data[EXT4_DIND_BLOCK] */ |
284 | retval = free_dind_blocks(handle, inode, | 309 | if (i_data[1]) { |
285 | ei->i_data[EXT4_DIND_BLOCK]); | 310 | retval = free_dind_blocks(handle, inode, i_data[1]); |
286 | if (retval) | 311 | if (retval) |
287 | return retval; | 312 | return retval; |
288 | } | 313 | } |
289 | 314 | ||
290 | if (ei->i_data[EXT4_TIND_BLOCK]) { | 315 | /* ei->i_data[EXT4_TIND_BLOCK] */ |
291 | retval = free_tind_blocks(handle, inode, | 316 | if (i_data[2]) { |
292 | ei->i_data[EXT4_TIND_BLOCK]); | 317 | retval = free_tind_blocks(handle, inode, i_data[2]); |
293 | if (retval) | 318 | if (retval) |
294 | return retval; | 319 | return retval; |
295 | } | 320 | } |
@@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode) | |||
297 | } | 322 | } |
298 | 323 | ||
299 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | 324 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, |
300 | struct inode *tmp_inode, int retval) | 325 | struct inode *tmp_inode) |
301 | { | 326 | { |
327 | int retval; | ||
328 | __le32 i_data[3]; | ||
302 | struct ext4_inode_info *ei = EXT4_I(inode); | 329 | struct ext4_inode_info *ei = EXT4_I(inode); |
303 | struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); | 330 | struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); |
304 | 331 | ||
305 | retval = free_ind_block(handle, inode); | ||
306 | if (retval) | ||
307 | goto err_out; | ||
308 | |||
309 | /* | 332 | /* |
310 | * One credit accounted for writing the | 333 | * One credit accounted for writing the |
311 | * i_data field of the original inode | 334 | * i_data field of the original inode |
@@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
317 | goto err_out; | 340 | goto err_out; |
318 | } | 341 | } |
319 | 342 | ||
343 | i_data[0] = ei->i_data[EXT4_IND_BLOCK]; | ||
344 | i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; | ||
345 | i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; | ||
346 | |||
347 | down_write(&EXT4_I(inode)->i_data_sem); | ||
320 | /* | 348 | /* |
321 | * We have the extent map build with the tmp inode. | 349 | * We have the extent map build with the tmp inode. |
322 | * Now copy the i_data across | 350 | * Now copy the i_data across |
@@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
336 | spin_lock(&inode->i_lock); | 364 | spin_lock(&inode->i_lock); |
337 | inode->i_blocks += tmp_inode->i_blocks; | 365 | inode->i_blocks += tmp_inode->i_blocks; |
338 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
367 | up_write(&EXT4_I(inode)->i_data_sem); | ||
339 | 368 | ||
369 | /* | ||
370 | * We mark the inode dirty after, because we decrement the | ||
371 | * i_blocks when freeing the indirect meta-data blocks | ||
372 | */ | ||
373 | retval = free_ind_block(handle, inode, i_data); | ||
340 | ext4_mark_inode_dirty(handle, inode); | 374 | ext4_mark_inode_dirty(handle, inode); |
375 | |||
341 | err_out: | 376 | err_out: |
342 | return retval; | 377 | return retval; |
343 | } | 378 | } |
@@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
365 | } | 400 | } |
366 | } | 401 | } |
367 | put_bh(bh); | 402 | put_bh(bh); |
403 | extend_credit_for_blkdel(handle, inode); | ||
368 | ext4_free_blocks(handle, inode, block, 1, 1); | 404 | ext4_free_blocks(handle, inode, block, 1, 1); |
369 | return retval; | 405 | return retval; |
370 | } | 406 | } |
@@ -420,7 +456,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
420 | */ | 456 | */ |
421 | return retval; | 457 | return retval; |
422 | 458 | ||
423 | down_write(&EXT4_I(inode)->i_data_sem); | ||
424 | handle = ext4_journal_start(inode, | 459 | handle = ext4_journal_start(inode, |
425 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + | 460 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + |
426 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | 461 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + |
@@ -454,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
454 | ext4_orphan_add(handle, tmp_inode); | 489 | ext4_orphan_add(handle, tmp_inode); |
455 | ext4_journal_stop(handle); | 490 | ext4_journal_stop(handle); |
456 | 491 | ||
457 | ei = EXT4_I(inode); | ||
458 | i_data = ei->i_data; | ||
459 | memset(&lb, 0, sizeof(lb)); | ||
460 | |||
461 | /* 32 bit block address 4 bytes */ | ||
462 | max_entries = inode->i_sb->s_blocksize >> 2; | ||
463 | |||
464 | /* | 492 | /* |
465 | * start with one credit accounted for | 493 | * start with one credit accounted for |
466 | * superblock modification. | 494 | * superblock modification. |
@@ -469,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
469 | * trascation that created the inode. Later as and | 497 | * trascation that created the inode. Later as and |
470 | * when we add extents we extent the journal | 498 | * when we add extents we extent the journal |
471 | */ | 499 | */ |
500 | /* | ||
501 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
502 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
503 | * switch the inode format to prevent read. | ||
504 | */ | ||
505 | mutex_lock(&(inode->i_mutex)); | ||
472 | handle = ext4_journal_start(inode, 1); | 506 | handle = ext4_journal_start(inode, 1); |
507 | |||
508 | ei = EXT4_I(inode); | ||
509 | i_data = ei->i_data; | ||
510 | memset(&lb, 0, sizeof(lb)); | ||
511 | |||
512 | /* 32 bit block address 4 bytes */ | ||
513 | max_entries = inode->i_sb->s_blocksize >> 2; | ||
473 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { | 514 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { |
474 | if (i_data[i]) { | 515 | if (i_data[i]) { |
475 | retval = update_extent_range(handle, tmp_inode, | 516 | retval = update_extent_range(handle, tmp_inode, |
@@ -507,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
507 | */ | 548 | */ |
508 | retval = finish_range(handle, tmp_inode, &lb); | 549 | retval = finish_range(handle, tmp_inode, &lb); |
509 | err_out: | 550 | err_out: |
510 | /* | ||
511 | * We are either freeing extent information or indirect | ||
512 | * blocks. During this we touch superblock, group descriptor | ||
513 | * and block bitmap. Later we mark the tmp_inode dirty | ||
514 | * via ext4_ext_tree_init. So allocate a credit of 4 | ||
515 | * We may update quota (user and group). | ||
516 | * | ||
517 | * FIXME!! we may be touching bitmaps in different block groups. | ||
518 | */ | ||
519 | if (ext4_journal_extend(handle, | ||
520 | 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) | ||
521 | ext4_journal_restart(handle, | ||
522 | 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | ||
523 | if (retval) | 551 | if (retval) |
524 | /* | 552 | /* |
525 | * Failure case delete the extent information with the | 553 | * Failure case delete the extent information with the |
@@ -528,7 +556,11 @@ err_out: | |||
528 | free_ext_block(handle, tmp_inode); | 556 | free_ext_block(handle, tmp_inode); |
529 | else | 557 | else |
530 | retval = ext4_ext_swap_inode_data(handle, inode, | 558 | retval = ext4_ext_swap_inode_data(handle, inode, |
531 | tmp_inode, retval); | 559 | tmp_inode); |
560 | |||
561 | /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ | ||
562 | if (ext4_journal_extend(handle, 1) != 0) | ||
563 | ext4_journal_restart(handle, 1); | ||
532 | 564 | ||
533 | /* | 565 | /* |
534 | * Mark the tmp_inode as of size zero | 566 | * Mark the tmp_inode as of size zero |
@@ -556,8 +588,7 @@ err_out: | |||
556 | tmp_inode->i_nlink = 0; | 588 | tmp_inode->i_nlink = 0; |
557 | 589 | ||
558 | ext4_journal_stop(handle); | 590 | ext4_journal_stop(handle); |
559 | 591 | mutex_unlock(&(inode->i_mutex)); | |
560 | up_write(&EXT4_I(inode)->i_data_sem); | ||
561 | 592 | ||
562 | if (tmp_inode) | 593 | if (tmp_inode) |
563 | iput(tmp_inode); | 594 | iput(tmp_inode); |