aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Amir G <amir73il@users.sourceforge.net> 2010-07-27 11:56:05 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2010-07-27 11:56:05 -0400
commit: 40389687382bf0ae71458e7c0f828137a438a956 (patch)
tree: d2a49d494754e7fa28d33c2389f78a5b5da0db4f
parent: a271fe8527fe9637bdd82c97123b1356940dd84b (diff)
ext4: Fix block bitmap inconsistencies after a crash when deleting files
We have experienced bitmap inconsistencies after a crash during file delete under heavy load. The crash is not file system related, and I believe the following patch in ext4_free_branches() fixes the recovery problem. If the transaction is restarted and there is a crash before the new transaction is committed, then after recovery, the blocks that this indirect block points to have been freed, but the indirect block itself has not been freed and may still point to some of the free blocks (because of the ext4_forget()). So ext4_forget() should be called inside ext4_free_blocks() to avoid this problem. Signed-off-by: Amir Goldstein <amir73il@users.sf.net> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/inode.c 35
1 files changed, 13 insertions, 22 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 755ba8682233..699d1d01c5df 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4490,27 +4490,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4490 depth); 4490 depth);
4491 4491
4492 /* 4492 /*
4493 * We've probably journalled the indirect block several
4494 * times during the truncate. But it's no longer
4495 * needed and we now drop it from the transaction via
4496 * jbd2_journal_revoke().
4497 *
4498 * That's easy if it's exclusively part of this
4499 * transaction. But if it's part of the committing
4500 * transaction then jbd2_journal_forget() will simply
4501 * brelse() it. That means that if the underlying
4502 * block is reallocated in ext4_get_block(),
4503 * unmap_underlying_metadata() will find this block
4504 * and will try to get rid of it. damn, damn.
4505 *
4506 * If this block has already been committed to the
4507 * journal, a revoke record will be written. And
4508 * revoke records must be emitted *before* clearing
4509 * this block's bit in the bitmaps.
4510 */
4511 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
4512
4513 /*
4514 * Everything below this this pointer has been 4493 * Everything below this this pointer has been
4515 * released. Now let this top-of-subtree go. 4494 * released. Now let this top-of-subtree go.
4516 * 4495 *
@@ -4534,8 +4513,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4534 blocks_for_truncate(inode)); 4513 blocks_for_truncate(inode));
4535 } 4514 }
4536 4515
4516 /*
4517 * The forget flag here is critical because if
4518 * we are journaling (and not doing data
4519 * journaling), we have to make sure a revoke
4520 * record is written to prevent the journal
4521 * replay from overwriting the (former)
4522 * indirect block if it gets reallocated as a
4523 * data block. This must happen in the same
4524 * transaction where the data blocks are
4525 * actually freed.
4526 */
4537 ext4_free_blocks(handle, inode, 0, nr, 1, 4527 ext4_free_blocks(handle, inode, 0, nr, 1,
4538 EXT4_FREE_BLOCKS_METADATA); 4528 EXT4_FREE_BLOCKS_METADATA|
4529 EXT4_FREE_BLOCKS_FORGET);
4539 4530
4540 if (parent_bh) { 4531 if (parent_bh) {
4541 /* 4532 /*