aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2010-07-12 15:04:31 -0400
committerJan Kara <jack@suse.cz>2010-07-21 10:04:26 -0400
commitf25f624263445785b94f39739a6339ba9ed3275d (patch)
tree8ec38c718b0af19d6e9cbbc9f368c43d7af14b9b /fs/ext3
parent4c4d3901225518ed1a4c938ba15ba09842a00770 (diff)
ext3: Avoid filesystem corruption after a crash under heavy delete load
It can happen that ext3_free_branches calls ext3_forget() for an indirect block in an earlier transaction than a transaction in which we clear pointer to this indirect block. Thus if we crash before a transaction clearing the block pointer is committed, we will see indirect block pointing to already freed blocks and complain during orphan list cleanup. The fix is simple: Make sure ext3_forget() is called in the transaction doing block pointer clearing. This is a backport of an ext4 fix by Amir G. <amir73il@users.sourceforge.net> Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/ext3')
-rw-r--r--fs/ext3/inode.c46
1 files changed, 25 insertions, 21 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a786db403efc..436e5bbccbc2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2270,27 +2270,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2270 depth); 2270 depth);
2271 2271
2272 /* 2272 /*
2273 * We've probably journalled the indirect block several
2274 * times during the truncate. But it's no longer
2275 * needed and we now drop it from the transaction via
2276 * journal_revoke().
2277 *
2278 * That's easy if it's exclusively part of this
2279 * transaction. But if it's part of the committing
2280 * transaction then journal_forget() will simply
2281 * brelse() it. That means that if the underlying
2282 * block is reallocated in ext3_get_block(),
2283 * unmap_underlying_metadata() will find this block
2284 * and will try to get rid of it. damn, damn.
2285 *
2286 * If this block has already been committed to the
2287 * journal, a revoke record will be written. And
2288 * revoke records must be emitted *before* clearing
2289 * this block's bit in the bitmaps.
2290 */
2291 ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
2292
2293 /*
2294 * Everything below this this pointer has been 2273 * Everything below this this pointer has been
2295 * released. Now let this top-of-subtree go. 2274 * released. Now let this top-of-subtree go.
2296 * 2275 *
@@ -2313,6 +2292,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2313 truncate_restart_transaction(handle, inode); 2292 truncate_restart_transaction(handle, inode);
2314 } 2293 }
2315 2294
2295 /*
2296 * We've probably journalled the indirect block several
2297 * times during the truncate. But it's no longer
2298 * needed and we now drop it from the transaction via
2299 * journal_revoke().
2300 *
2301 * That's easy if it's exclusively part of this
2302 * transaction. But if it's part of the committing
2303 * transaction then journal_forget() will simply
2304 * brelse() it. That means that if the underlying
2305 * block is reallocated in ext3_get_block(),
2306 * unmap_underlying_metadata() will find this block
2307 * and will try to get rid of it. damn, damn. Thus
2308 * we don't allow a block to be reallocated until
2309 * a transaction freeing it has fully committed.
2310 *
2311 * We also have to make sure journal replay after a
2312 * crash does not overwrite non-journaled data blocks
2313 * with old metadata when the block got reallocated for
2314 * data. Thus we have to store a revoke record for a
2315 * block in the same transaction in which we free the
2316 * block.
2317 */
2318 ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
2319
2316 ext3_free_blocks(handle, inode, nr, 1); 2320 ext3_free_blocks(handle, inode, nr, 1);
2317 2321
2318 if (parent_bh) { 2322 if (parent_bh) {