diff options
| author | Jan Kara <jack@suse.cz> | 2010-07-12 15:04:31 -0400 |
|---|---|---|
| committer | Jan Kara <jack@suse.cz> | 2010-07-21 10:04:26 -0400 |
| commit | f25f624263445785b94f39739a6339ba9ed3275d (patch) | |
| tree | 8ec38c718b0af19d6e9cbbc9f368c43d7af14b9b /fs/ext3 | |
| parent | 4c4d3901225518ed1a4c938ba15ba09842a00770 (diff) | |
ext3: Avoid filesystem corruption after a crash under heavy delete load
It can happen that ext3_free_branches calls ext3_forget() for an indirect block
in an earlier transaction than a transaction in which we clear pointer to this
indirect block. Thus if we crash before a transaction clearing the block
pointer is committed, we will see indirect block pointing to already freed
blocks and complain during orphan list cleanup.
The fix is simple: Make sure ext3_forget() is called in the transaction
doing block pointer clearing.
This is a backport of an ext4 fix by Amir G. <amir73il@users.sourceforge.net>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/ext3')
| -rw-r--r-- | fs/ext3/inode.c | 46 |
1 files changed, 25 insertions, 21 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a786db403efc..436e5bbccbc2 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -2270,27 +2270,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
| 2270 | depth); | 2270 | depth); |
| 2271 | 2271 | ||
| 2272 | /* | 2272 | /* |
| 2273 | * We've probably journalled the indirect block several | ||
| 2274 | * times during the truncate. But it's no longer | ||
| 2275 | * needed and we now drop it from the transaction via | ||
| 2276 | * journal_revoke(). | ||
| 2277 | * | ||
| 2278 | * That's easy if it's exclusively part of this | ||
| 2279 | * transaction. But if it's part of the committing | ||
| 2280 | * transaction then journal_forget() will simply | ||
| 2281 | * brelse() it. That means that if the underlying | ||
| 2282 | * block is reallocated in ext3_get_block(), | ||
| 2283 | * unmap_underlying_metadata() will find this block | ||
| 2284 | * and will try to get rid of it. damn, damn. | ||
| 2285 | * | ||
| 2286 | * If this block has already been committed to the | ||
| 2287 | * journal, a revoke record will be written. And | ||
| 2288 | * revoke records must be emitted *before* clearing | ||
| 2289 | * this block's bit in the bitmaps. | ||
| 2290 | */ | ||
| 2291 | ext3_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
| 2292 | |||
| 2293 | /* | ||
| 2294 | * Everything below this this pointer has been | 2273 | * Everything below this this pointer has been |
| 2295 | * released. Now let this top-of-subtree go. | 2274 | * released. Now let this top-of-subtree go. |
| 2296 | * | 2275 | * |
| @@ -2313,6 +2292,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
| 2313 | truncate_restart_transaction(handle, inode); | 2292 | truncate_restart_transaction(handle, inode); |
| 2314 | } | 2293 | } |
| 2315 | 2294 | ||
| 2295 | /* | ||
| 2296 | * We've probably journalled the indirect block several | ||
| 2297 | * times during the truncate. But it's no longer | ||
| 2298 | * needed and we now drop it from the transaction via | ||
| 2299 | * journal_revoke(). | ||
| 2300 | * | ||
| 2301 | * That's easy if it's exclusively part of this | ||
| 2302 | * transaction. But if it's part of the committing | ||
| 2303 | * transaction then journal_forget() will simply | ||
| 2304 | * brelse() it. That means that if the underlying | ||
| 2305 | * block is reallocated in ext3_get_block(), | ||
| 2306 | * unmap_underlying_metadata() will find this block | ||
| 2307 | * and will try to get rid of it. damn, damn. Thus | ||
| 2308 | * we don't allow a block to be reallocated until | ||
| 2309 | * a transaction freeing it has fully committed. | ||
| 2310 | * | ||
| 2311 | * We also have to make sure journal replay after a | ||
| 2312 | * crash does not overwrite non-journaled data blocks | ||
| 2313 | * with old metadata when the block got reallocated for | ||
| 2314 | * data. Thus we have to store a revoke record for a | ||
| 2315 | * block in the same transaction in which we free the | ||
| 2316 | * block. | ||
| 2317 | */ | ||
| 2318 | ext3_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
| 2319 | |||
| 2316 | ext3_free_blocks(handle, inode, nr, 1); | 2320 | ext3_free_blocks(handle, inode, nr, 1); |
| 2317 | 2321 | ||
| 2318 | if (parent_bh) { | 2322 | if (parent_bh) { |
