diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-09-08 11:18:08 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:07 -0400 |
commit | 4bef084857ab8fe71cf49eae349c25e440a49150 (patch) | |
tree | 7a9a850515538421c2976f8ee4c1bea5ceced59c /fs/btrfs/transaction.c | |
parent | 325cd4bafeb6cfb44addd6e807a9b74170d1be31 (diff) |
Btrfs: Tree logging fixes
* Pin down data blocks to prevent them from being reallocated like so:
trans 1: allocate file extent
trans 2: free file extent
trans 3: free file extent during old snapshot deletion
trans 3: allocate file extent to new file
trans 3: fsync new file
Before the tree logging code, this was legal because the fsync
would commit the transaction that did the final data extent free
and the transaction that allocated the extent to the new file
at the same time.
With the tree logging code, the tree log subtransaction can commit
before the transaction that freed the extent. If we crash,
we're left with two different files using the extent.
* Don't wait in start_transaction if log replay is going on. This
avoids deadlocks from iput while we're cleaning up link counts in the
replay code.
* Don't deadlock in replay_one_name by trying to read an inode off
the disk while holding paths for the directory.
* Hold the buffer lock while we mark a buffer as written. This
closes a race where someone is changing a buffer while we write it.
They are supposed to mark it dirty again after they change it, but
this violates the COW (copy-on-write) rules.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 16 |
1 files changed, 13 insertions, 3 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 49c4f5b40ed6..61a377bcb2fb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -161,7 +161,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
161 | int ret; | 161 | int ret; |
162 | 162 | ||
163 | mutex_lock(&root->fs_info->trans_mutex); | 163 | mutex_lock(&root->fs_info->trans_mutex); |
164 | if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2) | 164 | if (!root->fs_info->log_root_recovering && |
165 | ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) | ||
165 | wait_current_trans(root); | 166 | wait_current_trans(root); |
166 | ret = join_transaction(root); | 167 | ret = join_transaction(root); |
167 | BUG_ON(ret); | 168 | BUG_ON(ret); |
@@ -328,9 +329,17 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | |||
328 | 329 | ||
329 | index = start >> PAGE_CACHE_SHIFT; | 330 | index = start >> PAGE_CACHE_SHIFT; |
330 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | 331 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; |
331 | page = find_lock_page(btree_inode->i_mapping, index); | 332 | page = find_get_page(btree_inode->i_mapping, index); |
332 | if (!page) | 333 | if (!page) |
333 | continue; | 334 | continue; |
335 | |||
336 | btree_lock_page_hook(page); | ||
337 | if (!page->mapping) { | ||
338 | unlock_page(page); | ||
339 | page_cache_release(page); | ||
340 | continue; | ||
341 | } | ||
342 | |||
334 | if (PageWriteback(page)) { | 343 | if (PageWriteback(page)) { |
335 | if (PageDirty(page)) | 344 | if (PageDirty(page)) |
336 | wait_on_page_writeback(page); | 345 | wait_on_page_writeback(page); |
@@ -360,7 +369,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | |||
360 | if (!page) | 369 | if (!page) |
361 | continue; | 370 | continue; |
362 | if (PageDirty(page)) { | 371 | if (PageDirty(page)) { |
363 | lock_page(page); | 372 | btree_lock_page_hook(page); |
373 | wait_on_page_writeback(page); | ||
364 | err = write_one_page(page, 0); | 374 | err = write_one_page(page, 0); |
365 | if (err) | 375 | if (err) |
366 | werr = err; | 376 | werr = err; |