diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-04-21 11:53:38 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-04-21 12:45:12 -0400 |
commit | 546888da82082555a56528730a83f0afd12f33bf (patch) | |
tree | 98ee868d1b8a4bd390a980fed707f91419b79fb5 /fs | |
parent | 8c594ea81d7abbbffdda447b127f8ba8d76f319d (diff) |
Btrfs: fix btrfs fallocate oops and deadlock
Btrfs fallocate was incorrectly starting a transaction with a lock held
on the extent_io tree for the file, which could deadlock. Strictly
speaking it was using join_transaction which would be safe, but it is better
to move the transaction outside of the lock.
When preallocated extents are overwritten, btrfs_mark_buffer_dirty was
being called on an unlocked buffer. This was triggering an assertion and
oops because the lock is supposed to be held.
The bug was calling btrfs_mark_buffer_dirty on a leaf after btrfs_del_item had
been run. btrfs_del_item takes care of dirtying things, so the solution is a
to skip the btrfs_mark_buffer_dirty call in this case.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/file.c | 4 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 36 |
2 files changed, 31 insertions, 9 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e21c0060ee73..482f8db2cfd0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -830,7 +830,7 @@ again: | |||
830 | 830 | ||
831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
832 | BUG_ON(ret); | 832 | BUG_ON(ret); |
833 | goto done; | 833 | goto release; |
834 | } else if (split == start) { | 834 | } else if (split == start) { |
835 | if (locked_end < extent_end) { | 835 | if (locked_end < extent_end) { |
836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | 836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, |
@@ -926,6 +926,8 @@ again: | |||
926 | } | 926 | } |
927 | done: | 927 | done: |
928 | btrfs_mark_buffer_dirty(leaf); | 928 | btrfs_mark_buffer_dirty(leaf); |
929 | |||
930 | release: | ||
929 | btrfs_release_path(root, path); | 931 | btrfs_release_path(root, path); |
930 | if (split_end && split == start) { | 932 | if (split_end && split == start) { |
931 | split = end; | 933 | split = end; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0d1dd492a58..65219f6a16a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4970,10 +4970,10 @@ out_fail: | |||
4970 | return err; | 4970 | return err; |
4971 | } | 4971 | } |
4972 | 4972 | ||
4973 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 4973 | static int prealloc_file_range(struct btrfs_trans_handle *trans, |
4974 | struct inode *inode, u64 start, u64 end, | ||
4974 | u64 alloc_hint, int mode) | 4975 | u64 alloc_hint, int mode) |
4975 | { | 4976 | { |
4976 | struct btrfs_trans_handle *trans; | ||
4977 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4977 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4978 | struct btrfs_key ins; | 4978 | struct btrfs_key ins; |
4979 | u64 alloc_size; | 4979 | u64 alloc_size; |
@@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4981 | u64 num_bytes = end - start; | 4981 | u64 num_bytes = end - start; |
4982 | int ret = 0; | 4982 | int ret = 0; |
4983 | 4983 | ||
4984 | trans = btrfs_join_transaction(root, 1); | ||
4985 | BUG_ON(!trans); | ||
4986 | btrfs_set_trans_block_group(trans, inode); | ||
4987 | |||
4988 | while (num_bytes > 0) { | 4984 | while (num_bytes > 0) { |
4989 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 4985 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
4990 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 4986 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
@@ -5015,7 +5011,6 @@ out: | |||
5015 | BUG_ON(ret); | 5011 | BUG_ON(ret); |
5016 | } | 5012 | } |
5017 | 5013 | ||
5018 | btrfs_end_transaction(trans, root); | ||
5019 | return ret; | 5014 | return ret; |
5020 | } | 5015 | } |
5021 | 5016 | ||
@@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5029 | u64 alloc_hint = 0; | 5024 | u64 alloc_hint = 0; |
5030 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 5025 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; |
5031 | struct extent_map *em; | 5026 | struct extent_map *em; |
5027 | struct btrfs_trans_handle *trans; | ||
5032 | int ret; | 5028 | int ret; |
5033 | 5029 | ||
5034 | alloc_start = offset & ~mask; | 5030 | alloc_start = offset & ~mask; |
5035 | alloc_end = (offset + len + mask) & ~mask; | 5031 | alloc_end = (offset + len + mask) & ~mask; |
5036 | 5032 | ||
5033 | /* | ||
5034 | * wait for ordered IO before we have any locks. We'll loop again | ||
5035 | * below with the locks held. | ||
5036 | */ | ||
5037 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
5038 | |||
5037 | mutex_lock(&inode->i_mutex); | 5039 | mutex_lock(&inode->i_mutex); |
5038 | if (alloc_start > inode->i_size) { | 5040 | if (alloc_start > inode->i_size) { |
5039 | ret = btrfs_cont_expand(inode, alloc_start); | 5041 | ret = btrfs_cont_expand(inode, alloc_start); |
@@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5043 | 5045 | ||
5044 | while (1) { | 5046 | while (1) { |
5045 | struct btrfs_ordered_extent *ordered; | 5047 | struct btrfs_ordered_extent *ordered; |
5048 | |||
5049 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); | ||
5050 | if (!trans) { | ||
5051 | ret = -EIO; | ||
5052 | goto out; | ||
5053 | } | ||
5054 | |||
5055 | /* the extent lock is ordered inside the running | ||
5056 | * transaction | ||
5057 | */ | ||
5046 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, | 5058 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, |
5047 | alloc_end - 1, GFP_NOFS); | 5059 | alloc_end - 1, GFP_NOFS); |
5048 | ordered = btrfs_lookup_first_ordered_extent(inode, | 5060 | ordered = btrfs_lookup_first_ordered_extent(inode, |
@@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5053 | btrfs_put_ordered_extent(ordered); | 5065 | btrfs_put_ordered_extent(ordered); |
5054 | unlock_extent(&BTRFS_I(inode)->io_tree, | 5066 | unlock_extent(&BTRFS_I(inode)->io_tree, |
5055 | alloc_start, alloc_end - 1, GFP_NOFS); | 5067 | alloc_start, alloc_end - 1, GFP_NOFS); |
5068 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5069 | |||
5070 | /* | ||
5071 | * we can't wait on the range with the transaction | ||
5072 | * running or with the extent lock held | ||
5073 | */ | ||
5056 | btrfs_wait_ordered_range(inode, alloc_start, | 5074 | btrfs_wait_ordered_range(inode, alloc_start, |
5057 | alloc_end - alloc_start); | 5075 | alloc_end - alloc_start); |
5058 | } else { | 5076 | } else { |
@@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5070 | last_byte = min(extent_map_end(em), alloc_end); | 5088 | last_byte = min(extent_map_end(em), alloc_end); |
5071 | last_byte = (last_byte + mask) & ~mask; | 5089 | last_byte = (last_byte + mask) & ~mask; |
5072 | if (em->block_start == EXTENT_MAP_HOLE) { | 5090 | if (em->block_start == EXTENT_MAP_HOLE) { |
5073 | ret = prealloc_file_range(inode, cur_offset, | 5091 | ret = prealloc_file_range(trans, inode, cur_offset, |
5074 | last_byte, alloc_hint, mode); | 5092 | last_byte, alloc_hint, mode); |
5075 | if (ret < 0) { | 5093 | if (ret < 0) { |
5076 | free_extent_map(em); | 5094 | free_extent_map(em); |
@@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5089 | } | 5107 | } |
5090 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, | 5108 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, |
5091 | GFP_NOFS); | 5109 | GFP_NOFS); |
5110 | |||
5111 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5092 | out: | 5112 | out: |
5093 | mutex_unlock(&inode->i_mutex); | 5113 | mutex_unlock(&inode->i_mutex); |
5094 | return ret; | 5114 | return ret; |