diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-04-24 14:39:24 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-04-24 15:46:05 -0400 |
commit | e980b50cda1610f1c17978d9b7fd311a9dd93877 (patch) | |
tree | a5ab72fa4b791758e25fd6ece6f1aedd29954e7b /fs/btrfs | |
parent | 9601e3f6336f6ca66929f451b1f66085e68e36e3 (diff) |
Btrfs: fix fallocate deadlock on inode extent lock
The btrfs fallocate call takes an extent lock on the entire range
being fallocated, and then runs insert_reserved_file_extent on each
extent as it is allocated.
The problem with this is that btrfs_drop_extents may try to take
the same extent lock that fallocate is already holding. The solution
used here is to pass the end of the already-locked range down into
btrfs_drop_extents as a new locked_end parameter.
It turns out that at least one other caller had the same bug.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/ctree.h | 3 | ||||
-rw-r--r-- | fs/btrfs/file.c | 11 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 27 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 3 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 2 |
5 files changed, 29 insertions, 17 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 213535f45da2..4414a5d9983a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -2177,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | |||
2177 | extern struct file_operations btrfs_file_operations; | 2177 | extern struct file_operations btrfs_file_operations; |
2178 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2178 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
2179 | struct btrfs_root *root, struct inode *inode, | 2179 | struct btrfs_root *root, struct inode *inode, |
2180 | u64 start, u64 end, u64 inline_limit, u64 *hint_block); | 2180 | u64 start, u64 end, u64 locked_end, |
2181 | u64 inline_limit, u64 *hint_block); | ||
2181 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2182 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2182 | struct btrfs_root *root, | 2183 | struct btrfs_root *root, |
2183 | struct inode *inode, u64 start, u64 end); | 2184 | struct inode *inode, u64 start, u64 end); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 482f8db2cfd0..da3ed965c956 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -363,15 +363,16 @@ out: | |||
363 | */ | 363 | */ |
364 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 364 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
365 | struct btrfs_root *root, struct inode *inode, | 365 | struct btrfs_root *root, struct inode *inode, |
366 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) | 366 | u64 start, u64 end, u64 locked_end, |
367 | u64 inline_limit, u64 *hint_byte) | ||
367 | { | 368 | { |
368 | u64 extent_end = 0; | 369 | u64 extent_end = 0; |
369 | u64 locked_end = end; | ||
370 | u64 search_start = start; | 370 | u64 search_start = start; |
371 | u64 leaf_start; | 371 | u64 leaf_start; |
372 | u64 ram_bytes = 0; | 372 | u64 ram_bytes = 0; |
373 | u64 orig_parent = 0; | 373 | u64 orig_parent = 0; |
374 | u64 disk_bytenr = 0; | 374 | u64 disk_bytenr = 0; |
375 | u64 orig_locked_end = locked_end; | ||
375 | u8 compression; | 376 | u8 compression; |
376 | u8 encryption; | 377 | u8 encryption; |
377 | u16 other_encoding = 0; | 378 | u16 other_encoding = 0; |
@@ -684,9 +685,9 @@ next_slot: | |||
684 | } | 685 | } |
685 | out: | 686 | out: |
686 | btrfs_free_path(path); | 687 | btrfs_free_path(path); |
687 | if (locked_end > end) { | 688 | if (locked_end > orig_locked_end) { |
688 | unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, | 689 | unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, |
689 | GFP_NOFS); | 690 | locked_end - 1, GFP_NOFS); |
690 | } | 691 | } |
691 | btrfs_check_file(root, inode); | 692 | btrfs_check_file(root, inode); |
692 | return ret; | 693 | return ret; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 176b6cc28b1e..2fdb2995be64 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -234,7 +234,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
234 | } | 234 | } |
235 | 235 | ||
236 | ret = btrfs_drop_extents(trans, root, inode, start, | 236 | ret = btrfs_drop_extents(trans, root, inode, start, |
237 | aligned_end, start, &hint_byte); | 237 | aligned_end, aligned_end, start, &hint_byte); |
238 | BUG_ON(ret); | 238 | BUG_ON(ret); |
239 | 239 | ||
240 | if (isize > actual_end) | 240 | if (isize > actual_end) |
@@ -1439,6 +1439,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1439 | struct inode *inode, u64 file_pos, | 1439 | struct inode *inode, u64 file_pos, |
1440 | u64 disk_bytenr, u64 disk_num_bytes, | 1440 | u64 disk_bytenr, u64 disk_num_bytes, |
1441 | u64 num_bytes, u64 ram_bytes, | 1441 | u64 num_bytes, u64 ram_bytes, |
1442 | u64 locked_end, | ||
1442 | u8 compression, u8 encryption, | 1443 | u8 compression, u8 encryption, |
1443 | u16 other_encoding, int extent_type) | 1444 | u16 other_encoding, int extent_type) |
1444 | { | 1445 | { |
@@ -1455,7 +1456,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1455 | 1456 | ||
1456 | path->leave_spinning = 1; | 1457 | path->leave_spinning = 1; |
1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1458 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1458 | file_pos + num_bytes, file_pos, &hint); | 1459 | file_pos + num_bytes, locked_end, |
1460 | file_pos, &hint); | ||
1459 | BUG_ON(ret); | 1461 | BUG_ON(ret); |
1460 | 1462 | ||
1461 | ins.objectid = inode->i_ino; | 1463 | ins.objectid = inode->i_ino; |
@@ -1590,6 +1592,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1590 | ordered_extent->disk_len, | 1592 | ordered_extent->disk_len, |
1591 | ordered_extent->len, | 1593 | ordered_extent->len, |
1592 | ordered_extent->len, | 1594 | ordered_extent->len, |
1595 | ordered_extent->file_offset + | ||
1596 | ordered_extent->len, | ||
1593 | compressed, 0, 0, | 1597 | compressed, 0, 0, |
1594 | BTRFS_FILE_EXTENT_REG); | 1598 | BTRFS_FILE_EXTENT_REG); |
1595 | BUG_ON(ret); | 1599 | BUG_ON(ret); |
@@ -2877,6 +2881,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2877 | err = btrfs_drop_extents(trans, root, inode, | 2881 | err = btrfs_drop_extents(trans, root, inode, |
2878 | cur_offset, | 2882 | cur_offset, |
2879 | cur_offset + hole_size, | 2883 | cur_offset + hole_size, |
2884 | block_end, | ||
2880 | cur_offset, &hint_byte); | 2885 | cur_offset, &hint_byte); |
2881 | if (err) | 2886 | if (err) |
2882 | break; | 2887 | break; |
@@ -4968,7 +4973,7 @@ out_fail: | |||
4968 | 4973 | ||
4969 | static int prealloc_file_range(struct btrfs_trans_handle *trans, | 4974 | static int prealloc_file_range(struct btrfs_trans_handle *trans, |
4970 | struct inode *inode, u64 start, u64 end, | 4975 | struct inode *inode, u64 start, u64 end, |
4971 | u64 alloc_hint, int mode) | 4976 | u64 locked_end, u64 alloc_hint, int mode) |
4972 | { | 4977 | { |
4973 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4978 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4974 | struct btrfs_key ins; | 4979 | struct btrfs_key ins; |
@@ -4989,7 +4994,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
4989 | ret = insert_reserved_file_extent(trans, inode, | 4994 | ret = insert_reserved_file_extent(trans, inode, |
4990 | cur_offset, ins.objectid, | 4995 | cur_offset, ins.objectid, |
4991 | ins.offset, ins.offset, | 4996 | ins.offset, ins.offset, |
4992 | ins.offset, 0, 0, 0, | 4997 | ins.offset, locked_end, |
4998 | 0, 0, 0, | ||
4993 | BTRFS_FILE_EXTENT_PREALLOC); | 4999 | BTRFS_FILE_EXTENT_PREALLOC); |
4994 | BUG_ON(ret); | 5000 | BUG_ON(ret); |
4995 | num_bytes -= ins.offset; | 5001 | num_bytes -= ins.offset; |
@@ -5018,6 +5024,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5018 | u64 alloc_start; | 5024 | u64 alloc_start; |
5019 | u64 alloc_end; | 5025 | u64 alloc_end; |
5020 | u64 alloc_hint = 0; | 5026 | u64 alloc_hint = 0; |
5027 | u64 locked_end; | ||
5021 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 5028 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; |
5022 | struct extent_map *em; | 5029 | struct extent_map *em; |
5023 | struct btrfs_trans_handle *trans; | 5030 | struct btrfs_trans_handle *trans; |
@@ -5039,6 +5046,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5039 | goto out; | 5046 | goto out; |
5040 | } | 5047 | } |
5041 | 5048 | ||
5049 | locked_end = alloc_end - 1; | ||
5042 | while (1) { | 5050 | while (1) { |
5043 | struct btrfs_ordered_extent *ordered; | 5051 | struct btrfs_ordered_extent *ordered; |
5044 | 5052 | ||
@@ -5051,8 +5059,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5051 | /* the extent lock is ordered inside the running | 5059 | /* the extent lock is ordered inside the running |
5052 | * transaction | 5060 | * transaction |
5053 | */ | 5061 | */ |
5054 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, | 5062 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5055 | alloc_end - 1, GFP_NOFS); | 5063 | GFP_NOFS); |
5056 | ordered = btrfs_lookup_first_ordered_extent(inode, | 5064 | ordered = btrfs_lookup_first_ordered_extent(inode, |
5057 | alloc_end - 1); | 5065 | alloc_end - 1); |
5058 | if (ordered && | 5066 | if (ordered && |
@@ -5060,7 +5068,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5060 | ordered->file_offset < alloc_end) { | 5068 | ordered->file_offset < alloc_end) { |
5061 | btrfs_put_ordered_extent(ordered); | 5069 | btrfs_put_ordered_extent(ordered); |
5062 | unlock_extent(&BTRFS_I(inode)->io_tree, | 5070 | unlock_extent(&BTRFS_I(inode)->io_tree, |
5063 | alloc_start, alloc_end - 1, GFP_NOFS); | 5071 | alloc_start, locked_end, GFP_NOFS); |
5064 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | 5072 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); |
5065 | 5073 | ||
5066 | /* | 5074 | /* |
@@ -5085,7 +5093,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5085 | last_byte = (last_byte + mask) & ~mask; | 5093 | last_byte = (last_byte + mask) & ~mask; |
5086 | if (em->block_start == EXTENT_MAP_HOLE) { | 5094 | if (em->block_start == EXTENT_MAP_HOLE) { |
5087 | ret = prealloc_file_range(trans, inode, cur_offset, | 5095 | ret = prealloc_file_range(trans, inode, cur_offset, |
5088 | last_byte, alloc_hint, mode); | 5096 | last_byte, locked_end + 1, |
5097 | alloc_hint, mode); | ||
5089 | if (ret < 0) { | 5098 | if (ret < 0) { |
5090 | free_extent_map(em); | 5099 | free_extent_map(em); |
5091 | break; | 5100 | break; |
@@ -5101,7 +5110,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5101 | break; | 5110 | break; |
5102 | } | 5111 | } |
5103 | } | 5112 | } |
5104 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, | 5113 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5105 | GFP_NOFS); | 5114 | GFP_NOFS); |
5106 | 5115 | ||
5107 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | 5116 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7594bec1be10..f4e5d2e5ece6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -830,7 +830,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
830 | BUG_ON(!trans); | 830 | BUG_ON(!trans); |
831 | 831 | ||
832 | /* punch hole in destination first */ | 832 | /* punch hole in destination first */ |
833 | btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); | 833 | btrfs_drop_extents(trans, root, inode, off, off + len, |
834 | off + len, 0, &hint_byte); | ||
834 | 835 | ||
835 | /* clone data */ | 836 | /* clone data */ |
836 | key.objectid = src->i_ino; | 837 | key.objectid = src->i_ino; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 25f20ea11f27..db5e212e8445 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
536 | saved_nbytes = inode_get_bytes(inode); | 536 | saved_nbytes = inode_get_bytes(inode); |
537 | /* drop any overlapping extents */ | 537 | /* drop any overlapping extents */ |
538 | ret = btrfs_drop_extents(trans, root, inode, | 538 | ret = btrfs_drop_extents(trans, root, inode, |
539 | start, extent_end, start, &alloc_hint); | 539 | start, extent_end, extent_end, start, &alloc_hint); |
540 | BUG_ON(ret); | 540 | BUG_ON(ret); |
541 | 541 | ||
542 | if (found_type == BTRFS_FILE_EXTENT_REG || | 542 | if (found_type == BTRFS_FILE_EXTENT_REG || |