aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2009-04-24 14:39:24 -0400
committer: Chris Mason <chris.mason@oracle.com> 2009-04-24 15:46:05 -0400
commit: e980b50cda1610f1c17978d9b7fd311a9dd93877 (patch)
tree: a5ab72fa4b791758e25fd6ece6f1aedd29954e7b
parent: 9601e3f6336f6ca66929f451b1f66085e68e36e3 (diff)
Btrfs: fix fallocate deadlock on inode extent lock
The btrfs fallocate call takes an extent lock on the entire range being fallocated, and then runs through insert_reserved_file_extent on each extent as they are allocated.

The problem with this is that btrfs_drop_extents may decide to try to take the same extent lock fallocate was already holding. The solution used here is to push down knowledge of the range that is already locked going into btrfs_drop_extents.

It turns out that at least one other caller had the same bug.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--  fs/btrfs/ctree.h      3
-rw-r--r--  fs/btrfs/file.c      11
-rw-r--r--  fs/btrfs/inode.c     27
-rw-r--r--  fs/btrfs/ioctl.c      3
-rw-r--r--  fs/btrfs/tree-log.c   2
5 files changed, 29 insertions, 17 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 213535f45da2..4414a5d9983a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2177,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2177extern struct file_operations btrfs_file_operations; 2177extern struct file_operations btrfs_file_operations;
2178int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2178int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2179 struct btrfs_root *root, struct inode *inode, 2179 struct btrfs_root *root, struct inode *inode,
2180 u64 start, u64 end, u64 inline_limit, u64 *hint_block); 2180 u64 start, u64 end, u64 locked_end,
2181 u64 inline_limit, u64 *hint_block);
2181int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2182int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2182 struct btrfs_root *root, 2183 struct btrfs_root *root,
2183 struct inode *inode, u64 start, u64 end); 2184 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 482f8db2cfd0..da3ed965c956 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -363,15 +363,16 @@ out:
363 */ 363 */
364noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 364noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
365 struct btrfs_root *root, struct inode *inode, 365 struct btrfs_root *root, struct inode *inode,
366 u64 start, u64 end, u64 inline_limit, u64 *hint_byte) 366 u64 start, u64 end, u64 locked_end,
367 u64 inline_limit, u64 *hint_byte)
367{ 368{
368 u64 extent_end = 0; 369 u64 extent_end = 0;
369 u64 locked_end = end;
370 u64 search_start = start; 370 u64 search_start = start;
371 u64 leaf_start; 371 u64 leaf_start;
372 u64 ram_bytes = 0; 372 u64 ram_bytes = 0;
373 u64 orig_parent = 0; 373 u64 orig_parent = 0;
374 u64 disk_bytenr = 0; 374 u64 disk_bytenr = 0;
375 u64 orig_locked_end = locked_end;
375 u8 compression; 376 u8 compression;
376 u8 encryption; 377 u8 encryption;
377 u16 other_encoding = 0; 378 u16 other_encoding = 0;
@@ -684,9 +685,9 @@ next_slot:
684 } 685 }
685out: 686out:
686 btrfs_free_path(path); 687 btrfs_free_path(path);
687 if (locked_end > end) { 688 if (locked_end > orig_locked_end) {
688 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 689 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
689 GFP_NOFS); 690 locked_end - 1, GFP_NOFS);
690 } 691 }
691 btrfs_check_file(root, inode); 692 btrfs_check_file(root, inode);
692 return ret; 693 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 176b6cc28b1e..2fdb2995be64 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -234,7 +234,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
234 } 234 }
235 235
236 ret = btrfs_drop_extents(trans, root, inode, start, 236 ret = btrfs_drop_extents(trans, root, inode, start,
237 aligned_end, start, &hint_byte); 237 aligned_end, aligned_end, start, &hint_byte);
238 BUG_ON(ret); 238 BUG_ON(ret);
239 239
240 if (isize > actual_end) 240 if (isize > actual_end)
@@ -1439,6 +1439,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1439 struct inode *inode, u64 file_pos, 1439 struct inode *inode, u64 file_pos,
1440 u64 disk_bytenr, u64 disk_num_bytes, 1440 u64 disk_bytenr, u64 disk_num_bytes,
1441 u64 num_bytes, u64 ram_bytes, 1441 u64 num_bytes, u64 ram_bytes,
1442 u64 locked_end,
1442 u8 compression, u8 encryption, 1443 u8 compression, u8 encryption,
1443 u16 other_encoding, int extent_type) 1444 u16 other_encoding, int extent_type)
1444{ 1445{
@@ -1455,7 +1456,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 1456
1456 path->leave_spinning = 1; 1457 path->leave_spinning = 1;
1457 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1458 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1458 file_pos + num_bytes, file_pos, &hint); 1459 file_pos + num_bytes, locked_end,
1460 file_pos, &hint);
1459 BUG_ON(ret); 1461 BUG_ON(ret);
1460 1462
1461 ins.objectid = inode->i_ino; 1463 ins.objectid = inode->i_ino;
@@ -1590,6 +1592,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1590 ordered_extent->disk_len, 1592 ordered_extent->disk_len,
1591 ordered_extent->len, 1593 ordered_extent->len,
1592 ordered_extent->len, 1594 ordered_extent->len,
1595 ordered_extent->file_offset +
1596 ordered_extent->len,
1593 compressed, 0, 0, 1597 compressed, 0, 0,
1594 BTRFS_FILE_EXTENT_REG); 1598 BTRFS_FILE_EXTENT_REG);
1595 BUG_ON(ret); 1599 BUG_ON(ret);
@@ -2877,6 +2881,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2877 err = btrfs_drop_extents(trans, root, inode, 2881 err = btrfs_drop_extents(trans, root, inode,
2878 cur_offset, 2882 cur_offset,
2879 cur_offset + hole_size, 2883 cur_offset + hole_size,
2884 block_end,
2880 cur_offset, &hint_byte); 2885 cur_offset, &hint_byte);
2881 if (err) 2886 if (err)
2882 break; 2887 break;
@@ -4968,7 +4973,7 @@ out_fail:
4968 4973
4969static int prealloc_file_range(struct btrfs_trans_handle *trans, 4974static int prealloc_file_range(struct btrfs_trans_handle *trans,
4970 struct inode *inode, u64 start, u64 end, 4975 struct inode *inode, u64 start, u64 end,
4971 u64 alloc_hint, int mode) 4976 u64 locked_end, u64 alloc_hint, int mode)
4972{ 4977{
4973 struct btrfs_root *root = BTRFS_I(inode)->root; 4978 struct btrfs_root *root = BTRFS_I(inode)->root;
4974 struct btrfs_key ins; 4979 struct btrfs_key ins;
@@ -4989,7 +4994,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
4989 ret = insert_reserved_file_extent(trans, inode, 4994 ret = insert_reserved_file_extent(trans, inode,
4990 cur_offset, ins.objectid, 4995 cur_offset, ins.objectid,
4991 ins.offset, ins.offset, 4996 ins.offset, ins.offset,
4992 ins.offset, 0, 0, 0, 4997 ins.offset, locked_end,
4998 0, 0, 0,
4993 BTRFS_FILE_EXTENT_PREALLOC); 4999 BTRFS_FILE_EXTENT_PREALLOC);
4994 BUG_ON(ret); 5000 BUG_ON(ret);
4995 num_bytes -= ins.offset; 5001 num_bytes -= ins.offset;
@@ -5018,6 +5024,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5018 u64 alloc_start; 5024 u64 alloc_start;
5019 u64 alloc_end; 5025 u64 alloc_end;
5020 u64 alloc_hint = 0; 5026 u64 alloc_hint = 0;
5027 u64 locked_end;
5021 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5028 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5022 struct extent_map *em; 5029 struct extent_map *em;
5023 struct btrfs_trans_handle *trans; 5030 struct btrfs_trans_handle *trans;
@@ -5039,6 +5046,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5039 goto out; 5046 goto out;
5040 } 5047 }
5041 5048
5049 locked_end = alloc_end - 1;
5042 while (1) { 5050 while (1) {
5043 struct btrfs_ordered_extent *ordered; 5051 struct btrfs_ordered_extent *ordered;
5044 5052
@@ -5051,8 +5059,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5051 /* the extent lock is ordered inside the running 5059 /* the extent lock is ordered inside the running
5052 * transaction 5060 * transaction
5053 */ 5061 */
5054 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, 5062 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5055 alloc_end - 1, GFP_NOFS); 5063 GFP_NOFS);
5056 ordered = btrfs_lookup_first_ordered_extent(inode, 5064 ordered = btrfs_lookup_first_ordered_extent(inode,
5057 alloc_end - 1); 5065 alloc_end - 1);
5058 if (ordered && 5066 if (ordered &&
@@ -5060,7 +5068,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5060 ordered->file_offset < alloc_end) { 5068 ordered->file_offset < alloc_end) {
5061 btrfs_put_ordered_extent(ordered); 5069 btrfs_put_ordered_extent(ordered);
5062 unlock_extent(&BTRFS_I(inode)->io_tree, 5070 unlock_extent(&BTRFS_I(inode)->io_tree,
5063 alloc_start, alloc_end - 1, GFP_NOFS); 5071 alloc_start, locked_end, GFP_NOFS);
5064 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5072 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5065 5073
5066 /* 5074 /*
@@ -5085,7 +5093,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5085 last_byte = (last_byte + mask) & ~mask; 5093 last_byte = (last_byte + mask) & ~mask;
5086 if (em->block_start == EXTENT_MAP_HOLE) { 5094 if (em->block_start == EXTENT_MAP_HOLE) {
5087 ret = prealloc_file_range(trans, inode, cur_offset, 5095 ret = prealloc_file_range(trans, inode, cur_offset,
5088 last_byte, alloc_hint, mode); 5096 last_byte, locked_end + 1,
5097 alloc_hint, mode);
5089 if (ret < 0) { 5098 if (ret < 0) {
5090 free_extent_map(em); 5099 free_extent_map(em);
5091 break; 5100 break;
@@ -5101,7 +5110,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5101 break; 5110 break;
5102 } 5111 }
5103 } 5112 }
5104 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, 5113 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5105 GFP_NOFS); 5114 GFP_NOFS);
5106 5115
5107 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5116 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..f4e5d2e5ece6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -830,7 +830,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
830 BUG_ON(!trans); 830 BUG_ON(!trans);
831 831
832 /* punch hole in destination first */ 832 /* punch hole in destination first */
833 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); 833 btrfs_drop_extents(trans, root, inode, off, off + len,
834 off + len, 0, &hint_byte);
834 835
835 /* clone data */ 836 /* clone data */
836 key.objectid = src->i_ino; 837 key.objectid = src->i_ino;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f27..db5e212e8445 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
536 saved_nbytes = inode_get_bytes(inode); 536 saved_nbytes = inode_get_bytes(inode);
537 /* drop any overlapping extents */ 537 /* drop any overlapping extents */
538 ret = btrfs_drop_extents(trans, root, inode, 538 ret = btrfs_drop_extents(trans, root, inode,
539 start, extent_end, start, &alloc_hint); 539 start, extent_end, extent_end, start, &alloc_hint);
540 BUG_ON(ret); 540 BUG_ON(ret);
541 541
542 if (found_type == BTRFS_FILE_EXTENT_REG || 542 if (found_type == BTRFS_FILE_EXTENT_REG ||