aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yan Zheng <zheng.yan@oracle.com> 2008-10-30 14:19:50 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-10-30 14:19:50 -0400
commit 6643558db29006825dbb10012b3f8890aca4bcd5 (patch)
tree 0c0f4f7a0011749cda998431828cb9161747b51a
parent 9036c10208e1fc496cef7692ba66a78699b360dc (diff)
Btrfs: Fix bookend extent race v2
When dropping the middle part of an extent, btrfs_drop_extents first truncates the extent and then inserts a bookend extent. Since truncation and insertion can't be done atomically, there is a small window during which the bookend extent isn't in the tree. This causes problems for functions that search the tree for file extent items. The fix is to lock the range of the bookend extent before truncation. Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
-rw-r--r-- fs/btrfs/extent-tree.c 28
-rw-r--r-- fs/btrfs/extent_io.c 6
-rw-r--r-- fs/btrfs/file.c 31
-rw-r--r-- fs/btrfs/inode.c 10
4 files changed, 43 insertions, 32 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fada9c22a021..535cee47fcfb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3379,11 +3379,13 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,
3379 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 3379 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3380 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 3380 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
3381 struct extent_map *em; 3381 struct extent_map *em;
3382 u64 start = extent_key->objectid - offset;
3383 u64 end = start + extent_key->offset - 1;
3382 3384
3383 em = alloc_extent_map(GFP_NOFS); 3385 em = alloc_extent_map(GFP_NOFS);
3384 BUG_ON(!em || IS_ERR(em)); 3386 BUG_ON(!em || IS_ERR(em));
3385 3387
3386 em->start = extent_key->objectid - offset; 3388 em->start = start;
3387 em->len = extent_key->offset; 3389 em->len = extent_key->offset;
3388 em->block_len = extent_key->offset; 3390 em->block_len = extent_key->offset;
3389 em->block_start = extent_key->objectid; 3391 em->block_start = extent_key->objectid;
@@ -3391,7 +3393,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,
3391 set_bit(EXTENT_FLAG_PINNED, &em->flags); 3393 set_bit(EXTENT_FLAG_PINNED, &em->flags);
3392 3394
3393 /* setup extent map to cheat btrfs_readpage */ 3395 /* setup extent map to cheat btrfs_readpage */
3394 mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); 3396 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
3395 while (1) { 3397 while (1) {
3396 int ret; 3398 int ret;
3397 spin_lock(&em_tree->lock); 3399 spin_lock(&em_tree->lock);
@@ -3401,13 +3403,11 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,
3401 free_extent_map(em); 3403 free_extent_map(em);
3402 break; 3404 break;
3403 } 3405 }
3404 btrfs_drop_extent_cache(reloc_inode, em->start, 3406 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
3405 em->start + em->len - 1, 0);
3406 } 3407 }
3407 mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); 3408 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
3408 3409
3409 return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, 3410 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
3410 extent_key->offset);
3411} 3411}
3412 3412
3413struct btrfs_ref_path { 3413struct btrfs_ref_path {
@@ -3831,7 +3831,6 @@ next:
3831 * the file extent item was modified by someone 3831 * the file extent item was modified by someone
3832 * before the extent got locked. 3832 * before the extent got locked.
3833 */ 3833 */
3834 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3835 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 3834 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3836 lock_end, GFP_NOFS); 3835 lock_end, GFP_NOFS);
3837 extent_locked = 0; 3836 extent_locked = 0;
@@ -3896,8 +3895,12 @@ next:
3896 lock_start = key.offset; 3895 lock_start = key.offset;
3897 lock_end = lock_start + num_bytes - 1; 3896 lock_end = lock_start + num_bytes - 1;
3898 } else { 3897 } else {
3899 BUG_ON(lock_start != key.offset); 3898 if (lock_start > key.offset ||
3900 BUG_ON(lock_end - lock_start + 1 < num_bytes); 3899 lock_end + 1 < key.offset + num_bytes) {
3900 unlock_extent(&BTRFS_I(inode)->io_tree,
3901 lock_start, lock_end, GFP_NOFS);
3902 extent_locked = 0;
3903 }
3901 } 3904 }
3902 3905
3903 if (!inode) { 3906 if (!inode) {
@@ -3951,7 +3954,6 @@ next:
3951 if (ordered) 3954 if (ordered)
3952 btrfs_put_ordered_extent(ordered); 3955 btrfs_put_ordered_extent(ordered);
3953 3956
3954 mutex_lock(&BTRFS_I(inode)->extent_mutex);
3955 extent_locked = 1; 3957 extent_locked = 1;
3956 continue; 3958 continue;
3957 } 3959 }
@@ -4073,7 +4075,6 @@ next:
4073 } 4075 }
4074 4076
4075 if (extent_locked) { 4077 if (extent_locked) {
4076 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4077 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4078 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4078 lock_end, GFP_NOFS); 4079 lock_end, GFP_NOFS);
4079 extent_locked = 0; 4080 extent_locked = 0;
@@ -4091,7 +4092,6 @@ out:
4091 if (inode) { 4092 if (inode) {
4092 mutex_unlock(&inode->i_mutex); 4093 mutex_unlock(&inode->i_mutex);
4093 if (extent_locked) { 4094 if (extent_locked) {
4094 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4095 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4095 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4096 lock_end, GFP_NOFS); 4096 lock_end, GFP_NOFS);
4097 } 4097 }
@@ -4180,10 +4180,8 @@ static int noinline invalidate_extent_cache(struct btrfs_root *root,
4180 4180
4181 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 4181 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4182 key.offset + num_bytes - 1, GFP_NOFS); 4182 key.offset + num_bytes - 1, GFP_NOFS);
4183 mutex_lock(&BTRFS_I(inode)->extent_mutex);
4184 btrfs_drop_extent_cache(inode, key.offset, 4183 btrfs_drop_extent_cache(inode, key.offset,
4185 key.offset + num_bytes - 1, 1); 4184 key.offset + num_bytes - 1, 1);
4186 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4187 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 4185 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4188 key.offset + num_bytes - 1, GFP_NOFS); 4186 key.offset + num_bytes - 1, GFP_NOFS);
4189 cond_resched(); 4187 cond_resched();
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7503bd46819b..65a0583027e9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -946,8 +946,12 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
946 946
947 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 947 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
948 &failed_start, mask); 948 &failed_start, mask);
949 if (err == -EEXIST) 949 if (err == -EEXIST) {
950 if (failed_start > start)
951 clear_extent_bit(tree, start, failed_start - 1,
952 EXTENT_LOCKED, 1, 0, mask);
950 return 0; 953 return 0;
954 }
951 return 1; 955 return 1;
952} 956}
953EXPORT_SYMBOL(try_lock_extent); 957EXPORT_SYMBOL(try_lock_extent);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b8a7637e14a1..1a0510ad030c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -364,6 +364,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
364 u64 start, u64 end, u64 inline_limit, u64 *hint_byte) 364 u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
365{ 365{
366 u64 extent_end = 0; 366 u64 extent_end = 0;
367 u64 locked_end = end;
367 u64 search_start = start; 368 u64 search_start = start;
368 u64 leaf_start; 369 u64 leaf_start;
369 u64 ram_bytes = 0; 370 u64 ram_bytes = 0;
@@ -479,12 +480,6 @@ next_slot:
479 goto next_slot; 480 goto next_slot;
480 } 481 }
481 482
482 if (found_inline) {
483 u64 mask = root->sectorsize - 1;
484 search_start = (extent_end + mask) & ~mask;
485 } else
486 search_start = extent_end;
487
488 if (end <= extent_end && start >= key.offset && found_inline) 483 if (end <= extent_end && start >= key.offset && found_inline)
489 *hint_byte = EXTENT_MAP_INLINE; 484 *hint_byte = EXTENT_MAP_INLINE;
490 485
@@ -501,6 +496,26 @@ next_slot:
501 if (found_inline && start <= key.offset) 496 if (found_inline && start <= key.offset)
502 keep = 1; 497 keep = 1;
503 } 498 }
499
500 if (bookend && found_extent && locked_end < extent_end) {
501 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
502 locked_end, extent_end - 1, GFP_NOFS);
503 if (!ret) {
504 btrfs_release_path(root, path);
505 lock_extent(&BTRFS_I(inode)->io_tree,
506 locked_end, extent_end - 1, GFP_NOFS);
507 locked_end = extent_end;
508 continue;
509 }
510 locked_end = extent_end;
511 }
512
513 if (found_inline) {
514 u64 mask = root->sectorsize - 1;
515 search_start = (extent_end + mask) & ~mask;
516 } else
517 search_start = extent_end;
518
504 /* truncate existing extent */ 519 /* truncate existing extent */
505 if (start > key.offset) { 520 if (start > key.offset) {
506 u64 new_num; 521 u64 new_num;
@@ -638,6 +653,10 @@ next_slot:
638 } 653 }
639out: 654out:
640 btrfs_free_path(path); 655 btrfs_free_path(path);
656 if (locked_end > end) {
657 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
658 GFP_NOFS);
659 }
641 btrfs_check_file(root, inode); 660 btrfs_check_file(root, inode);
642 return ret; 661 return ret;
643} 662}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8254d6fa6910..e8511d14b119 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -246,7 +246,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans,
246 return 1; 246 return 1;
247 } 247 }
248 248
249 mutex_lock(&BTRFS_I(inode)->extent_mutex);
250 ret = btrfs_drop_extents(trans, root, inode, start, 249 ret = btrfs_drop_extents(trans, root, inode, start,
251 aligned_end, aligned_end, &hint_byte); 250 aligned_end, aligned_end, &hint_byte);
252 BUG_ON(ret); 251 BUG_ON(ret);
@@ -258,7 +257,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans,
258 compressed_pages); 257 compressed_pages);
259 BUG_ON(ret); 258 BUG_ON(ret);
260 btrfs_drop_extent_cache(inode, start, aligned_end, 0); 259 btrfs_drop_extent_cache(inode, start, aligned_end, 0);
261 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
262 return 0; 260 return 0;
263} 261}
264 262
@@ -437,9 +435,7 @@ again:
437 BUG_ON(disk_num_bytes > 435 BUG_ON(disk_num_bytes >
438 btrfs_super_total_bytes(&root->fs_info->super_copy)); 436 btrfs_super_total_bytes(&root->fs_info->super_copy));
439 437
440 mutex_lock(&BTRFS_I(inode)->extent_mutex);
441 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 438 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
442 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
443 439
444 while(disk_num_bytes > 0) { 440 while(disk_num_bytes > 0) {
445 unsigned long min_bytes; 441 unsigned long min_bytes;
@@ -477,8 +473,6 @@ again:
477 em->block_start = ins.objectid; 473 em->block_start = ins.objectid;
478 em->block_len = ins.offset; 474 em->block_len = ins.offset;
479 em->bdev = root->fs_info->fs_devices->latest_bdev; 475 em->bdev = root->fs_info->fs_devices->latest_bdev;
480
481 mutex_lock(&BTRFS_I(inode)->extent_mutex);
482 set_bit(EXTENT_FLAG_PINNED, &em->flags); 476 set_bit(EXTENT_FLAG_PINNED, &em->flags);
483 477
484 if (will_compress) 478 if (will_compress)
@@ -495,7 +489,6 @@ again:
495 btrfs_drop_extent_cache(inode, start, 489 btrfs_drop_extent_cache(inode, start,
496 start + ram_size - 1, 0); 490 start + ram_size - 1, 0);
497 } 491 }
498 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
499 492
500 cur_alloc_size = ins.offset; 493 cur_alloc_size = ins.offset;
501 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 494 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1016,8 +1009,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1016 1009
1017 INIT_LIST_HEAD(&list); 1010 INIT_LIST_HEAD(&list);
1018 1011
1019 mutex_lock(&BTRFS_I(inode)->extent_mutex);
1020
1021 ret = btrfs_drop_extents(trans, root, inode, 1012 ret = btrfs_drop_extents(trans, root, inode,
1022 ordered_extent->file_offset, 1013 ordered_extent->file_offset,
1023 ordered_extent->file_offset + 1014 ordered_extent->file_offset +
@@ -1059,7 +1050,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1059 btrfs_drop_extent_cache(inode, ordered_extent->file_offset, 1050 btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
1060 ordered_extent->file_offset + 1051 ordered_extent->file_offset +
1061 ordered_extent->len - 1, 0); 1052 ordered_extent->len - 1, 0);
1062 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
1063 1053
1064 ins.objectid = ordered_extent->start; 1054 ins.objectid = ordered_extent->start;
1065 ins.offset = ordered_extent->disk_len; 1055 ins.offset = ordered_extent->disk_len;