diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2008-10-30 14:19:50 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-10-30 14:19:50 -0400 |
commit | 6643558db29006825dbb10012b3f8890aca4bcd5 (patch) | |
tree | 0c0f4f7a0011749cda998431828cb9161747b51a /fs/btrfs | |
parent | 9036c10208e1fc496cef7692ba66a78699b360dc (diff) |
Btrfs: Fix bookend extent race v2
When dropping the middle part of an extent, btrfs_drop_extents first
truncates the extent, then inserts a bookend extent.
Since truncation and insertion can't be done atomically, there is a small
window during which the bookend extent isn't in the tree. This causes problems
for functions that search the tree for file extent items. The fix is to
lock the range of the bookend extent before truncation.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/extent-tree.c | 28 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/file.c | 31 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 10 |
4 files changed, 43 insertions, 32 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fada9c22a021..535cee47fcfb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3379,11 +3379,13 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, | |||
3379 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; | 3379 | struct btrfs_root *root = BTRFS_I(reloc_inode)->root; |
3380 | struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; | 3380 | struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; |
3381 | struct extent_map *em; | 3381 | struct extent_map *em; |
3382 | u64 start = extent_key->objectid - offset; | ||
3383 | u64 end = start + extent_key->offset - 1; | ||
3382 | 3384 | ||
3383 | em = alloc_extent_map(GFP_NOFS); | 3385 | em = alloc_extent_map(GFP_NOFS); |
3384 | BUG_ON(!em || IS_ERR(em)); | 3386 | BUG_ON(!em || IS_ERR(em)); |
3385 | 3387 | ||
3386 | em->start = extent_key->objectid - offset; | 3388 | em->start = start; |
3387 | em->len = extent_key->offset; | 3389 | em->len = extent_key->offset; |
3388 | em->block_len = extent_key->offset; | 3390 | em->block_len = extent_key->offset; |
3389 | em->block_start = extent_key->objectid; | 3391 | em->block_start = extent_key->objectid; |
@@ -3391,7 +3393,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, | |||
3391 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 3393 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
3392 | 3394 | ||
3393 | /* setup extent map to cheat btrfs_readpage */ | 3395 | /* setup extent map to cheat btrfs_readpage */ |
3394 | mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); | 3396 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
3395 | while (1) { | 3397 | while (1) { |
3396 | int ret; | 3398 | int ret; |
3397 | spin_lock(&em_tree->lock); | 3399 | spin_lock(&em_tree->lock); |
@@ -3401,13 +3403,11 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, | |||
3401 | free_extent_map(em); | 3403 | free_extent_map(em); |
3402 | break; | 3404 | break; |
3403 | } | 3405 | } |
3404 | btrfs_drop_extent_cache(reloc_inode, em->start, | 3406 | btrfs_drop_extent_cache(reloc_inode, start, end, 0); |
3405 | em->start + em->len - 1, 0); | ||
3406 | } | 3407 | } |
3407 | mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); | 3408 | unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
3408 | 3409 | ||
3409 | return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, | 3410 | return relocate_inode_pages(reloc_inode, start, extent_key->offset); |
3410 | extent_key->offset); | ||
3411 | } | 3411 | } |
3412 | 3412 | ||
3413 | struct btrfs_ref_path { | 3413 | struct btrfs_ref_path { |
@@ -3831,7 +3831,6 @@ next: | |||
3831 | * the file extent item was modified by someone | 3831 | * the file extent item was modified by someone |
3832 | * before the extent got locked. | 3832 | * before the extent got locked. |
3833 | */ | 3833 | */ |
3834 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
3835 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | 3834 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, |
3836 | lock_end, GFP_NOFS); | 3835 | lock_end, GFP_NOFS); |
3837 | extent_locked = 0; | 3836 | extent_locked = 0; |
@@ -3896,8 +3895,12 @@ next: | |||
3896 | lock_start = key.offset; | 3895 | lock_start = key.offset; |
3897 | lock_end = lock_start + num_bytes - 1; | 3896 | lock_end = lock_start + num_bytes - 1; |
3898 | } else { | 3897 | } else { |
3899 | BUG_ON(lock_start != key.offset); | 3898 | if (lock_start > key.offset || |
3900 | BUG_ON(lock_end - lock_start + 1 < num_bytes); | 3899 | lock_end + 1 < key.offset + num_bytes) { |
3900 | unlock_extent(&BTRFS_I(inode)->io_tree, | ||
3901 | lock_start, lock_end, GFP_NOFS); | ||
3902 | extent_locked = 0; | ||
3903 | } | ||
3901 | } | 3904 | } |
3902 | 3905 | ||
3903 | if (!inode) { | 3906 | if (!inode) { |
@@ -3951,7 +3954,6 @@ next: | |||
3951 | if (ordered) | 3954 | if (ordered) |
3952 | btrfs_put_ordered_extent(ordered); | 3955 | btrfs_put_ordered_extent(ordered); |
3953 | 3956 | ||
3954 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
3955 | extent_locked = 1; | 3957 | extent_locked = 1; |
3956 | continue; | 3958 | continue; |
3957 | } | 3959 | } |
@@ -4073,7 +4075,6 @@ next: | |||
4073 | } | 4075 | } |
4074 | 4076 | ||
4075 | if (extent_locked) { | 4077 | if (extent_locked) { |
4076 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
4077 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | 4078 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, |
4078 | lock_end, GFP_NOFS); | 4079 | lock_end, GFP_NOFS); |
4079 | extent_locked = 0; | 4080 | extent_locked = 0; |
@@ -4091,7 +4092,6 @@ out: | |||
4091 | if (inode) { | 4092 | if (inode) { |
4092 | mutex_unlock(&inode->i_mutex); | 4093 | mutex_unlock(&inode->i_mutex); |
4093 | if (extent_locked) { | 4094 | if (extent_locked) { |
4094 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
4095 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, | 4095 | unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, |
4096 | lock_end, GFP_NOFS); | 4096 | lock_end, GFP_NOFS); |
4097 | } | 4097 | } |
@@ -4180,10 +4180,8 @@ static int noinline invalidate_extent_cache(struct btrfs_root *root, | |||
4180 | 4180 | ||
4181 | lock_extent(&BTRFS_I(inode)->io_tree, key.offset, | 4181 | lock_extent(&BTRFS_I(inode)->io_tree, key.offset, |
4182 | key.offset + num_bytes - 1, GFP_NOFS); | 4182 | key.offset + num_bytes - 1, GFP_NOFS); |
4183 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
4184 | btrfs_drop_extent_cache(inode, key.offset, | 4183 | btrfs_drop_extent_cache(inode, key.offset, |
4185 | key.offset + num_bytes - 1, 1); | 4184 | key.offset + num_bytes - 1, 1); |
4186 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
4187 | unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, | 4185 | unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, |
4188 | key.offset + num_bytes - 1, GFP_NOFS); | 4186 | key.offset + num_bytes - 1, GFP_NOFS); |
4189 | cond_resched(); | 4187 | cond_resched(); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7503bd46819b..65a0583027e9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -946,8 +946,12 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | |||
946 | 946 | ||
947 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 947 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, |
948 | &failed_start, mask); | 948 | &failed_start, mask); |
949 | if (err == -EEXIST) | 949 | if (err == -EEXIST) { |
950 | if (failed_start > start) | ||
951 | clear_extent_bit(tree, start, failed_start - 1, | ||
952 | EXTENT_LOCKED, 1, 0, mask); | ||
950 | return 0; | 953 | return 0; |
954 | } | ||
951 | return 1; | 955 | return 1; |
952 | } | 956 | } |
953 | EXPORT_SYMBOL(try_lock_extent); | 957 | EXPORT_SYMBOL(try_lock_extent); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b8a7637e14a1..1a0510ad030c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -364,6 +364,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
364 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) | 364 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) |
365 | { | 365 | { |
366 | u64 extent_end = 0; | 366 | u64 extent_end = 0; |
367 | u64 locked_end = end; | ||
367 | u64 search_start = start; | 368 | u64 search_start = start; |
368 | u64 leaf_start; | 369 | u64 leaf_start; |
369 | u64 ram_bytes = 0; | 370 | u64 ram_bytes = 0; |
@@ -479,12 +480,6 @@ next_slot: | |||
479 | goto next_slot; | 480 | goto next_slot; |
480 | } | 481 | } |
481 | 482 | ||
482 | if (found_inline) { | ||
483 | u64 mask = root->sectorsize - 1; | ||
484 | search_start = (extent_end + mask) & ~mask; | ||
485 | } else | ||
486 | search_start = extent_end; | ||
487 | |||
488 | if (end <= extent_end && start >= key.offset && found_inline) | 483 | if (end <= extent_end && start >= key.offset && found_inline) |
489 | *hint_byte = EXTENT_MAP_INLINE; | 484 | *hint_byte = EXTENT_MAP_INLINE; |
490 | 485 | ||
@@ -501,6 +496,26 @@ next_slot: | |||
501 | if (found_inline && start <= key.offset) | 496 | if (found_inline && start <= key.offset) |
502 | keep = 1; | 497 | keep = 1; |
503 | } | 498 | } |
499 | |||
500 | if (bookend && found_extent && locked_end < extent_end) { | ||
501 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | ||
502 | locked_end, extent_end - 1, GFP_NOFS); | ||
503 | if (!ret) { | ||
504 | btrfs_release_path(root, path); | ||
505 | lock_extent(&BTRFS_I(inode)->io_tree, | ||
506 | locked_end, extent_end - 1, GFP_NOFS); | ||
507 | locked_end = extent_end; | ||
508 | continue; | ||
509 | } | ||
510 | locked_end = extent_end; | ||
511 | } | ||
512 | |||
513 | if (found_inline) { | ||
514 | u64 mask = root->sectorsize - 1; | ||
515 | search_start = (extent_end + mask) & ~mask; | ||
516 | } else | ||
517 | search_start = extent_end; | ||
518 | |||
504 | /* truncate existing extent */ | 519 | /* truncate existing extent */ |
505 | if (start > key.offset) { | 520 | if (start > key.offset) { |
506 | u64 new_num; | 521 | u64 new_num; |
@@ -638,6 +653,10 @@ next_slot: | |||
638 | } | 653 | } |
639 | out: | 654 | out: |
640 | btrfs_free_path(path); | 655 | btrfs_free_path(path); |
656 | if (locked_end > end) { | ||
657 | unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, | ||
658 | GFP_NOFS); | ||
659 | } | ||
641 | btrfs_check_file(root, inode); | 660 | btrfs_check_file(root, inode); |
642 | return ret; | 661 | return ret; |
643 | } | 662 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8254d6fa6910..e8511d14b119 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -246,7 +246,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
246 | return 1; | 246 | return 1; |
247 | } | 247 | } |
248 | 248 | ||
249 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
250 | ret = btrfs_drop_extents(trans, root, inode, start, | 249 | ret = btrfs_drop_extents(trans, root, inode, start, |
251 | aligned_end, aligned_end, &hint_byte); | 250 | aligned_end, aligned_end, &hint_byte); |
252 | BUG_ON(ret); | 251 | BUG_ON(ret); |
@@ -258,7 +257,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
258 | compressed_pages); | 257 | compressed_pages); |
259 | BUG_ON(ret); | 258 | BUG_ON(ret); |
260 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | 259 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); |
261 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
262 | return 0; | 260 | return 0; |
263 | } | 261 | } |
264 | 262 | ||
@@ -437,9 +435,7 @@ again: | |||
437 | BUG_ON(disk_num_bytes > | 435 | BUG_ON(disk_num_bytes > |
438 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 436 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
439 | 437 | ||
440 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
441 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 438 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
442 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
443 | 439 | ||
444 | while(disk_num_bytes > 0) { | 440 | while(disk_num_bytes > 0) { |
445 | unsigned long min_bytes; | 441 | unsigned long min_bytes; |
@@ -477,8 +473,6 @@ again: | |||
477 | em->block_start = ins.objectid; | 473 | em->block_start = ins.objectid; |
478 | em->block_len = ins.offset; | 474 | em->block_len = ins.offset; |
479 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 475 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
480 | |||
481 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
482 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 476 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
483 | 477 | ||
484 | if (will_compress) | 478 | if (will_compress) |
@@ -495,7 +489,6 @@ again: | |||
495 | btrfs_drop_extent_cache(inode, start, | 489 | btrfs_drop_extent_cache(inode, start, |
496 | start + ram_size - 1, 0); | 490 | start + ram_size - 1, 0); |
497 | } | 491 | } |
498 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
499 | 492 | ||
500 | cur_alloc_size = ins.offset; | 493 | cur_alloc_size = ins.offset; |
501 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, | 494 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, |
@@ -1016,8 +1009,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1016 | 1009 | ||
1017 | INIT_LIST_HEAD(&list); | 1010 | INIT_LIST_HEAD(&list); |
1018 | 1011 | ||
1019 | mutex_lock(&BTRFS_I(inode)->extent_mutex); | ||
1020 | |||
1021 | ret = btrfs_drop_extents(trans, root, inode, | 1012 | ret = btrfs_drop_extents(trans, root, inode, |
1022 | ordered_extent->file_offset, | 1013 | ordered_extent->file_offset, |
1023 | ordered_extent->file_offset + | 1014 | ordered_extent->file_offset + |
@@ -1059,7 +1050,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1059 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, | 1050 | btrfs_drop_extent_cache(inode, ordered_extent->file_offset, |
1060 | ordered_extent->file_offset + | 1051 | ordered_extent->file_offset + |
1061 | ordered_extent->len - 1, 0); | 1052 | ordered_extent->len - 1, 0); |
1062 | mutex_unlock(&BTRFS_I(inode)->extent_mutex); | ||
1063 | 1053 | ||
1064 | ins.objectid = ordered_extent->start; | 1054 | ins.objectid = ordered_extent->start; |
1065 | ins.offset = ordered_extent->disk_len; | 1055 | ins.offset = ordered_extent->disk_len; |