diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/ctree.c | 76 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 7 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 12 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 21 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 49 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 24 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 16 |
9 files changed, 161 insertions, 52 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d92462fe66c8..5a6c39b44c84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 968 | return 0; | 968 | return 0; |
| 969 | } | 969 | } |
| 970 | 970 | ||
| 971 | static struct extent_buffer *alloc_tree_block_no_bg_flush( | ||
| 972 | struct btrfs_trans_handle *trans, | ||
| 973 | struct btrfs_root *root, | ||
| 974 | u64 parent_start, | ||
| 975 | const struct btrfs_disk_key *disk_key, | ||
| 976 | int level, | ||
| 977 | u64 hint, | ||
| 978 | u64 empty_size) | ||
| 979 | { | ||
| 980 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 981 | struct extent_buffer *ret; | ||
| 982 | |||
| 983 | /* | ||
| 984 | * If we are COWing a node/leaf from the extent, chunk, device or free | ||
| 985 | * space trees, make sure that we do not finish block group creation of | ||
| 986 | * pending block groups. We do this to avoid a deadlock. | ||
| 987 | * COWing can result in allocation of a new chunk, and flushing pending | ||
| 988 | * block groups (btrfs_create_pending_block_groups()) can be triggered | ||
| 989 | * when finishing allocation of a new chunk. Creation of a pending block | ||
| 990 | * group modifies the extent, chunk, device and free space trees, | ||
| 991 | * therefore we could deadlock with ourselves since we are holding a | ||
| 992 | * lock on an extent buffer that btrfs_create_pending_block_groups() may | ||
| 993 | * try to COW later. | ||
| 994 | * For similar reasons, we also need to delay flushing pending block | ||
| 995 | * groups when splitting a leaf or node, from one of those trees, since | ||
| 996 | * we are holding a write lock on it and its parent or when inserting a | ||
| 997 | * new root node for one of those trees. | ||
| 998 | */ | ||
| 999 | if (root == fs_info->extent_root || | ||
| 1000 | root == fs_info->chunk_root || | ||
| 1001 | root == fs_info->dev_root || | ||
| 1002 | root == fs_info->free_space_root) | ||
| 1003 | trans->can_flush_pending_bgs = false; | ||
| 1004 | |||
| 1005 | ret = btrfs_alloc_tree_block(trans, root, parent_start, | ||
| 1006 | root->root_key.objectid, disk_key, level, | ||
| 1007 | hint, empty_size); | ||
| 1008 | trans->can_flush_pending_bgs = true; | ||
| 1009 | |||
| 1010 | return ret; | ||
| 1011 | } | ||
| 1012 | |||
| 971 | /* | 1013 | /* |
| 972 | * does the dirty work in cow of a single block. The parent block (if | 1014 | * does the dirty work in cow of a single block. The parent block (if |
| 973 | * supplied) is updated to point to the new cow copy. The new buffer is marked | 1015 | * supplied) is updated to point to the new cow copy. The new buffer is marked |
| @@ -1015,26 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 1015 | if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) | 1057 | if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) |
| 1016 | parent_start = parent->start; | 1058 | parent_start = parent->start; |
| 1017 | 1059 | ||
| 1018 | /* | 1060 | cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, |
| 1019 | * If we are COWing a node/leaf from the extent, chunk or device trees, | 1061 | level, search_start, empty_size); |
| 1020 | * make sure that we do not finish block group creation of pending block | ||
| 1021 | * groups. We do this to avoid a deadlock. | ||
| 1022 | * COWing can result in allocation of a new chunk, and flushing pending | ||
| 1023 | * block groups (btrfs_create_pending_block_groups()) can be triggered | ||
| 1024 | * when finishing allocation of a new chunk. Creation of a pending block | ||
| 1025 | * group modifies the extent, chunk and device trees, therefore we could | ||
| 1026 | * deadlock with ourselves since we are holding a lock on an extent | ||
| 1027 | * buffer that btrfs_create_pending_block_groups() may try to COW later. | ||
| 1028 | */ | ||
| 1029 | if (root == fs_info->extent_root || | ||
| 1030 | root == fs_info->chunk_root || | ||
| 1031 | root == fs_info->dev_root) | ||
| 1032 | trans->can_flush_pending_bgs = false; | ||
| 1033 | |||
| 1034 | cow = btrfs_alloc_tree_block(trans, root, parent_start, | ||
| 1035 | root->root_key.objectid, &disk_key, level, | ||
| 1036 | search_start, empty_size); | ||
| 1037 | trans->can_flush_pending_bgs = true; | ||
| 1038 | if (IS_ERR(cow)) | 1062 | if (IS_ERR(cow)) |
| 1039 | return PTR_ERR(cow); | 1063 | return PTR_ERR(cow); |
| 1040 | 1064 | ||
| @@ -3343,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 3343 | else | 3367 | else |
| 3344 | btrfs_node_key(lower, &lower_key, 0); | 3368 | btrfs_node_key(lower, &lower_key, 0); |
| 3345 | 3369 | ||
| 3346 | c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, | 3370 | c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, |
| 3347 | &lower_key, level, root->node->start, 0); | 3371 | root->node->start, 0); |
| 3348 | if (IS_ERR(c)) | 3372 | if (IS_ERR(c)) |
| 3349 | return PTR_ERR(c); | 3373 | return PTR_ERR(c); |
| 3350 | 3374 | ||
| @@ -3473,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 3473 | mid = (c_nritems + 1) / 2; | 3497 | mid = (c_nritems + 1) / 2; |
| 3474 | btrfs_node_key(c, &disk_key, mid); | 3498 | btrfs_node_key(c, &disk_key, mid); |
| 3475 | 3499 | ||
| 3476 | split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, | 3500 | split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, |
| 3477 | &disk_key, level, c->start, 0); | 3501 | c->start, 0); |
| 3478 | if (IS_ERR(split)) | 3502 | if (IS_ERR(split)) |
| 3479 | return PTR_ERR(split); | 3503 | return PTR_ERR(split); |
| 3480 | 3504 | ||
| @@ -4258,8 +4282,8 @@ again: | |||
| 4258 | else | 4282 | else |
| 4259 | btrfs_item_key(l, &disk_key, mid); | 4283 | btrfs_item_key(l, &disk_key, mid); |
| 4260 | 4284 | ||
| 4261 | right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, | 4285 | right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, |
| 4262 | &disk_key, 0, l->start, 0); | 4286 | l->start, 0); |
| 4263 | if (IS_ERR(right)) | 4287 | if (IS_ERR(right)) |
| 4264 | return PTR_ERR(right); | 4288 | return PTR_ERR(right); |
| 4265 | 4289 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0a68cf7032f5..7a2a2621f0d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | struct btrfs_trans_handle; | 35 | struct btrfs_trans_handle; |
| 36 | struct btrfs_transaction; | 36 | struct btrfs_transaction; |
| 37 | struct btrfs_pending_snapshot; | 37 | struct btrfs_pending_snapshot; |
| 38 | struct btrfs_delayed_ref_root; | ||
| 38 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 39 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
| 39 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 40 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
| 40 | extern struct kmem_cache *btrfs_path_cachep; | 41 | extern struct kmem_cache *btrfs_path_cachep; |
| @@ -786,6 +787,9 @@ enum { | |||
| 786 | * main phase. The fs_info::balance_ctl is initialized. | 787 | * main phase. The fs_info::balance_ctl is initialized. |
| 787 | */ | 788 | */ |
| 788 | BTRFS_FS_BALANCE_RUNNING, | 789 | BTRFS_FS_BALANCE_RUNNING, |
| 790 | |||
| 791 | /* Indicate that the cleaner thread is awake and doing something. */ | ||
| 792 | BTRFS_FS_CLEANER_RUNNING, | ||
| 789 | }; | 793 | }; |
| 790 | 794 | ||
| 791 | struct btrfs_fs_info { | 795 | struct btrfs_fs_info { |
| @@ -2661,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2661 | unsigned long count); | 2665 | unsigned long count); |
| 2662 | int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, | 2666 | int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, |
| 2663 | unsigned long count, u64 transid, int wait); | 2667 | unsigned long count, u64 transid, int wait); |
| 2668 | void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, | ||
| 2669 | struct btrfs_delayed_ref_root *delayed_refs, | ||
| 2670 | struct btrfs_delayed_ref_head *head); | ||
| 2664 | int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); | 2671 | int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); |
| 2665 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | 2672 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, |
| 2666 | struct btrfs_fs_info *fs_info, u64 bytenr, | 2673 | struct btrfs_fs_info *fs_info, u64 bytenr, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8da2f380d3c0..6a2a2a951705 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg) | |||
| 1682 | while (1) { | 1682 | while (1) { |
| 1683 | again = 0; | 1683 | again = 0; |
| 1684 | 1684 | ||
| 1685 | set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); | ||
| 1686 | |||
| 1685 | /* Make the cleaner go to sleep early. */ | 1687 | /* Make the cleaner go to sleep early. */ |
| 1686 | if (btrfs_need_cleaner_sleep(fs_info)) | 1688 | if (btrfs_need_cleaner_sleep(fs_info)) |
| 1687 | goto sleep; | 1689 | goto sleep; |
| @@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg) | |||
| 1728 | */ | 1730 | */ |
| 1729 | btrfs_delete_unused_bgs(fs_info); | 1731 | btrfs_delete_unused_bgs(fs_info); |
| 1730 | sleep: | 1732 | sleep: |
| 1733 | clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); | ||
| 1731 | if (kthread_should_park()) | 1734 | if (kthread_should_park()) |
| 1732 | kthread_parkme(); | 1735 | kthread_parkme(); |
| 1733 | if (kthread_should_stop()) | 1736 | if (kthread_should_stop()) |
| @@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | |||
| 4201 | spin_lock(&fs_info->ordered_root_lock); | 4204 | spin_lock(&fs_info->ordered_root_lock); |
| 4202 | } | 4205 | } |
| 4203 | spin_unlock(&fs_info->ordered_root_lock); | 4206 | spin_unlock(&fs_info->ordered_root_lock); |
| 4207 | |||
| 4208 | /* | ||
| 4209 | * We need this here because if we've been flipped read-only we won't | ||
| 4210 | * get sync() from the umount, so we need to make sure any ordered | ||
| 4211 | * extents that haven't had their dirty pages IO start writeout yet | ||
| 4212 | * actually get run and error out properly. | ||
| 4213 | */ | ||
| 4214 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); | ||
| 4204 | } | 4215 | } |
| 4205 | 4216 | ||
| 4206 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 4217 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
| @@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
| 4265 | if (pin_bytes) | 4276 | if (pin_bytes) |
| 4266 | btrfs_pin_extent(fs_info, head->bytenr, | 4277 | btrfs_pin_extent(fs_info, head->bytenr, |
| 4267 | head->num_bytes, 1); | 4278 | head->num_bytes, 1); |
| 4279 | btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); | ||
| 4268 | btrfs_put_delayed_ref_head(head); | 4280 | btrfs_put_delayed_ref_head(head); |
| 4269 | cond_resched(); | 4281 | cond_resched(); |
| 4270 | spin_lock(&delayed_refs->lock); | 4282 | spin_lock(&delayed_refs->lock); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b15afeae16df..d81035b7ea7d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans, | |||
| 2456 | return ret ? ret : 1; | 2456 | return ret ? ret : 1; |
| 2457 | } | 2457 | } |
| 2458 | 2458 | ||
| 2459 | static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans, | 2459 | void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, |
| 2460 | struct btrfs_delayed_ref_head *head) | 2460 | struct btrfs_delayed_ref_root *delayed_refs, |
| 2461 | struct btrfs_delayed_ref_head *head) | ||
| 2461 | { | 2462 | { |
| 2462 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
| 2463 | struct btrfs_delayed_ref_root *delayed_refs = | ||
| 2464 | &trans->transaction->delayed_refs; | ||
| 2465 | int nr_items = 1; /* Dropping this ref head update. */ | 2463 | int nr_items = 1; /* Dropping this ref head update. */ |
| 2466 | 2464 | ||
| 2467 | if (head->total_ref_mod < 0) { | 2465 | if (head->total_ref_mod < 0) { |
| @@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, | |||
| 2544 | } | 2542 | } |
| 2545 | } | 2543 | } |
| 2546 | 2544 | ||
| 2547 | cleanup_ref_head_accounting(trans, head); | 2545 | btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); |
| 2548 | 2546 | ||
| 2549 | trace_run_delayed_ref_head(fs_info, head, 0); | 2547 | trace_run_delayed_ref_head(fs_info, head, 0); |
| 2550 | btrfs_delayed_ref_unlock(head); | 2548 | btrfs_delayed_ref_unlock(head); |
| @@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info, | |||
| 4954 | ret = 0; | 4952 | ret = 0; |
| 4955 | break; | 4953 | break; |
| 4956 | case COMMIT_TRANS: | 4954 | case COMMIT_TRANS: |
| 4955 | /* | ||
| 4956 | * If we have pending delayed iputs then we could free up a | ||
| 4957 | * bunch of pinned space, so make sure we run the iputs before | ||
| 4958 | * we do our pinned bytes check below. | ||
| 4959 | */ | ||
| 4960 | mutex_lock(&fs_info->cleaner_delayed_iput_mutex); | ||
| 4961 | btrfs_run_delayed_iputs(fs_info); | ||
| 4962 | mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); | ||
| 4963 | |||
| 4957 | ret = may_commit_transaction(fs_info, space_info); | 4964 | ret = may_commit_transaction(fs_info, space_info); |
| 4958 | break; | 4965 | break; |
| 4959 | default: | 4966 | default: |
| @@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
| 7188 | if (head->must_insert_reserved) | 7195 | if (head->must_insert_reserved) |
| 7189 | ret = 1; | 7196 | ret = 1; |
| 7190 | 7197 | ||
| 7191 | cleanup_ref_head_accounting(trans, head); | 7198 | btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head); |
| 7192 | mutex_unlock(&head->mutex); | 7199 | mutex_unlock(&head->mutex); |
| 7193 | btrfs_put_delayed_ref_head(head); | 7200 | btrfs_put_delayed_ref_head(head); |
| 7194 | return ret; | 7201 | return ret; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43eb4535319d..5c349667c761 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -3129,9 +3129,6 @@ out: | |||
| 3129 | /* once for the tree */ | 3129 | /* once for the tree */ |
| 3130 | btrfs_put_ordered_extent(ordered_extent); | 3130 | btrfs_put_ordered_extent(ordered_extent); |
| 3131 | 3131 | ||
| 3132 | /* Try to release some metadata so we don't get an OOM but don't wait */ | ||
| 3133 | btrfs_btree_balance_dirty_nodelay(fs_info); | ||
| 3134 | |||
| 3135 | return ret; | 3132 | return ret; |
| 3136 | } | 3133 | } |
| 3137 | 3134 | ||
| @@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode) | |||
| 3254 | ASSERT(list_empty(&binode->delayed_iput)); | 3251 | ASSERT(list_empty(&binode->delayed_iput)); |
| 3255 | list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); | 3252 | list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); |
| 3256 | spin_unlock(&fs_info->delayed_iput_lock); | 3253 | spin_unlock(&fs_info->delayed_iput_lock); |
| 3254 | if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) | ||
| 3255 | wake_up_process(fs_info->cleaner_kthread); | ||
| 3257 | } | 3256 | } |
| 3258 | 3257 | ||
| 3259 | void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) | 3258 | void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fab9443f6a42..9c8e1734429c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) | |||
| 3221 | inode_lock_nested(inode2, I_MUTEX_CHILD); | 3221 | inode_lock_nested(inode2, I_MUTEX_CHILD); |
| 3222 | } | 3222 | } |
| 3223 | 3223 | ||
| 3224 | static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | ||
| 3225 | struct inode *inode2, u64 loff2, u64 len) | ||
| 3226 | { | ||
| 3227 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); | ||
| 3228 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | ||
| 3229 | } | ||
| 3230 | |||
| 3231 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | ||
| 3232 | struct inode *inode2, u64 loff2, u64 len) | ||
| 3233 | { | ||
| 3234 | if (inode1 < inode2) { | ||
| 3235 | swap(inode1, inode2); | ||
| 3236 | swap(loff1, loff2); | ||
| 3237 | } else if (inode1 == inode2 && loff2 < loff1) { | ||
| 3238 | swap(loff1, loff2); | ||
| 3239 | } | ||
| 3240 | lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); | ||
| 3241 | lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | ||
| 3242 | } | ||
| 3243 | |||
| 3224 | static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, | 3244 | static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, |
| 3225 | struct inode *dst, u64 dst_loff) | 3245 | struct inode *dst, u64 dst_loff) |
| 3226 | { | 3246 | { |
| @@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, | |||
| 3242 | return -EINVAL; | 3262 | return -EINVAL; |
| 3243 | 3263 | ||
| 3244 | /* | 3264 | /* |
| 3245 | * Lock destination range to serialize with concurrent readpages(). | 3265 | * Lock destination range to serialize with concurrent readpages() and |
| 3266 | * source range to serialize with relocation. | ||
| 3246 | */ | 3267 | */ |
| 3247 | lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); | 3268 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); |
| 3248 | ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); | 3269 | ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); |
| 3249 | unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); | 3270 | btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); |
| 3250 | 3271 | ||
| 3251 | return ret; | 3272 | return ret; |
| 3252 | } | 3273 | } |
| @@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, | |||
| 3905 | len = ALIGN(src->i_size, bs) - off; | 3926 | len = ALIGN(src->i_size, bs) - off; |
| 3906 | 3927 | ||
| 3907 | if (destoff > inode->i_size) { | 3928 | if (destoff > inode->i_size) { |
| 3929 | const u64 wb_start = ALIGN_DOWN(inode->i_size, bs); | ||
| 3930 | |||
| 3908 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | 3931 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); |
| 3909 | if (ret) | 3932 | if (ret) |
| 3910 | return ret; | 3933 | return ret; |
| 3934 | /* | ||
| 3935 | * We may have truncated the last block if the inode's size is | ||
| 3936 | * not sector size aligned, so we need to wait for writeback to | ||
| 3937 | * complete before proceeding further, otherwise we can race | ||
| 3938 | * with cloning and attempt to increment a reference to an | ||
| 3939 | * extent that no longer exists (writeback completed right after | ||
| 3940 | * we found the previous extent covering eof and before we | ||
| 3941 | * attempted to increment its reference count). | ||
| 3942 | */ | ||
| 3943 | ret = btrfs_wait_ordered_range(inode, wb_start, | ||
| 3944 | destoff - wb_start); | ||
| 3945 | if (ret) | ||
| 3946 | return ret; | ||
| 3911 | } | 3947 | } |
| 3912 | 3948 | ||
| 3913 | /* | 3949 | /* |
| 3914 | * Lock destination range to serialize with concurrent readpages(). | 3950 | * Lock destination range to serialize with concurrent readpages() and |
| 3951 | * source range to serialize with relocation. | ||
| 3915 | */ | 3952 | */ |
| 3916 | lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); | 3953 | btrfs_double_extent_lock(src, off, inode, destoff, len); |
| 3917 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); | 3954 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |
| 3918 | unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); | 3955 | btrfs_double_extent_unlock(src, off, inode, destoff, len); |
| 3919 | /* | 3956 | /* |
| 3920 | * Truncate page cache pages so that future reads will see the cloned | 3957 | * Truncate page cache pages so that future reads will see the cloned |
| 3921 | * data immediately and not the previous data. | 3958 | * data immediately and not the previous data. |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c5586ffd1426..0a3f122dd61f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
| 1621 | flags | SB_RDONLY, device_name, data); | 1621 | flags | SB_RDONLY, device_name, data); |
| 1622 | if (IS_ERR(mnt_root)) { | 1622 | if (IS_ERR(mnt_root)) { |
| 1623 | root = ERR_CAST(mnt_root); | 1623 | root = ERR_CAST(mnt_root); |
| 1624 | kfree(subvol_name); | ||
| 1624 | goto out; | 1625 | goto out; |
| 1625 | } | 1626 | } |
| 1626 | 1627 | ||
| @@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
| 1630 | if (error < 0) { | 1631 | if (error < 0) { |
| 1631 | root = ERR_PTR(error); | 1632 | root = ERR_PTR(error); |
| 1632 | mntput(mnt_root); | 1633 | mntput(mnt_root); |
| 1634 | kfree(subvol_name); | ||
| 1633 | goto out; | 1635 | goto out; |
| 1634 | } | 1636 | } |
| 1635 | } | 1637 | } |
| 1636 | } | 1638 | } |
| 1637 | if (IS_ERR(mnt_root)) { | 1639 | if (IS_ERR(mnt_root)) { |
| 1638 | root = ERR_CAST(mnt_root); | 1640 | root = ERR_CAST(mnt_root); |
| 1641 | kfree(subvol_name); | ||
| 1639 | goto out; | 1642 | goto out; |
| 1640 | } | 1643 | } |
| 1641 | 1644 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 127fa1535f58..4ec2b660d014 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 850 | 850 | ||
| 851 | btrfs_trans_release_chunk_metadata(trans); | 851 | btrfs_trans_release_chunk_metadata(trans); |
| 852 | 852 | ||
| 853 | if (lock && should_end_transaction(trans) && | ||
| 854 | READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { | ||
| 855 | spin_lock(&info->trans_lock); | ||
| 856 | if (cur_trans->state == TRANS_STATE_RUNNING) | ||
| 857 | cur_trans->state = TRANS_STATE_BLOCKED; | ||
| 858 | spin_unlock(&info->trans_lock); | ||
| 859 | } | ||
| 860 | |||
| 861 | if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 853 | if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
| 862 | if (throttle) | 854 | if (throttle) |
| 863 | return btrfs_commit_transaction(trans); | 855 | return btrfs_commit_transaction(trans); |
| @@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) | |||
| 1879 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1871 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1880 | } | 1872 | } |
| 1881 | 1873 | ||
| 1874 | /* | ||
| 1875 | * Release reserved delayed ref space of all pending block groups of the | ||
| 1876 | * transaction and remove them from the list | ||
| 1877 | */ | ||
| 1878 | static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) | ||
| 1879 | { | ||
| 1880 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
| 1881 | struct btrfs_block_group_cache *block_group, *tmp; | ||
| 1882 | |||
| 1883 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { | ||
| 1884 | btrfs_delayed_refs_rsv_release(fs_info, 1); | ||
| 1885 | list_del_init(&block_group->bg_list); | ||
| 1886 | } | ||
| 1887 | } | ||
| 1888 | |||
| 1882 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1889 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
| 1883 | { | 1890 | { |
| 1884 | /* | 1891 | /* |
| @@ -2270,6 +2277,7 @@ scrub_continue: | |||
| 2270 | btrfs_scrub_continue(fs_info); | 2277 | btrfs_scrub_continue(fs_info); |
| 2271 | cleanup_transaction: | 2278 | cleanup_transaction: |
| 2272 | btrfs_trans_release_metadata(trans); | 2279 | btrfs_trans_release_metadata(trans); |
| 2280 | btrfs_cleanup_pending_block_groups(trans); | ||
| 2273 | btrfs_trans_release_chunk_metadata(trans); | 2281 | btrfs_trans_release_chunk_metadata(trans); |
| 2274 | trans->block_rsv = NULL; | 2282 | trans->block_rsv = NULL; |
| 2275 | btrfs_warn(fs_info, "Skipping commit of aborted transaction."); | 2283 | btrfs_warn(fs_info, "Skipping commit of aborted transaction."); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2576b1a379c9..15561926ab32 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, | |||
| 957 | else | 957 | else |
| 958 | fs_devices = alloc_fs_devices(disk_super->fsid, NULL); | 958 | fs_devices = alloc_fs_devices(disk_super->fsid, NULL); |
| 959 | 959 | ||
| 960 | fs_devices->fsid_change = fsid_change_in_progress; | ||
| 961 | |||
| 962 | if (IS_ERR(fs_devices)) | 960 | if (IS_ERR(fs_devices)) |
| 963 | return ERR_CAST(fs_devices); | 961 | return ERR_CAST(fs_devices); |
| 964 | 962 | ||
| 963 | fs_devices->fsid_change = fsid_change_in_progress; | ||
| 964 | |||
| 965 | mutex_lock(&fs_devices->device_list_mutex); | 965 | mutex_lock(&fs_devices->device_list_mutex); |
| 966 | list_add(&fs_devices->fs_list, &fs_uuids); | 966 | list_add(&fs_devices->fs_list, &fs_uuids); |
| 967 | 967 | ||
| @@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, | |||
| 7825 | ret = -EUCLEAN; | 7825 | ret = -EUCLEAN; |
| 7826 | goto out; | 7826 | goto out; |
| 7827 | } | 7827 | } |
| 7828 | |||
| 7829 | /* It's possible this device is a dummy for seed device */ | ||
| 7830 | if (dev->disk_total_bytes == 0) { | ||
| 7831 | dev = find_device(fs_info->fs_devices->seed, devid, NULL); | ||
| 7832 | if (!dev) { | ||
| 7833 | btrfs_err(fs_info, "failed to find seed devid %llu", | ||
| 7834 | devid); | ||
| 7835 | ret = -EUCLEAN; | ||
| 7836 | goto out; | ||
| 7837 | } | ||
| 7838 | } | ||
| 7839 | |||
| 7828 | if (physical_offset + physical_len > dev->disk_total_bytes) { | 7840 | if (physical_offset + physical_len > dev->disk_total_bytes) { |
| 7829 | btrfs_err(fs_info, | 7841 | btrfs_err(fs_info, |
| 7830 | "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", | 7842 | "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", |
