aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c76
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c21
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/transaction.c24
-rw-r--r--fs/btrfs/volumes.c16
9 files changed, 161 insertions, 52 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d92462fe66c8..5a6c39b44c84 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
968 return 0; 968 return 0;
969} 969}
970 970
971static struct extent_buffer *alloc_tree_block_no_bg_flush(
972 struct btrfs_trans_handle *trans,
973 struct btrfs_root *root,
974 u64 parent_start,
975 const struct btrfs_disk_key *disk_key,
976 int level,
977 u64 hint,
978 u64 empty_size)
979{
980 struct btrfs_fs_info *fs_info = root->fs_info;
981 struct extent_buffer *ret;
982
983 /*
984 * If we are COWing a node/leaf from the extent, chunk, device or free
985 * space trees, make sure that we do not finish block group creation of
986 * pending block groups. We do this to avoid a deadlock.
987 * COWing can result in allocation of a new chunk, and flushing pending
988 * block groups (btrfs_create_pending_block_groups()) can be triggered
989 * when finishing allocation of a new chunk. Creation of a pending block
990 * group modifies the extent, chunk, device and free space trees,
991 * therefore we could deadlock with ourselves since we are holding a
992 * lock on an extent buffer that btrfs_create_pending_block_groups() may
993 * try to COW later.
994 * For similar reasons, we also need to delay flushing pending block
995 * groups when splitting a leaf or node, from one of those trees, since
996 * we are holding a write lock on it and its parent or when inserting a
997 * new root node for one of those trees.
998 */
999 if (root == fs_info->extent_root ||
1000 root == fs_info->chunk_root ||
1001 root == fs_info->dev_root ||
1002 root == fs_info->free_space_root)
1003 trans->can_flush_pending_bgs = false;
1004
1005 ret = btrfs_alloc_tree_block(trans, root, parent_start,
1006 root->root_key.objectid, disk_key, level,
1007 hint, empty_size);
1008 trans->can_flush_pending_bgs = true;
1009
1010 return ret;
1011}
1012
971/* 1013/*
972 * does the dirty work in cow of a single block. The parent block (if 1014 * does the dirty work in cow of a single block. The parent block (if
973 * supplied) is updated to point to the new cow copy. The new buffer is marked 1015 * supplied) is updated to point to the new cow copy. The new buffer is marked
@@ -1015,26 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1015 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) 1057 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
1016 parent_start = parent->start; 1058 parent_start = parent->start;
1017 1059
1018 /* 1060 cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
1019 * If we are COWing a node/leaf from the extent, chunk or device trees, 1061 level, search_start, empty_size);
1020 * make sure that we do not finish block group creation of pending block
1021 * groups. We do this to avoid a deadlock.
1022 * COWing can result in allocation of a new chunk, and flushing pending
1023 * block groups (btrfs_create_pending_block_groups()) can be triggered
1024 * when finishing allocation of a new chunk. Creation of a pending block
1025 * group modifies the extent, chunk and device trees, therefore we could
1026 * deadlock with ourselves since we are holding a lock on an extent
1027 * buffer that btrfs_create_pending_block_groups() may try to COW later.
1028 */
1029 if (root == fs_info->extent_root ||
1030 root == fs_info->chunk_root ||
1031 root == fs_info->dev_root)
1032 trans->can_flush_pending_bgs = false;
1033
1034 cow = btrfs_alloc_tree_block(trans, root, parent_start,
1035 root->root_key.objectid, &disk_key, level,
1036 search_start, empty_size);
1037 trans->can_flush_pending_bgs = true;
1038 if (IS_ERR(cow)) 1062 if (IS_ERR(cow))
1039 return PTR_ERR(cow); 1063 return PTR_ERR(cow);
1040 1064
@@ -3343,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3343 else 3367 else
3344 btrfs_node_key(lower, &lower_key, 0); 3368 btrfs_node_key(lower, &lower_key, 0);
3345 3369
3346 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 3370 c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
3347 &lower_key, level, root->node->start, 0); 3371 root->node->start, 0);
3348 if (IS_ERR(c)) 3372 if (IS_ERR(c))
3349 return PTR_ERR(c); 3373 return PTR_ERR(c);
3350 3374
@@ -3473,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3473 mid = (c_nritems + 1) / 2; 3497 mid = (c_nritems + 1) / 2;
3474 btrfs_node_key(c, &disk_key, mid); 3498 btrfs_node_key(c, &disk_key, mid);
3475 3499
3476 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 3500 split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
3477 &disk_key, level, c->start, 0); 3501 c->start, 0);
3478 if (IS_ERR(split)) 3502 if (IS_ERR(split))
3479 return PTR_ERR(split); 3503 return PTR_ERR(split);
3480 3504
@@ -4258,8 +4282,8 @@ again:
4258 else 4282 else
4259 btrfs_item_key(l, &disk_key, mid); 4283 btrfs_item_key(l, &disk_key, mid);
4260 4284
4261 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 4285 right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
4262 &disk_key, 0, l->start, 0); 4286 l->start, 0);
4263 if (IS_ERR(right)) 4287 if (IS_ERR(right))
4264 return PTR_ERR(right); 4288 return PTR_ERR(right);
4265 4289
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0a68cf7032f5..7a2a2621f0d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
35struct btrfs_trans_handle; 35struct btrfs_trans_handle;
36struct btrfs_transaction; 36struct btrfs_transaction;
37struct btrfs_pending_snapshot; 37struct btrfs_pending_snapshot;
38struct btrfs_delayed_ref_root;
38extern struct kmem_cache *btrfs_trans_handle_cachep; 39extern struct kmem_cache *btrfs_trans_handle_cachep;
39extern struct kmem_cache *btrfs_bit_radix_cachep; 40extern struct kmem_cache *btrfs_bit_radix_cachep;
40extern struct kmem_cache *btrfs_path_cachep; 41extern struct kmem_cache *btrfs_path_cachep;
@@ -786,6 +787,9 @@ enum {
786 * main phase. The fs_info::balance_ctl is initialized. 787 * main phase. The fs_info::balance_ctl is initialized.
787 */ 788 */
788 BTRFS_FS_BALANCE_RUNNING, 789 BTRFS_FS_BALANCE_RUNNING,
790
791 /* Indicate that the cleaner thread is awake and doing something. */
792 BTRFS_FS_CLEANER_RUNNING,
789}; 793};
790 794
791struct btrfs_fs_info { 795struct btrfs_fs_info {
@@ -2661,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2661 unsigned long count); 2665 unsigned long count);
2662int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, 2666int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
2663 unsigned long count, u64 transid, int wait); 2667 unsigned long count, u64 transid, int wait);
2668void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2669 struct btrfs_delayed_ref_root *delayed_refs,
2670 struct btrfs_delayed_ref_head *head);
2664int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); 2671int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
2665int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 2672int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
2666 struct btrfs_fs_info *fs_info, u64 bytenr, 2673 struct btrfs_fs_info *fs_info, u64 bytenr,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8da2f380d3c0..6a2a2a951705 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg)
1682 while (1) { 1682 while (1) {
1683 again = 0; 1683 again = 0;
1684 1684
1685 set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
1686
1685 /* Make the cleaner go to sleep early. */ 1687 /* Make the cleaner go to sleep early. */
1686 if (btrfs_need_cleaner_sleep(fs_info)) 1688 if (btrfs_need_cleaner_sleep(fs_info))
1687 goto sleep; 1689 goto sleep;
@@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg)
1728 */ 1730 */
1729 btrfs_delete_unused_bgs(fs_info); 1731 btrfs_delete_unused_bgs(fs_info);
1730sleep: 1732sleep:
1733 clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
1731 if (kthread_should_park()) 1734 if (kthread_should_park())
1732 kthread_parkme(); 1735 kthread_parkme();
1733 if (kthread_should_stop()) 1736 if (kthread_should_stop())
@@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
4201 spin_lock(&fs_info->ordered_root_lock); 4204 spin_lock(&fs_info->ordered_root_lock);
4202 } 4205 }
4203 spin_unlock(&fs_info->ordered_root_lock); 4206 spin_unlock(&fs_info->ordered_root_lock);
4207
4208 /*
4209 * We need this here because if we've been flipped read-only we won't
4210 * get sync() from the umount, so we need to make sure any ordered
4211 * extents that haven't had their dirty pages IO start writeout yet
4212 * actually get run and error out properly.
4213 */
4214 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
4204} 4215}
4205 4216
4206static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 4217static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
4265 if (pin_bytes) 4276 if (pin_bytes)
4266 btrfs_pin_extent(fs_info, head->bytenr, 4277 btrfs_pin_extent(fs_info, head->bytenr,
4267 head->num_bytes, 1); 4278 head->num_bytes, 1);
4279 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
4268 btrfs_put_delayed_ref_head(head); 4280 btrfs_put_delayed_ref_head(head);
4269 cond_resched(); 4281 cond_resched();
4270 spin_lock(&delayed_refs->lock); 4282 spin_lock(&delayed_refs->lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b15afeae16df..d81035b7ea7d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
2456 return ret ? ret : 1; 2456 return ret ? ret : 1;
2457} 2457}
2458 2458
2459static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans, 2459void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2460 struct btrfs_delayed_ref_head *head) 2460 struct btrfs_delayed_ref_root *delayed_refs,
2461 struct btrfs_delayed_ref_head *head)
2461{ 2462{
2462 struct btrfs_fs_info *fs_info = trans->fs_info;
2463 struct btrfs_delayed_ref_root *delayed_refs =
2464 &trans->transaction->delayed_refs;
2465 int nr_items = 1; /* Dropping this ref head update. */ 2463 int nr_items = 1; /* Dropping this ref head update. */
2466 2464
2467 if (head->total_ref_mod < 0) { 2465 if (head->total_ref_mod < 0) {
@@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2544 } 2542 }
2545 } 2543 }
2546 2544
2547 cleanup_ref_head_accounting(trans, head); 2545 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
2548 2546
2549 trace_run_delayed_ref_head(fs_info, head, 0); 2547 trace_run_delayed_ref_head(fs_info, head, 0);
2550 btrfs_delayed_ref_unlock(head); 2548 btrfs_delayed_ref_unlock(head);
@@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info,
4954 ret = 0; 4952 ret = 0;
4955 break; 4953 break;
4956 case COMMIT_TRANS: 4954 case COMMIT_TRANS:
4955 /*
4956 * If we have pending delayed iputs then we could free up a
4957 * bunch of pinned space, so make sure we run the iputs before
4958 * we do our pinned bytes check below.
4959 */
4960 mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
4961 btrfs_run_delayed_iputs(fs_info);
4962 mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
4963
4957 ret = may_commit_transaction(fs_info, space_info); 4964 ret = may_commit_transaction(fs_info, space_info);
4958 break; 4965 break;
4959 default: 4966 default:
@@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
7188 if (head->must_insert_reserved) 7195 if (head->must_insert_reserved)
7189 ret = 1; 7196 ret = 1;
7190 7197
7191 cleanup_ref_head_accounting(trans, head); 7198 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
7192 mutex_unlock(&head->mutex); 7199 mutex_unlock(&head->mutex);
7193 btrfs_put_delayed_ref_head(head); 7200 btrfs_put_delayed_ref_head(head);
7194 return ret; 7201 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 43eb4535319d..5c349667c761 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3129,9 +3129,6 @@ out:
3129 /* once for the tree */ 3129 /* once for the tree */
3130 btrfs_put_ordered_extent(ordered_extent); 3130 btrfs_put_ordered_extent(ordered_extent);
3131 3131
3132 /* Try to release some metadata so we don't get an OOM but don't wait */
3133 btrfs_btree_balance_dirty_nodelay(fs_info);
3134
3135 return ret; 3132 return ret;
3136} 3133}
3137 3134
@@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
3254 ASSERT(list_empty(&binode->delayed_iput)); 3251 ASSERT(list_empty(&binode->delayed_iput));
3255 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); 3252 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3256 spin_unlock(&fs_info->delayed_iput_lock); 3253 spin_unlock(&fs_info->delayed_iput_lock);
3254 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
3255 wake_up_process(fs_info->cleaner_kthread);
3257} 3256}
3258 3257
3259void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) 3258void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fab9443f6a42..9c8e1734429c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
3221 inode_lock_nested(inode2, I_MUTEX_CHILD); 3221 inode_lock_nested(inode2, I_MUTEX_CHILD);
3222} 3222}
3223 3223
3224static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
3225 struct inode *inode2, u64 loff2, u64 len)
3226{
3227 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
3228 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
3229}
3230
3231static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
3232 struct inode *inode2, u64 loff2, u64 len)
3233{
3234 if (inode1 < inode2) {
3235 swap(inode1, inode2);
3236 swap(loff1, loff2);
3237 } else if (inode1 == inode2 && loff2 < loff1) {
3238 swap(loff1, loff2);
3239 }
3240 lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
3241 lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
3242}
3243
3224static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, 3244static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
3225 struct inode *dst, u64 dst_loff) 3245 struct inode *dst, u64 dst_loff)
3226{ 3246{
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
3242 return -EINVAL; 3262 return -EINVAL;
3243 3263
3244 /* 3264 /*
3245 * Lock destination range to serialize with concurrent readpages(). 3265 * Lock destination range to serialize with concurrent readpages() and
3266 * source range to serialize with relocation.
3246 */ 3267 */
3247 lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); 3268 btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
3248 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); 3269 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
3249 unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); 3270 btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
3250 3271
3251 return ret; 3272 return ret;
3252} 3273}
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
3905 len = ALIGN(src->i_size, bs) - off; 3926 len = ALIGN(src->i_size, bs) - off;
3906 3927
3907 if (destoff > inode->i_size) { 3928 if (destoff > inode->i_size) {
3929 const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
3930
3908 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3931 ret = btrfs_cont_expand(inode, inode->i_size, destoff);
3909 if (ret) 3932 if (ret)
3910 return ret; 3933 return ret;
3934 /*
3935 * We may have truncated the last block if the inode's size is
3936 * not sector size aligned, so we need to wait for writeback to
3937 * complete before proceeding further, otherwise we can race
3938 * with cloning and attempt to increment a reference to an
3939 * extent that no longer exists (writeback completed right after
3940 * we found the previous extent covering eof and before we
3941 * attempted to increment its reference count).
3942 */
3943 ret = btrfs_wait_ordered_range(inode, wb_start,
3944 destoff - wb_start);
3945 if (ret)
3946 return ret;
3911 } 3947 }
3912 3948
3913 /* 3949 /*
3914 * Lock destination range to serialize with concurrent readpages(). 3950 * Lock destination range to serialize with concurrent readpages() and
3951 * source range to serialize with relocation.
3915 */ 3952 */
3916 lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); 3953 btrfs_double_extent_lock(src, off, inode, destoff, len);
3917 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3954 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
3918 unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); 3955 btrfs_double_extent_unlock(src, off, inode, destoff, len);
3919 /* 3956 /*
3920 * Truncate page cache pages so that future reads will see the cloned 3957 * Truncate page cache pages so that future reads will see the cloned
3921 * data immediately and not the previous data. 3958 * data immediately and not the previous data.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c5586ffd1426..0a3f122dd61f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1621 flags | SB_RDONLY, device_name, data); 1621 flags | SB_RDONLY, device_name, data);
1622 if (IS_ERR(mnt_root)) { 1622 if (IS_ERR(mnt_root)) {
1623 root = ERR_CAST(mnt_root); 1623 root = ERR_CAST(mnt_root);
1624 kfree(subvol_name);
1624 goto out; 1625 goto out;
1625 } 1626 }
1626 1627
@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1630 if (error < 0) { 1631 if (error < 0) {
1631 root = ERR_PTR(error); 1632 root = ERR_PTR(error);
1632 mntput(mnt_root); 1633 mntput(mnt_root);
1634 kfree(subvol_name);
1633 goto out; 1635 goto out;
1634 } 1636 }
1635 } 1637 }
1636 } 1638 }
1637 if (IS_ERR(mnt_root)) { 1639 if (IS_ERR(mnt_root)) {
1638 root = ERR_CAST(mnt_root); 1640 root = ERR_CAST(mnt_root);
1641 kfree(subvol_name);
1639 goto out; 1642 goto out;
1640 } 1643 }
1641 1644
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 127fa1535f58..4ec2b660d014 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
850 850
851 btrfs_trans_release_chunk_metadata(trans); 851 btrfs_trans_release_chunk_metadata(trans);
852 852
853 if (lock && should_end_transaction(trans) &&
854 READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
855 spin_lock(&info->trans_lock);
856 if (cur_trans->state == TRANS_STATE_RUNNING)
857 cur_trans->state = TRANS_STATE_BLOCKED;
858 spin_unlock(&info->trans_lock);
859 }
860
861 if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { 853 if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
862 if (throttle) 854 if (throttle)
863 return btrfs_commit_transaction(trans); 855 return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
1879 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1871 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1880} 1872}
1881 1873
1874/*
1875 * Release reserved delayed ref space of all pending block groups of the
1876 * transaction and remove them from the list
1877 */
1878static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
1879{
1880 struct btrfs_fs_info *fs_info = trans->fs_info;
1881 struct btrfs_block_group_cache *block_group, *tmp;
1882
1883 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
1884 btrfs_delayed_refs_rsv_release(fs_info, 1);
1885 list_del_init(&block_group->bg_list);
1886 }
1887}
1888
1882static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1889static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1883{ 1890{
1884 /* 1891 /*
@@ -2270,6 +2277,7 @@ scrub_continue:
2270 btrfs_scrub_continue(fs_info); 2277 btrfs_scrub_continue(fs_info);
2271cleanup_transaction: 2278cleanup_transaction:
2272 btrfs_trans_release_metadata(trans); 2279 btrfs_trans_release_metadata(trans);
2280 btrfs_cleanup_pending_block_groups(trans);
2273 btrfs_trans_release_chunk_metadata(trans); 2281 btrfs_trans_release_chunk_metadata(trans);
2274 trans->block_rsv = NULL; 2282 trans->block_rsv = NULL;
2275 btrfs_warn(fs_info, "Skipping commit of aborted transaction."); 2283 btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2576b1a379c9..15561926ab32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
957 else 957 else
958 fs_devices = alloc_fs_devices(disk_super->fsid, NULL); 958 fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
959 959
960 fs_devices->fsid_change = fsid_change_in_progress;
961
962 if (IS_ERR(fs_devices)) 960 if (IS_ERR(fs_devices))
963 return ERR_CAST(fs_devices); 961 return ERR_CAST(fs_devices);
964 962
963 fs_devices->fsid_change = fsid_change_in_progress;
964
965 mutex_lock(&fs_devices->device_list_mutex); 965 mutex_lock(&fs_devices->device_list_mutex);
966 list_add(&fs_devices->fs_list, &fs_uuids); 966 list_add(&fs_devices->fs_list, &fs_uuids);
967 967
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7825 ret = -EUCLEAN; 7825 ret = -EUCLEAN;
7826 goto out; 7826 goto out;
7827 } 7827 }
7828
7829 /* It's possible this device is a dummy for seed device */
7830 if (dev->disk_total_bytes == 0) {
7831 dev = find_device(fs_info->fs_devices->seed, devid, NULL);
7832 if (!dev) {
7833 btrfs_err(fs_info, "failed to find seed devid %llu",
7834 devid);
7835 ret = -EUCLEAN;
7836 goto out;
7837 }
7838 }
7839
7828 if (physical_offset + physical_len > dev->disk_total_bytes) { 7840 if (physical_offset + physical_len > dev->disk_total_bytes) {
7829 btrfs_err(fs_info, 7841 btrfs_err(fs_info,
7830"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", 7842"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",