aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-02-03 11:48:33 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2019-02-03 11:48:33 -0500
commit312b3a93dda6db9354b0c6b0f1868c1434e8c787 (patch)
treeafc8aa280c7c420247df63de3414abb70e47dc4d
parent12491ed354d23c0ecbe02459bf4be58b8c772bc8 (diff)
parent532b618bdf237250d6d4566536d4b6ce3d0a31fe (diff)
Merge tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: - regression fix: transaction commit can run away due to delayed ref waiting heuristic, this is not necessary now because of the proper reservation mechanism introduced in 5.0 - regression fix: potential crash due to use-before-check of an ERR_PTR return value - fix for transaction abort during transaction commit that needs to properly clean up pending block groups - fix deadlock during b-tree node/leaf splitting, when this happens on some of the fundamental trees, we must prevent new tree block allocation to re-enter indirectly via the block group flushing path - potential memory leak after errors during mount * tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: On error always free subvol_name in btrfs_mount btrfs: clean up pending block groups when transaction commit aborts btrfs: fix potential oops in device_list_add btrfs: don't end the transaction for delayed refs in throttle Btrfs: fix deadlock when allocating tree block during leaf/node split
-rw-r--r--fs/btrfs/ctree.c78
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/transaction.c24
-rw-r--r--fs/btrfs/volumes.c4
4 files changed, 71 insertions, 38 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f64aad613727..5a6c39b44c84 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
968 return 0; 968 return 0;
969} 969}
970 970
971static struct extent_buffer *alloc_tree_block_no_bg_flush(
972 struct btrfs_trans_handle *trans,
973 struct btrfs_root *root,
974 u64 parent_start,
975 const struct btrfs_disk_key *disk_key,
976 int level,
977 u64 hint,
978 u64 empty_size)
979{
980 struct btrfs_fs_info *fs_info = root->fs_info;
981 struct extent_buffer *ret;
982
983 /*
984 * If we are COWing a node/leaf from the extent, chunk, device or free
985 * space trees, make sure that we do not finish block group creation of
986 * pending block groups. We do this to avoid a deadlock.
987 * COWing can result in allocation of a new chunk, and flushing pending
988 * block groups (btrfs_create_pending_block_groups()) can be triggered
989 * when finishing allocation of a new chunk. Creation of a pending block
990 * group modifies the extent, chunk, device and free space trees,
991 * therefore we could deadlock with ourselves since we are holding a
992 * lock on an extent buffer that btrfs_create_pending_block_groups() may
993 * try to COW later.
994 * For similar reasons, we also need to delay flushing pending block
995 * groups when splitting a leaf or node, from one of those trees, since
996 * we are holding a write lock on it and its parent or when inserting a
997 * new root node for one of those trees.
998 */
999 if (root == fs_info->extent_root ||
1000 root == fs_info->chunk_root ||
1001 root == fs_info->dev_root ||
1002 root == fs_info->free_space_root)
1003 trans->can_flush_pending_bgs = false;
1004
1005 ret = btrfs_alloc_tree_block(trans, root, parent_start,
1006 root->root_key.objectid, disk_key, level,
1007 hint, empty_size);
1008 trans->can_flush_pending_bgs = true;
1009
1010 return ret;
1011}
1012
971/* 1013/*
972 * does the dirty work in cow of a single block. The parent block (if 1014 * does the dirty work in cow of a single block. The parent block (if
973 * supplied) is updated to point to the new cow copy. The new buffer is marked 1015 * supplied) is updated to point to the new cow copy. The new buffer is marked
@@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1015 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) 1057 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
1016 parent_start = parent->start; 1058 parent_start = parent->start;
1017 1059
1018 /* 1060 cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
1019 * If we are COWing a node/leaf from the extent, chunk, device or free 1061 level, search_start, empty_size);
1020 * space trees, make sure that we do not finish block group creation of
1021 * pending block groups. We do this to avoid a deadlock.
1022 * COWing can result in allocation of a new chunk, and flushing pending
1023 * block groups (btrfs_create_pending_block_groups()) can be triggered
1024 * when finishing allocation of a new chunk. Creation of a pending block
1025 * group modifies the extent, chunk, device and free space trees,
1026 * therefore we could deadlock with ourselves since we are holding a
1027 * lock on an extent buffer that btrfs_create_pending_block_groups() may
1028 * try to COW later.
1029 */
1030 if (root == fs_info->extent_root ||
1031 root == fs_info->chunk_root ||
1032 root == fs_info->dev_root ||
1033 root == fs_info->free_space_root)
1034 trans->can_flush_pending_bgs = false;
1035
1036 cow = btrfs_alloc_tree_block(trans, root, parent_start,
1037 root->root_key.objectid, &disk_key, level,
1038 search_start, empty_size);
1039 trans->can_flush_pending_bgs = true;
1040 if (IS_ERR(cow)) 1062 if (IS_ERR(cow))
1041 return PTR_ERR(cow); 1063 return PTR_ERR(cow);
1042 1064
@@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3345 else 3367 else
3346 btrfs_node_key(lower, &lower_key, 0); 3368 btrfs_node_key(lower, &lower_key, 0);
3347 3369
3348 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 3370 c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
3349 &lower_key, level, root->node->start, 0); 3371 root->node->start, 0);
3350 if (IS_ERR(c)) 3372 if (IS_ERR(c))
3351 return PTR_ERR(c); 3373 return PTR_ERR(c);
3352 3374
@@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3475 mid = (c_nritems + 1) / 2; 3497 mid = (c_nritems + 1) / 2;
3476 btrfs_node_key(c, &disk_key, mid); 3498 btrfs_node_key(c, &disk_key, mid);
3477 3499
3478 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 3500 split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
3479 &disk_key, level, c->start, 0); 3501 c->start, 0);
3480 if (IS_ERR(split)) 3502 if (IS_ERR(split))
3481 return PTR_ERR(split); 3503 return PTR_ERR(split);
3482 3504
@@ -4260,8 +4282,8 @@ again:
4260 else 4282 else
4261 btrfs_item_key(l, &disk_key, mid); 4283 btrfs_item_key(l, &disk_key, mid);
4262 4284
4263 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 4285 right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
4264 &disk_key, 0, l->start, 0); 4286 l->start, 0);
4265 if (IS_ERR(right)) 4287 if (IS_ERR(right))
4266 return PTR_ERR(right); 4288 return PTR_ERR(right);
4267 4289
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c5586ffd1426..0a3f122dd61f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1621 flags | SB_RDONLY, device_name, data); 1621 flags | SB_RDONLY, device_name, data);
1622 if (IS_ERR(mnt_root)) { 1622 if (IS_ERR(mnt_root)) {
1623 root = ERR_CAST(mnt_root); 1623 root = ERR_CAST(mnt_root);
1624 kfree(subvol_name);
1624 goto out; 1625 goto out;
1625 } 1626 }
1626 1627
@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1630 if (error < 0) { 1631 if (error < 0) {
1631 root = ERR_PTR(error); 1632 root = ERR_PTR(error);
1632 mntput(mnt_root); 1633 mntput(mnt_root);
1634 kfree(subvol_name);
1633 goto out; 1635 goto out;
1634 } 1636 }
1635 } 1637 }
1636 } 1638 }
1637 if (IS_ERR(mnt_root)) { 1639 if (IS_ERR(mnt_root)) {
1638 root = ERR_CAST(mnt_root); 1640 root = ERR_CAST(mnt_root);
1641 kfree(subvol_name);
1639 goto out; 1642 goto out;
1640 } 1643 }
1641 1644
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 127fa1535f58..4ec2b660d014 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
850 850
851 btrfs_trans_release_chunk_metadata(trans); 851 btrfs_trans_release_chunk_metadata(trans);
852 852
853 if (lock && should_end_transaction(trans) &&
854 READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
855 spin_lock(&info->trans_lock);
856 if (cur_trans->state == TRANS_STATE_RUNNING)
857 cur_trans->state = TRANS_STATE_BLOCKED;
858 spin_unlock(&info->trans_lock);
859 }
860
861 if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { 853 if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
862 if (throttle) 854 if (throttle)
863 return btrfs_commit_transaction(trans); 855 return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
1879 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1871 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1880} 1872}
1881 1873
1874/*
1875 * Release reserved delayed ref space of all pending block groups of the
1876 * transaction and remove them from the list
1877 */
1878static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
1879{
1880 struct btrfs_fs_info *fs_info = trans->fs_info;
1881 struct btrfs_block_group_cache *block_group, *tmp;
1882
1883 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
1884 btrfs_delayed_refs_rsv_release(fs_info, 1);
1885 list_del_init(&block_group->bg_list);
1886 }
1887}
1888
1882static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1889static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1883{ 1890{
1884 /* 1891 /*
@@ -2270,6 +2277,7 @@ scrub_continue:
2270 btrfs_scrub_continue(fs_info); 2277 btrfs_scrub_continue(fs_info);
2271cleanup_transaction: 2278cleanup_transaction:
2272 btrfs_trans_release_metadata(trans); 2279 btrfs_trans_release_metadata(trans);
2280 btrfs_cleanup_pending_block_groups(trans);
2273 btrfs_trans_release_chunk_metadata(trans); 2281 btrfs_trans_release_chunk_metadata(trans);
2274 trans->block_rsv = NULL; 2282 trans->block_rsv = NULL;
2275 btrfs_warn(fs_info, "Skipping commit of aborted transaction."); 2283 btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3e4f8f88353e..15561926ab32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
957 else 957 else
958 fs_devices = alloc_fs_devices(disk_super->fsid, NULL); 958 fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
959 959
960 fs_devices->fsid_change = fsid_change_in_progress;
961
962 if (IS_ERR(fs_devices)) 960 if (IS_ERR(fs_devices))
963 return ERR_CAST(fs_devices); 961 return ERR_CAST(fs_devices);
964 962
963 fs_devices->fsid_change = fsid_change_in_progress;
964
965 mutex_lock(&fs_devices->device_list_mutex); 965 mutex_lock(&fs_devices->device_list_mutex);
966 list_add(&fs_devices->fs_list, &fs_uuids); 966 list_add(&fs_devices->fs_list, &fs_uuids);
967 967