diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-02-03 11:48:33 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-02-03 11:48:33 -0500 |
| commit | 312b3a93dda6db9354b0c6b0f1868c1434e8c787 (patch) | |
| tree | afc8aa280c7c420247df63de3414abb70e47dc4d | |
| parent | 12491ed354d23c0ecbe02459bf4be58b8c772bc8 (diff) | |
| parent | 532b618bdf237250d6d4566536d4b6ce3d0a31fe (diff) | |
Merge tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- regression fix: transaction commit can run away due to the delayed ref
waiting heuristic; the heuristic is no longer necessary because of the
proper reservation mechanism introduced in 5.0
- regression fix: potential crash due to use-before-check of an ERR_PTR
return value
- fix for transaction abort during transaction commit that needs to
properly clean up pending block groups
- fix deadlock during b-tree node/leaf splitting: when this happens on
some of the fundamental trees, we must prevent new tree block
allocation from re-entering indirectly via the block group flushing path
- potential memory leak after errors during mount
* tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: On error always free subvol_name in btrfs_mount
btrfs: clean up pending block groups when transaction commit aborts
btrfs: fix potential oops in device_list_add
btrfs: don't end the transaction for delayed refs in throttle
Btrfs: fix deadlock when allocating tree block during leaf/node split
| -rw-r--r-- | fs/btrfs/ctree.c | 78 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 24 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 4 |
4 files changed, 71 insertions, 38 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f64aad613727..5a6c39b44c84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 968 | return 0; | 968 | return 0; |
| 969 | } | 969 | } |
| 970 | 970 | ||
| 971 | static struct extent_buffer *alloc_tree_block_no_bg_flush( | ||
| 972 | struct btrfs_trans_handle *trans, | ||
| 973 | struct btrfs_root *root, | ||
| 974 | u64 parent_start, | ||
| 975 | const struct btrfs_disk_key *disk_key, | ||
| 976 | int level, | ||
| 977 | u64 hint, | ||
| 978 | u64 empty_size) | ||
| 979 | { | ||
| 980 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 981 | struct extent_buffer *ret; | ||
| 982 | |||
| 983 | /* | ||
| 984 | * If we are COWing a node/leaf from the extent, chunk, device or free | ||
| 985 | * space trees, make sure that we do not finish block group creation of | ||
| 986 | * pending block groups. We do this to avoid a deadlock. | ||
| 987 | * COWing can result in allocation of a new chunk, and flushing pending | ||
| 988 | * block groups (btrfs_create_pending_block_groups()) can be triggered | ||
| 989 | * when finishing allocation of a new chunk. Creation of a pending block | ||
| 990 | * group modifies the extent, chunk, device and free space trees, | ||
| 991 | * therefore we could deadlock with ourselves since we are holding a | ||
| 992 | * lock on an extent buffer that btrfs_create_pending_block_groups() may | ||
| 993 | * try to COW later. | ||
| 994 | * For similar reasons, we also need to delay flushing pending block | ||
| 995 | * groups when splitting a leaf or node, from one of those trees, since | ||
| 996 | * we are holding a write lock on it and its parent or when inserting a | ||
| 997 | * new root node for one of those trees. | ||
| 998 | */ | ||
| 999 | if (root == fs_info->extent_root || | ||
| 1000 | root == fs_info->chunk_root || | ||
| 1001 | root == fs_info->dev_root || | ||
| 1002 | root == fs_info->free_space_root) | ||
| 1003 | trans->can_flush_pending_bgs = false; | ||
| 1004 | |||
| 1005 | ret = btrfs_alloc_tree_block(trans, root, parent_start, | ||
| 1006 | root->root_key.objectid, disk_key, level, | ||
| 1007 | hint, empty_size); | ||
| 1008 | trans->can_flush_pending_bgs = true; | ||
| 1009 | |||
| 1010 | return ret; | ||
| 1011 | } | ||
| 1012 | |||
| 971 | /* | 1013 | /* |
| 972 | * does the dirty work in cow of a single block. The parent block (if | 1014 | * does the dirty work in cow of a single block. The parent block (if |
| 973 | * supplied) is updated to point to the new cow copy. The new buffer is marked | 1015 | * supplied) is updated to point to the new cow copy. The new buffer is marked |
| @@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 1015 | if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) | 1057 | if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) |
| 1016 | parent_start = parent->start; | 1058 | parent_start = parent->start; |
| 1017 | 1059 | ||
| 1018 | /* | 1060 | cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, |
| 1019 | * If we are COWing a node/leaf from the extent, chunk, device or free | 1061 | level, search_start, empty_size); |
| 1020 | * space trees, make sure that we do not finish block group creation of | ||
| 1021 | * pending block groups. We do this to avoid a deadlock. | ||
| 1022 | * COWing can result in allocation of a new chunk, and flushing pending | ||
| 1023 | * block groups (btrfs_create_pending_block_groups()) can be triggered | ||
| 1024 | * when finishing allocation of a new chunk. Creation of a pending block | ||
| 1025 | * group modifies the extent, chunk, device and free space trees, | ||
| 1026 | * therefore we could deadlock with ourselves since we are holding a | ||
| 1027 | * lock on an extent buffer that btrfs_create_pending_block_groups() may | ||
| 1028 | * try to COW later. | ||
| 1029 | */ | ||
| 1030 | if (root == fs_info->extent_root || | ||
| 1031 | root == fs_info->chunk_root || | ||
| 1032 | root == fs_info->dev_root || | ||
| 1033 | root == fs_info->free_space_root) | ||
| 1034 | trans->can_flush_pending_bgs = false; | ||
| 1035 | |||
| 1036 | cow = btrfs_alloc_tree_block(trans, root, parent_start, | ||
| 1037 | root->root_key.objectid, &disk_key, level, | ||
| 1038 | search_start, empty_size); | ||
| 1039 | trans->can_flush_pending_bgs = true; | ||
| 1040 | if (IS_ERR(cow)) | 1062 | if (IS_ERR(cow)) |
| 1041 | return PTR_ERR(cow); | 1063 | return PTR_ERR(cow); |
| 1042 | 1064 | ||
| @@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 3345 | else | 3367 | else |
| 3346 | btrfs_node_key(lower, &lower_key, 0); | 3368 | btrfs_node_key(lower, &lower_key, 0); |
| 3347 | 3369 | ||
| 3348 | c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, | 3370 | c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, |
| 3349 | &lower_key, level, root->node->start, 0); | 3371 | root->node->start, 0); |
| 3350 | if (IS_ERR(c)) | 3372 | if (IS_ERR(c)) |
| 3351 | return PTR_ERR(c); | 3373 | return PTR_ERR(c); |
| 3352 | 3374 | ||
| @@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 3475 | mid = (c_nritems + 1) / 2; | 3497 | mid = (c_nritems + 1) / 2; |
| 3476 | btrfs_node_key(c, &disk_key, mid); | 3498 | btrfs_node_key(c, &disk_key, mid); |
| 3477 | 3499 | ||
| 3478 | split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, | 3500 | split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, |
| 3479 | &disk_key, level, c->start, 0); | 3501 | c->start, 0); |
| 3480 | if (IS_ERR(split)) | 3502 | if (IS_ERR(split)) |
| 3481 | return PTR_ERR(split); | 3503 | return PTR_ERR(split); |
| 3482 | 3504 | ||
| @@ -4260,8 +4282,8 @@ again: | |||
| 4260 | else | 4282 | else |
| 4261 | btrfs_item_key(l, &disk_key, mid); | 4283 | btrfs_item_key(l, &disk_key, mid); |
| 4262 | 4284 | ||
| 4263 | right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, | 4285 | right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, |
| 4264 | &disk_key, 0, l->start, 0); | 4286 | l->start, 0); |
| 4265 | if (IS_ERR(right)) | 4287 | if (IS_ERR(right)) |
| 4266 | return PTR_ERR(right); | 4288 | return PTR_ERR(right); |
| 4267 | 4289 | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c5586ffd1426..0a3f122dd61f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
| 1621 | flags | SB_RDONLY, device_name, data); | 1621 | flags | SB_RDONLY, device_name, data); |
| 1622 | if (IS_ERR(mnt_root)) { | 1622 | if (IS_ERR(mnt_root)) { |
| 1623 | root = ERR_CAST(mnt_root); | 1623 | root = ERR_CAST(mnt_root); |
| 1624 | kfree(subvol_name); | ||
| 1624 | goto out; | 1625 | goto out; |
| 1625 | } | 1626 | } |
| 1626 | 1627 | ||
| @@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
| 1630 | if (error < 0) { | 1631 | if (error < 0) { |
| 1631 | root = ERR_PTR(error); | 1632 | root = ERR_PTR(error); |
| 1632 | mntput(mnt_root); | 1633 | mntput(mnt_root); |
| 1634 | kfree(subvol_name); | ||
| 1633 | goto out; | 1635 | goto out; |
| 1634 | } | 1636 | } |
| 1635 | } | 1637 | } |
| 1636 | } | 1638 | } |
| 1637 | if (IS_ERR(mnt_root)) { | 1639 | if (IS_ERR(mnt_root)) { |
| 1638 | root = ERR_CAST(mnt_root); | 1640 | root = ERR_CAST(mnt_root); |
| 1641 | kfree(subvol_name); | ||
| 1639 | goto out; | 1642 | goto out; |
| 1640 | } | 1643 | } |
| 1641 | 1644 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 127fa1535f58..4ec2b660d014 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 850 | 850 | ||
| 851 | btrfs_trans_release_chunk_metadata(trans); | 851 | btrfs_trans_release_chunk_metadata(trans); |
| 852 | 852 | ||
| 853 | if (lock && should_end_transaction(trans) && | ||
| 854 | READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { | ||
| 855 | spin_lock(&info->trans_lock); | ||
| 856 | if (cur_trans->state == TRANS_STATE_RUNNING) | ||
| 857 | cur_trans->state = TRANS_STATE_BLOCKED; | ||
| 858 | spin_unlock(&info->trans_lock); | ||
| 859 | } | ||
| 860 | |||
| 861 | if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 853 | if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
| 862 | if (throttle) | 854 | if (throttle) |
| 863 | return btrfs_commit_transaction(trans); | 855 | return btrfs_commit_transaction(trans); |
| @@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) | |||
| 1879 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1871 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1880 | } | 1872 | } |
| 1881 | 1873 | ||
| 1874 | /* | ||
| 1875 | * Release reserved delayed ref space of all pending block groups of the | ||
| 1876 | * transaction and remove them from the list | ||
| 1877 | */ | ||
| 1878 | static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) | ||
| 1879 | { | ||
| 1880 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
| 1881 | struct btrfs_block_group_cache *block_group, *tmp; | ||
| 1882 | |||
| 1883 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { | ||
| 1884 | btrfs_delayed_refs_rsv_release(fs_info, 1); | ||
| 1885 | list_del_init(&block_group->bg_list); | ||
| 1886 | } | ||
| 1887 | } | ||
| 1888 | |||
| 1882 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1889 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
| 1883 | { | 1890 | { |
| 1884 | /* | 1891 | /* |
| @@ -2270,6 +2277,7 @@ scrub_continue: | |||
| 2270 | btrfs_scrub_continue(fs_info); | 2277 | btrfs_scrub_continue(fs_info); |
| 2271 | cleanup_transaction: | 2278 | cleanup_transaction: |
| 2272 | btrfs_trans_release_metadata(trans); | 2279 | btrfs_trans_release_metadata(trans); |
| 2280 | btrfs_cleanup_pending_block_groups(trans); | ||
| 2273 | btrfs_trans_release_chunk_metadata(trans); | 2281 | btrfs_trans_release_chunk_metadata(trans); |
| 2274 | trans->block_rsv = NULL; | 2282 | trans->block_rsv = NULL; |
| 2275 | btrfs_warn(fs_info, "Skipping commit of aborted transaction."); | 2283 | btrfs_warn(fs_info, "Skipping commit of aborted transaction."); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3e4f8f88353e..15561926ab32 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, | |||
| 957 | else | 957 | else |
| 958 | fs_devices = alloc_fs_devices(disk_super->fsid, NULL); | 958 | fs_devices = alloc_fs_devices(disk_super->fsid, NULL); |
| 959 | 959 | ||
| 960 | fs_devices->fsid_change = fsid_change_in_progress; | ||
| 961 | |||
| 962 | if (IS_ERR(fs_devices)) | 960 | if (IS_ERR(fs_devices)) |
| 963 | return ERR_CAST(fs_devices); | 961 | return ERR_CAST(fs_devices); |
| 964 | 962 | ||
| 963 | fs_devices->fsid_change = fsid_change_in_progress; | ||
| 964 | |||
| 965 | mutex_lock(&fs_devices->device_list_mutex); | 965 | mutex_lock(&fs_devices->device_list_mutex); |
| 966 | list_add(&fs_devices->fs_list, &fs_uuids); | 966 | list_add(&fs_devices->fs_list, &fs_uuids); |
| 967 | 967 | ||
