aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2018-10-24 05:13:03 -0400
committerDavid Sterba <dsterba@suse.com>2018-11-06 10:42:32 -0500
commit4222ea7100c0e37adace2790c8822758bbeee179 (patch)
treec10f267aaeaf99c03440f87177195f0e42f3e065
parent7e17916b35797396f681a3270245fd29c1e4c250 (diff)
Btrfs: fix deadlock on tree root leaf when finding free extent
When we are writing out a free space cache, during the transaction commit phase, we can end up in a deadlock which results in a stack trace like the following: schedule+0x28/0x80 btrfs_tree_read_lock+0x8e/0x120 [btrfs] ? finish_wait+0x80/0x80 btrfs_read_lock_root_node+0x2f/0x40 [btrfs] btrfs_search_slot+0xf6/0x9f0 [btrfs] ? evict_refill_and_join+0xd0/0xd0 [btrfs] ? inode_insert5+0x119/0x190 btrfs_lookup_inode+0x3a/0xc0 [btrfs] ? kmem_cache_alloc+0x166/0x1d0 btrfs_iget+0x113/0x690 [btrfs] __lookup_free_space_inode+0xd8/0x150 [btrfs] lookup_free_space_inode+0x5b/0xb0 [btrfs] load_free_space_cache+0x7c/0x170 [btrfs] ? cache_block_group+0x72/0x3b0 [btrfs] cache_block_group+0x1b3/0x3b0 [btrfs] ? finish_wait+0x80/0x80 find_free_extent+0x799/0x1010 [btrfs] btrfs_reserve_extent+0x9b/0x180 [btrfs] btrfs_alloc_tree_block+0x1b3/0x4f0 [btrfs] __btrfs_cow_block+0x11d/0x500 [btrfs] btrfs_cow_block+0xdc/0x180 [btrfs] btrfs_search_slot+0x3bd/0x9f0 [btrfs] btrfs_lookup_inode+0x3a/0xc0 [btrfs] ? kmem_cache_alloc+0x166/0x1d0 btrfs_update_inode_item+0x46/0x100 [btrfs] cache_save_setup+0xe4/0x3a0 [btrfs] btrfs_start_dirty_block_groups+0x1be/0x480 [btrfs] btrfs_commit_transaction+0xcb/0x8b0 [btrfs] At cache_save_setup() we need to update the inode item of a block group's cache which is located in the tree root (fs_info->tree_root), which means that it may result in COWing a leaf from that tree. If that happens we need to find a free metadata extent and while looking for one, if we find a block group which was not cached yet we attempt to load its cache by calling cache_block_group(). However this function will try to load the inode of the free space cache, which requires finding the matching inode item in the tree root - if that inode item is located in the same leaf as the inode item of the space cache we are updating at cache_save_setup(), we end up in a deadlock, since we try to obtain a read lock on the same extent buffer that we previously write locked. So fix this by using the tree root's commit root when searching for a block group's free space cache inode item when we are attempting to load a free space cache. This is safe since block groups once loaded stay in memory forever, as well as their caches, so after they are first loaded we will never need to read their inode items again. For new block groups, once they are created they get their ->cached field set to BTRFS_CACHE_FINISHED meaning we will not need to read their inode item. Reported-by: Andrew Nelson <andrew.s.nelson@gmail.com> Link: https://lore.kernel.org/linux-btrfs/CAPTELenq9x5KOWuQ+fa7h1r3nsJG8vyiTH8+ifjURc_duHh2Wg@mail.gmail.com/ Fixes: 9d66e233c704 ("Btrfs: load free space cache if it exists") Tested-by: Andrew Nelson <andrew.s.nelson@gmail.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/free-space-cache.c22
-rw-r--r--fs/btrfs/inode.c32
3 files changed, 46 insertions, 11 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68ca41dbbef3..f7f955109d8b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
3163int btrfs_drop_inode(struct inode *inode); 3163int btrfs_drop_inode(struct inode *inode);
3164int __init btrfs_init_cachep(void); 3164int __init btrfs_init_cachep(void);
3165void __cold btrfs_destroy_cachep(void); 3165void __cold btrfs_destroy_cachep(void);
3166struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
3167 struct btrfs_root *root, int *new,
3168 struct btrfs_path *path);
3166struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3169struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3167 struct btrfs_root *root, int *was_new); 3170 struct btrfs_root *root, int *was_new);
3168struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, 3171struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4ba0aedc878b..74aa552f4793 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
75 * sure NOFS is set to keep us from deadlocking. 75 * sure NOFS is set to keep us from deadlocking.
76 */ 76 */
77 nofs_flag = memalloc_nofs_save(); 77 nofs_flag = memalloc_nofs_save();
78 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 78 inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
79 btrfs_release_path(path);
79 memalloc_nofs_restore(nofs_flag); 80 memalloc_nofs_restore(nofs_flag);
80 if (IS_ERR(inode)) 81 if (IS_ERR(inode))
81 return inode; 82 return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
838 path->search_commit_root = 1; 839 path->search_commit_root = 1;
839 path->skip_locking = 1; 840 path->skip_locking = 1;
840 841
842 /*
843 * We must pass a path with search_commit_root set to btrfs_iget in
844 * order to avoid a deadlock when allocating extents for the tree root.
845 *
846 * When we are COWing an extent buffer from the tree root, when looking
847 * for a free extent, at extent-tree.c:find_free_extent(), we can find
848 * block group without its free space cache loaded. When we find one
849 * we must load its space cache which requires reading its free space
850 * cache's inode item from the root tree. If this inode item is located
851 * in the same leaf that we started COWing before, then we end up in
852 * deadlock on the extent buffer (trying to read lock it when we
853 * previously write locked it).
854 *
855 * It's safe to read the inode item using the commit root because
856 * block groups, once loaded, stay in memory forever (until they are
857 * removed) as well as their space caches once loaded. New block groups
858 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
859 * we will never try to read their inode item while the fs is mounted.
860 */
841 inode = lookup_free_space_inode(fs_info, block_group, path); 861 inode = lookup_free_space_inode(fs_info, block_group, path);
842 if (IS_ERR(inode)) { 862 if (IS_ERR(inode)) {
843 btrfs_free_path(path); 863 btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55761b1519f5..e71daadd2f75 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3569,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3569/* 3569/*
3570 * read an inode from the btree into the in-memory inode 3570 * read an inode from the btree into the in-memory inode
3571 */ 3571 */
3572static int btrfs_read_locked_inode(struct inode *inode) 3572static int btrfs_read_locked_inode(struct inode *inode,
3573 struct btrfs_path *in_path)
3573{ 3574{
3574 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3575 struct btrfs_path *path; 3576 struct btrfs_path *path = in_path;
3576 struct extent_buffer *leaf; 3577 struct extent_buffer *leaf;
3577 struct btrfs_inode_item *inode_item; 3578 struct btrfs_inode_item *inode_item;
3578 struct btrfs_root *root = BTRFS_I(inode)->root; 3579 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3588,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
3588 if (!ret) 3589 if (!ret)
3589 filled = true; 3590 filled = true;
3590 3591
3591 path = btrfs_alloc_path(); 3592 if (!path) {
3592 if (!path) 3593 path = btrfs_alloc_path();
3593 return -ENOMEM; 3594 if (!path)
3595 return -ENOMEM;
3596 }
3594 3597
3595 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3598 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3596 3599
3597 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3600 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3598 if (ret) { 3601 if (ret) {
3599 btrfs_free_path(path); 3602 if (path != in_path)
3603 btrfs_free_path(path);
3600 return ret; 3604 return ret;
3601 } 3605 }
3602 3606
@@ -3721,7 +3725,8 @@ cache_acl:
3721 btrfs_ino(BTRFS_I(inode)), 3725 btrfs_ino(BTRFS_I(inode)),
3722 root->root_key.objectid, ret); 3726 root->root_key.objectid, ret);
3723 } 3727 }
3724 btrfs_free_path(path); 3728 if (path != in_path)
3729 btrfs_free_path(path);
3725 3730
3726 if (!maybe_acls) 3731 if (!maybe_acls)
3727 cache_no_acl(inode); 3732 cache_no_acl(inode);
@@ -5643,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
5643/* Get an inode object given its location and corresponding root. 5648/* Get an inode object given its location and corresponding root.
5644 * Returns in *is_new if the inode was read from disk 5649 * Returns in *is_new if the inode was read from disk
5645 */ 5650 */
5646struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 5651struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
5647 struct btrfs_root *root, int *new) 5652 struct btrfs_root *root, int *new,
5653 struct btrfs_path *path)
5648{ 5654{
5649 struct inode *inode; 5655 struct inode *inode;
5650 5656
@@ -5655,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5655 if (inode->i_state & I_NEW) { 5661 if (inode->i_state & I_NEW) {
5656 int ret; 5662 int ret;
5657 5663
5658 ret = btrfs_read_locked_inode(inode); 5664 ret = btrfs_read_locked_inode(inode, path);
5659 if (!ret) { 5665 if (!ret) {
5660 inode_tree_add(inode); 5666 inode_tree_add(inode);
5661 unlock_new_inode(inode); 5667 unlock_new_inode(inode);
@@ -5677,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5677 return inode; 5683 return inode;
5678} 5684}
5679 5685
5686struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5687 struct btrfs_root *root, int *new)
5688{
5689 return btrfs_iget_path(s, location, root, new, NULL);
5690}
5691
5680static struct inode *new_simple_dir(struct super_block *s, 5692static struct inode *new_simple_dir(struct super_block *s,
5681 struct btrfs_key *key, 5693 struct btrfs_key *key,
5682 struct btrfs_root *root) 5694 struct btrfs_root *root)