aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Josef Bacik <josef@toxicpanda.com> 2019-02-06 15:46:14 -0500
committer: David Sterba <dsterba@suse.com> 2019-02-27 08:08:47 -0500
commit: 78c52d9eb6b7ac899bcd5a681aeff7c971c22934 (patch)
tree: dda06314e8913773e28f672b1b21442891ed0380
parent: 4ea748e1d2c9f8a27332b949e8210dbbf392987e (diff)
btrfs: check for refs on snapshot delete resume
There's a bug in snapshot deletion where we won't update the drop_progress key if we're in the UPDATE_BACKREF stage. This is a problem because we could drop refs for blocks we know don't belong to us. If we crash or umount at the right time we could experience messages such as the following when snapshot deletion resumes:

  BTRFS error (device dm-3): unable to find ref byte nr 66797568 parent 0 root 258 owner 1 offset 0
  ------------[ cut here ]------------
  WARNING: CPU: 3 PID: 16052 at fs/btrfs/extent-tree.c:7108 __btrfs_free_extent.isra.78+0x62c/0xb30 [btrfs]
  CPU: 3 PID: 16052 Comm: umount Tainted: G W OE 5.0.0-rc4+ #147
  Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./890FX Deluxe5, BIOS P1.40 05/03/2011
  RIP: 0010:__btrfs_free_extent.isra.78+0x62c/0xb30 [btrfs]
  RSP: 0018:ffffc90005cd7b18 EFLAGS: 00010286
  RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
  RDX: ffff88842fade680 RSI: ffff88842fad6b18 RDI: ffff88842fad6b18
  RBP: ffffc90005cd7bc8 R08: 0000000000000000 R09: 0000000000000001
  R10: 0000000000000001 R11: ffffffff822696b8 R12: 0000000003fb4000
  R13: 0000000000000001 R14: 0000000000000102 R15: ffff88819c9d67e0
  FS: 00007f08bb138fc0(0000) GS:ffff88842fac0000(0000) knlGS:0000000000000000
  CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f8f5d861ea0 CR3: 00000003e99fe000 CR4: 00000000000006e0
  Call Trace:
   ? _raw_spin_unlock+0x27/0x40
   ? btrfs_merge_delayed_refs+0x356/0x3e0 [btrfs]
   __btrfs_run_delayed_refs+0x75a/0x13c0 [btrfs]
   ? join_transaction+0x2b/0x460 [btrfs]
   btrfs_run_delayed_refs+0xf3/0x1c0 [btrfs]
   btrfs_commit_transaction+0x52/0xa50 [btrfs]
   ? start_transaction+0xa6/0x510 [btrfs]
   btrfs_sync_fs+0x79/0x1c0 [btrfs]
   sync_filesystem+0x70/0x90
   generic_shutdown_super+0x27/0x120
   kill_anon_super+0x12/0x30
   btrfs_kill_super+0x16/0xa0 [btrfs]
   deactivate_locked_super+0x43/0x70
   deactivate_super+0x40/0x60
   cleanup_mnt+0x3f/0x80
   __cleanup_mnt+0x12/0x20
   task_work_run+0x8b/0xc0
   exit_to_usermode_loop+0xce/0xd0
   do_syscall_64+0x20b/0x210
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

To fix this simply mark dead roots we read from disk as DEAD and then set the walk_control->restarted flag so we know we have a restarted deletion. From here whenever we try to drop refs for blocks we check to verify our ref is set on them, and if it is not we skip it. Once we find a ref that is set we unset walk_control->restarted since the tree should be in a normal state from then on, and any problems we run into from there are different issues.

I tested this with an existing broken fs and my reproducer that creates a broken fs and it fixed both file systems.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 48
-rw-r--r-- fs/btrfs/root-tree.c 8
3 files changed, 55 insertions, 3 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 85140913c0f5..0f4838e00fbc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1210,6 +1210,8 @@ enum {
1210 * Set for the subvolume tree owning the reloc tree. 1210 * Set for the subvolume tree owning the reloc tree.
1211 */ 1211 */
1212 BTRFS_ROOT_DEAD_RELOC_TREE, 1212 BTRFS_ROOT_DEAD_RELOC_TREE,
1213 /* Mark dead root stored on device whose cleanup needs to be resumed */
1214 BTRFS_ROOT_DEAD_TREE,
1213}; 1215};
1214 1216
1215/* 1217/*
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 994f0cc41799..36af54bec111 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8771,6 +8771,7 @@ struct walk_control {
8771 int keep_locks; 8771 int keep_locks;
8772 int reada_slot; 8772 int reada_slot;
8773 int reada_count; 8773 int reada_count;
8774 int restarted;
8774}; 8775};
8775 8776
8776#define DROP_REFERENCE 1 8777#define DROP_REFERENCE 1
@@ -8934,6 +8935,33 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8934} 8935}
8935 8936
8936/* 8937/*
8938 * This is used to verify a ref exists for this root to deal with a bug where we
8939 * would have a drop_progress key that hadn't been updated properly.
8940 */
/*
 * Return contract:
 *   1        - lookup_extent_backref() succeeded for (@bytenr, @parent,
 *              root objectid, @level)
 *   0        - the lookup returned -ENOENT
 *   -ENOMEM  - the temporary path could not be allocated
 *   < 0      - any other error returned by the lookup, passed through as-is
 */
8941static int check_ref_exists(struct btrfs_trans_handle *trans,
8942 struct btrfs_root *root, u64 bytenr, u64 parent,
8943 int level)
8944{
8945 struct btrfs_path *path;
8946 struct btrfs_extent_inline_ref *iref;
8947 int ret;
8948
8949 path = btrfs_alloc_path();
8950 if (!path)
8951 return -ENOMEM;
8952
/* The extent length passed to the lookup is the filesystem nodesize. */
8953 ret = lookup_extent_backref(trans, path, &iref, bytenr,
8954 root->fs_info->nodesize, parent,
8955 root->root_key.objectid, level, 0);
/* Only the return code is used; iref is never read and the path is released here. */
8956 btrfs_free_path(path);
/* NOTE(review): -ENOENT presumably means "no backref for this root" - confirm against lookup_extent_backref(). */
8957 if (ret == -ENOENT)
8958 return 0;
8959 if (ret < 0)
8960 return ret;
8961 return 1;
8962}
8963
8964/*
8937 * helper to process tree block pointer. 8965 * helper to process tree block pointer.
8938 * 8966 *
8939 * when wc->stage == DROP_REFERENCE, this function checks 8967 * when wc->stage == DROP_REFERENCE, this function checks
@@ -9088,6 +9116,23 @@ skip:
9088 } 9116 }
9089 9117
9090 /* 9118 /*
9119 * If we had a drop_progress we need to verify the refs are set
9120 * as expected. If we find our ref then we know that from here
9121 * on out everything should be correct, and we can clear the
9122 * ->restarted flag.
9123 */
9124 if (wc->restarted) {
9125 ret = check_ref_exists(trans, root, bytenr, parent,
9126 level - 1);
9127 if (ret < 0)
9128 goto out_unlock;
9129 if (ret == 0)
9130 goto no_delete;
9131 ret = 0;
9132 wc->restarted = 0;
9133 }
9134
9135 /*
9091 * Reloc tree doesn't contribute to qgroup numbers, and we have 9136 * Reloc tree doesn't contribute to qgroup numbers, and we have
9092 * already accounted them at merge time (replace_path), 9137 * already accounted them at merge time (replace_path),
9093 * thus we could skip expensive subtree trace here. 9138 * thus we could skip expensive subtree trace here.
@@ -9108,7 +9153,7 @@ skip:
9108 if (ret) 9153 if (ret)
9109 goto out_unlock; 9154 goto out_unlock;
9110 } 9155 }
9111 9156no_delete:
9112 *lookup_info = 1; 9157 *lookup_info = 1;
9113 ret = 1; 9158 ret = 1;
9114 9159
@@ -9425,6 +9470,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
9425 } 9470 }
9426 } 9471 }
9427 9472
9473 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
9428 wc->level = level; 9474 wc->level = level;
9429 wc->shared_level = -1; 9475 wc->shared_level = -1;
9430 wc->stage = DROP_REFERENCE; 9476 wc->stage = DROP_REFERENCE;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 0d2b957ca3a3..893d12fbfda0 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -263,8 +263,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
263 if (root) { 263 if (root) {
264 WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, 264 WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
265 &root->state)); 265 &root->state));
266 if (btrfs_root_refs(&root->root_item) == 0) 266 if (btrfs_root_refs(&root->root_item) == 0) {
267 set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
267 btrfs_add_dead_root(root); 268 btrfs_add_dead_root(root);
269 }
268 continue; 270 continue;
269 } 271 }
270 272
@@ -310,8 +312,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
310 break; 312 break;
311 } 313 }
312 314
313 if (btrfs_root_refs(&root->root_item) == 0) 315 if (btrfs_root_refs(&root->root_item) == 0) {
316 set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
314 btrfs_add_dead_root(root); 317 btrfs_add_dead_root(root);
318 }
315 } 319 }
316 320
317 btrfs_free_path(path); 321 btrfs_free_path(path);