aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Filipe Manana <fdmanana@suse.com> 2015-06-10 19:58:53 -0400
committer: Chris Mason <clm@fb.com> 2015-06-30 17:36:46 -0400
commit: 67c5e7d464bc466471b05e027abe8a6b29687ebd (patch)
tree: 20f6112838a0957f6c05e5100aabb581606ea20d /fs
parent: e82afc52abff07a4acbc90f899598ebafb662831 (diff)
Btrfs: fix race between balance and unused block group deletion
We have a race between deleting an unused block group and balancing the same block group that leads to an assertion failure/BUG(), producing the following trace:

[181631.208236] BTRFS: assertion failed: 0, file: fs/btrfs/volumes.c, line: 2622
[181631.220591] ------------[ cut here ]------------
[181631.222959] kernel BUG at fs/btrfs/ctree.h:4062!
[181631.223932] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[181631.224566] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse acpi_cpufreq parpor$
[181631.224566] CPU: 8 PID: 17451 Comm: btrfs Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1
[181631.224566] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014
[181631.224566] task: ffff880127e09590 ti: ffff8800b5824000 task.ti: ffff8800b5824000
[181631.224566] RIP: 0010:[<ffffffffa03f19f6>] [<ffffffffa03f19f6>] assfail.constprop.50+0x1e/0x20 [btrfs]
[181631.224566] RSP: 0018:ffff8800b5827ae8 EFLAGS: 00010246
[181631.224566] RAX: 0000000000000040 RBX: ffff8800109fc218 RCX: ffffffff81095dce
[181631.224566] RDX: 0000000000005124 RSI: ffffffff81464819 RDI: 00000000ffffffff
[181631.224566] RBP: ffff8800b5827ae8 R08: 0000000000000001 R09: 0000000000000000
[181631.224566] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800109fc200
[181631.224566] R13: ffff880020095000 R14: ffff8800b1a13f38 R15: ffff880020095000
[181631.224566] FS: 00007f70ca0b0c80(0000) GS:ffff88013ec00000(0000) knlGS:0000000000000000
[181631.224566] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[181631.224566] CR2: 00007f2872ab6e68 CR3: 00000000a717c000 CR4: 00000000000006e0
[181631.224566] Stack:
[181631.224566] ffff8800b5827ba8 ffffffffa03f3916 ffff8800b5827b38 ffffffffa03d080e
[181631.224566] ffffffffa03d1423 ffff880020095000 ffff88001233c000 0000000000000001
[181631.224566] ffff880020095000 ffff8800b1a13f38 0000000a69c00000 0000000000000000
[181631.224566] Call Trace:
[181631.224566] [<ffffffffa03f3916>] btrfs_remove_chunk+0xa4/0x6bb [btrfs]
[181631.224566] [<ffffffffa03d080e>] ? join_transaction.isra.8+0xb9/0x3ba [btrfs]
[181631.224566] [<ffffffffa03d1423>] ? wait_current_trans.isra.13+0x22/0xfc [btrfs]
[181631.224566] [<ffffffffa03f3fbc>] btrfs_relocate_chunk.isra.29+0x8f/0xa7 [btrfs]
[181631.224566] [<ffffffffa03f54df>] btrfs_balance+0xaa4/0xc52 [btrfs]
[181631.224566] [<ffffffffa03fd388>] btrfs_ioctl_balance+0x23f/0x2b0 [btrfs]
[181631.224566] [<ffffffff810872f9>] ? trace_hardirqs_on+0xd/0xf
[181631.224566] [<ffffffffa04019a3>] btrfs_ioctl+0xfe2/0x2220 [btrfs]
[181631.224566] [<ffffffff812603ed>] ? __this_cpu_preempt_check+0x13/0x15
[181631.224566] [<ffffffff81084669>] ? arch_local_irq_save+0x9/0xc
[181631.224566] [<ffffffff81138def>] ? handle_mm_fault+0x834/0xcd2
[181631.224566] [<ffffffff81138def>] ? handle_mm_fault+0x834/0xcd2
[181631.224566] [<ffffffff8103e48c>] ? __do_page_fault+0x211/0x424
[181631.224566] [<ffffffff811755e6>] do_vfs_ioctl+0x3c6/0x479
(...)

The sequence of steps leading to this is:

           CPU 0                                      CPU 1

  btrfs_balance()
    btrfs_relocate_chunk()
      btrfs_relocate_block_group(bg X)
        btrfs_lookup_block_group(bg X)

                                               cleaner_kthread
                                                 locks fs_info->cleaner_mutex

                                                 btrfs_delete_unused_bgs()
                                                   finds bg X, which became
                                                   unused in the previous
                                                   transaction

                                                   checks bg X ->ro == 0,
                                                   so it proceeds

        sets bg X ->ro to 1
        (btrfs_set_block_group_ro(bg X))

        blocks on fs_info->cleaner_mutex

                                                   btrfs_remove_chunk(bg X)

                                                 unlocks fs_info->cleaner_mutex

        acquires fs_info->cleaner_mutex
        relocate_block_group()
          --> does nothing, no extents found in
              the extent tree from bg X
        unlocks fs_info->cleaner_mutex

        btrfs_relocate_block_group(bg X) returns

      btrfs_remove_chunk(bg X)
        extent map not found
          --> ASSERT(0)

Fix this by using a new mutex to make sure these 2 operations, block group relocation and removal, are serialized.
This issue is reproducible by running fstests generic/038 (which stresses chunk allocation and automatic removal of unused block groups) together with the following balance loop:

    while true; do btrfs balance start -dusage=0 <mountpoint> ; done

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/ctree.h 1
-rw-r--r-- fs/btrfs/disk-io.c 12
-rw-r--r-- fs/btrfs/extent-tree.c 3
-rw-r--r-- fs/btrfs/volumes.c 48
4 files changed, 58 insertions, 6 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80a9aefb0c46..aac314e14188 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info {
1778 spinlock_t unused_bgs_lock; 1778 spinlock_t unused_bgs_lock;
1779 struct list_head unused_bgs; 1779 struct list_head unused_bgs;
1780 struct mutex unused_bg_unpin_mutex; 1780 struct mutex unused_bg_unpin_mutex;
1781 struct mutex delete_unused_bgs_mutex;
1781 1782
1782 /* For btrfs to record security options */ 1783 /* For btrfs to record security options */
1783 struct security_mnt_opts security_opts; 1784 struct security_mnt_opts security_opts;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b977fc8d8201..b59deb2c63f4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1772,7 +1772,6 @@ static int cleaner_kthread(void *arg)
1772 } 1772 }
1773 1773
1774 btrfs_run_delayed_iputs(root); 1774 btrfs_run_delayed_iputs(root);
1775 btrfs_delete_unused_bgs(root->fs_info);
1776 again = btrfs_clean_one_deleted_snapshot(root); 1775 again = btrfs_clean_one_deleted_snapshot(root);
1777 mutex_unlock(&root->fs_info->cleaner_mutex); 1776 mutex_unlock(&root->fs_info->cleaner_mutex);
1778 1777
@@ -1781,6 +1780,16 @@ static int cleaner_kthread(void *arg)
1781 * needn't do anything special here. 1780 * needn't do anything special here.
1782 */ 1781 */
1783 btrfs_run_defrag_inodes(root->fs_info); 1782 btrfs_run_defrag_inodes(root->fs_info);
1783
1784 /*
1785 * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
1786 * with relocation (btrfs_relocate_chunk) and relocation
1787 * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
1788 * after acquiring fs_info->delete_unused_bgs_mutex. So we
1789 * can't hold, nor need to, fs_info->cleaner_mutex when deleting
1790 * unused block groups.
1791 */
1792 btrfs_delete_unused_bgs(root->fs_info);
1784sleep: 1793sleep:
1785 if (!try_to_freeze() && !again) { 1794 if (!try_to_freeze() && !again) {
1786 set_current_state(TASK_INTERRUPTIBLE); 1795 set_current_state(TASK_INTERRUPTIBLE);
@@ -2492,6 +2501,7 @@ int open_ctree(struct super_block *sb,
2492 spin_lock_init(&fs_info->unused_bgs_lock); 2501 spin_lock_init(&fs_info->unused_bgs_lock);
2493 rwlock_init(&fs_info->tree_mod_log_lock); 2502 rwlock_init(&fs_info->tree_mod_log_lock);
2494 mutex_init(&fs_info->unused_bg_unpin_mutex); 2503 mutex_init(&fs_info->unused_bg_unpin_mutex);
2504 mutex_init(&fs_info->delete_unused_bgs_mutex);
2495 mutex_init(&fs_info->reloc_mutex); 2505 mutex_init(&fs_info->reloc_mutex);
2496 mutex_init(&fs_info->delalloc_root_mutex); 2506 mutex_init(&fs_info->delalloc_root_mutex);
2497 seqlock_init(&fs_info->profiles_lock); 2507 seqlock_init(&fs_info->profiles_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38b76cc02f48..1c2bd1723e40 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9889 } 9889 }
9890 spin_unlock(&fs_info->unused_bgs_lock); 9890 spin_unlock(&fs_info->unused_bgs_lock);
9891 9891
9892 mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
9893
9892 /* Don't want to race with allocators so take the groups_sem */ 9894 /* Don't want to race with allocators so take the groups_sem */
9893 down_write(&space_info->groups_sem); 9895 down_write(&space_info->groups_sem);
9894 spin_lock(&block_group->lock); 9896 spin_lock(&block_group->lock);
@@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9983end_trans: 9985end_trans:
9984 btrfs_end_transaction(trans, root); 9986 btrfs_end_transaction(trans, root);
9985next: 9987next:
9988 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
9986 btrfs_put_block_group(block_group); 9989 btrfs_put_block_group(block_group);
9987 spin_lock(&fs_info->unused_bgs_lock); 9990 spin_lock(&fs_info->unused_bgs_lock);
9988 } 9991 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d4cd4059bded..9b95503ddd00 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
2766 root = root->fs_info->chunk_root; 2766 root = root->fs_info->chunk_root;
2767 extent_root = root->fs_info->extent_root; 2767 extent_root = root->fs_info->extent_root;
2768 2768
2769 /*
2770 * Prevent races with automatic removal of unused block groups.
2771 * After we relocate and before we remove the chunk with offset
2772 * chunk_offset, automatic removal of the block group can kick in,
2773 * resulting in a failure when calling btrfs_remove_chunk() below.
2774 *
2775 * Make sure to acquire this mutex before doing a tree search (dev
2776 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
2777 * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
2778 * we release the path used to search the chunk/dev tree and before
2779 * the current task acquires this mutex and calls us.
2780 */
2781 ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
2782
2769 ret = btrfs_can_relocate(extent_root, chunk_offset); 2783 ret = btrfs_can_relocate(extent_root, chunk_offset);
2770 if (ret) 2784 if (ret)
2771 return -ENOSPC; 2785 return -ENOSPC;
@@ -2814,13 +2828,18 @@ again:
2814 key.type = BTRFS_CHUNK_ITEM_KEY; 2828 key.type = BTRFS_CHUNK_ITEM_KEY;
2815 2829
2816 while (1) { 2830 while (1) {
2831 mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
2817 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 2832 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
2818 if (ret < 0) 2833 if (ret < 0) {
2834 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
2819 goto error; 2835 goto error;
2836 }
2820 BUG_ON(ret == 0); /* Corruption */ 2837 BUG_ON(ret == 0); /* Corruption */
2821 2838
2822 ret = btrfs_previous_item(chunk_root, path, key.objectid, 2839 ret = btrfs_previous_item(chunk_root, path, key.objectid,
2823 key.type); 2840 key.type);
2841 if (ret)
2842 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
2824 if (ret < 0) 2843 if (ret < 0)
2825 goto error; 2844 goto error;
2826 if (ret > 0) 2845 if (ret > 0)
@@ -2843,6 +2862,7 @@ again:
2843 else 2862 else
2844 BUG_ON(ret); 2863 BUG_ON(ret);
2845 } 2864 }
2865 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
2846 2866
2847 if (found_key.offset == 0) 2867 if (found_key.offset == 0)
2848 break; 2868 break;
@@ -3299,9 +3319,12 @@ again:
3299 goto error; 3319 goto error;
3300 } 3320 }
3301 3321
3322 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3302 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 3323 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3303 if (ret < 0) 3324 if (ret < 0) {
3325 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3304 goto error; 3326 goto error;
3327 }
3305 3328
3306 /* 3329 /*
3307 * this shouldn't happen, it means the last relocate 3330 * this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@ again:
3313 ret = btrfs_previous_item(chunk_root, path, 0, 3336 ret = btrfs_previous_item(chunk_root, path, 0,
3314 BTRFS_CHUNK_ITEM_KEY); 3337 BTRFS_CHUNK_ITEM_KEY);
3315 if (ret) { 3338 if (ret) {
3339 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3316 ret = 0; 3340 ret = 0;
3317 break; 3341 break;
3318 } 3342 }
@@ -3321,8 +3345,10 @@ again:
3321 slot = path->slots[0]; 3345 slot = path->slots[0];
3322 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3346 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3323 3347
3324 if (found_key.objectid != key.objectid) 3348 if (found_key.objectid != key.objectid) {
3349 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3325 break; 3350 break;
3351 }
3326 3352
3327 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 3353 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3328 3354
@@ -3335,10 +3361,13 @@ again:
3335 ret = should_balance_chunk(chunk_root, leaf, chunk, 3361 ret = should_balance_chunk(chunk_root, leaf, chunk,
3336 found_key.offset); 3362 found_key.offset);
3337 btrfs_release_path(path); 3363 btrfs_release_path(path);
3338 if (!ret) 3364 if (!ret) {
3365 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3339 goto loop; 3366 goto loop;
3367 }
3340 3368
3341 if (counting) { 3369 if (counting) {
3370 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3342 spin_lock(&fs_info->balance_lock); 3371 spin_lock(&fs_info->balance_lock);
3343 bctl->stat.expected++; 3372 bctl->stat.expected++;
3344 spin_unlock(&fs_info->balance_lock); 3373 spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@ again:
3348 ret = btrfs_relocate_chunk(chunk_root, 3377 ret = btrfs_relocate_chunk(chunk_root,
3349 found_key.objectid, 3378 found_key.objectid,
3350 found_key.offset); 3379 found_key.offset);
3380 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3351 if (ret && ret != -ENOSPC) 3381 if (ret && ret != -ENOSPC)
3352 goto error; 3382 goto error;
3353 if (ret == -ENOSPC) { 3383 if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@ again:
4087 key.type = BTRFS_DEV_EXTENT_KEY; 4117 key.type = BTRFS_DEV_EXTENT_KEY;
4088 4118
4089 do { 4119 do {
4120 mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
4090 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4121 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4091 if (ret < 0) 4122 if (ret < 0) {
4123 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4092 goto done; 4124 goto done;
4125 }
4093 4126
4094 ret = btrfs_previous_item(root, path, 0, key.type); 4127 ret = btrfs_previous_item(root, path, 0, key.type);
4128 if (ret)
4129 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4095 if (ret < 0) 4130 if (ret < 0)
4096 goto done; 4131 goto done;
4097 if (ret) { 4132 if (ret) {
@@ -4105,6 +4140,7 @@ again:
4105 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 4140 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4106 4141
4107 if (key.objectid != device->devid) { 4142 if (key.objectid != device->devid) {
4143 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4108 btrfs_release_path(path); 4144 btrfs_release_path(path);
4109 break; 4145 break;
4110 } 4146 }
@@ -4113,6 +4149,7 @@ again:
4113 length = btrfs_dev_extent_length(l, dev_extent); 4149 length = btrfs_dev_extent_length(l, dev_extent);
4114 4150
4115 if (key.offset + length <= new_size) { 4151 if (key.offset + length <= new_size) {
4152 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4116 btrfs_release_path(path); 4153 btrfs_release_path(path);
4117 break; 4154 break;
4118 } 4155 }
@@ -4122,6 +4159,7 @@ again:
4122 btrfs_release_path(path); 4159 btrfs_release_path(path);
4123 4160
4124 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); 4161 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
4162 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4125 if (ret && ret != -ENOSPC) 4163 if (ret && ret != -ENOSPC)
4126 goto done; 4164 goto done;
4127 if (ret == -ENOSPC) 4165 if (ret == -ENOSPC)