aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2015-01-29 14:18:25 -0500
committerChris Mason <clm@fb.com>2015-02-02 22:24:48 -0500
commitd4b450cd4b33ce7c572e7fdccf33b59c4cdf361c (patch)
treeb72cf028d71d46a6ee9d4082a9c98b9cf17495e1 /fs
parente3540eab29e1b2260bc4b9b3979a49a00e3e3af8 (diff)
Btrfs: fix race between transaction commit and empty block group removal
Committing a transaction can race with automatic removal of empty block groups (cleaner kthread), leading to a BUG_ON() in the transaction commit code while running btrfs_finish_extent_commit(). The following sequence diagram shows how it can happen: CPU 1 CPU 2 btrfs_commit_transaction() fs_info->running_transaction = NULL btrfs_finish_extent_commit() find_first_extent_bit() -> found range for block group X in fs_info->freed_extents[] btrfs_delete_unused_bgs() -> found block group X Removed block group X's range from fs_info->freed_extents[] btrfs_remove_chunk() btrfs_remove_block_group(bg X) unpin_extent_range(bg X range) btrfs_lookup_block_group(bg X) -> returns NULL -> BUG_ON() The trace that results from the BUG_ON() is: [48665.187808] ------------[ cut here ]------------ [48665.188032] kernel BUG at fs/btrfs/extent-tree.c:5675! [48665.188032] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC [48665.188032] Modules linked in: dm_flakey dm_mod crc32c_generic btrfs xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop parport_pc evdev microcode [48665.197388] CPU: 2 PID: 31211 Comm: kworker/u32:16 Tainted: G W 3.19.0-rc5-btrfs-next-4+ #1 [48665.197388] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [48665.197388] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs] [48665.197388] task: ffff880222011810 ti: ffff8801b56a4000 task.ti: ffff8801b56a4000 [48665.197388] RIP: 0010:[<ffffffffa0350d05>] [<ffffffffa0350d05>] unpin_extent_range+0x6a/0x1ba [btrfs] [48665.197388] RSP: 0018:ffff8801b56a7b88 EFLAGS: 00010246 [48665.197388] RAX: 0000000000000000 RBX: ffff8802143a6000 RCX: ffff8802220120c8 [48665.197388] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff8800a3c140b0 [48665.197388] RBP: ffff8801b56a7bd8 R08: 0000000000000003 R09: 0000000000000000 [48665.197388] R10: 0000000000000000 R11: 000000000000bbac R12: 0000000012e8e000 [48665.197388] R13: ffff8800a3c14000 R14: 0000000000000000 R15: 0000000000000000 [48665.197388] FS: 0000000000000000(0000) GS:ffff88023ec40000(0000) knlGS:0000000000000000 [48665.197388] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [48665.197388] CR2: 00007f065e42f270 CR3: 0000000206f70000 CR4: 00000000000006e0 [48665.197388] Stack: [48665.197388] ffff8801b56a7bd8 0000000012ea0000 01ff8800a3c14138 0000000012e9ffff [48665.197388] ffff880141df3dd8 ffff8802143a6000 ffff8800a3c14138 ffff880141df3df0 [48665.197388] ffff880141df3dd8 0000000000000000 ffff8801b56a7c08 ffffffffa0354227 [48665.197388] Call Trace: [48665.197388] [<ffffffffa0354227>] btrfs_finish_extent_commit+0xb0/0xd9 [btrfs] [48665.197388] [<ffffffffa0366b4b>] btrfs_commit_transaction+0x791/0x92c [btrfs] [48665.197388] [<ffffffffa0352432>] flush_space+0x43d/0x452 [btrfs] [48665.197388] [<ffffffff814295c3>] ? _raw_spin_unlock+0x28/0x33 [48665.197388] [<ffffffffa035255f>] btrfs_async_reclaim_metadata_space+0x118/0x164 [btrfs] [48665.197388] [<ffffffff81059917>] ? process_one_work+0x14b/0x3ab [48665.197388] [<ffffffff810599ac>] process_one_work+0x1e0/0x3ab [48665.197388] [<ffffffff81079fa9>] ? trace_hardirqs_off+0xd/0xf [48665.197388] [<ffffffff8105a55b>] worker_thread+0x210/0x2d0 [48665.197388] [<ffffffff8105a34b>] ? rescuer_thread+0x2c3/0x2c3 [48665.197388] [<ffffffff8105e5c0>] kthread+0xef/0xf7 [48665.197388] [<ffffffff81429682>] ? _raw_spin_unlock_irq+0x2d/0x39 [48665.197388] [<ffffffff8105e4d1>] ? __kthread_parkme+0xad/0xad [48665.197388] [<ffffffff81429dec>] ret_from_fork+0x7c/0xb0 [48665.197388] [<ffffffff8105e4d1>] ? __kthread_parkme+0xad/0xad [48665.197388] Code: 85 f6 74 14 49 8b 06 49 03 46 09 49 39 c4 72 1d 4c 89 f7 e8 83 ec ff ff 4c 89 e6 4c 89 ef e8 1e f1 ff ff 48 85 c0 49 89 c6 75 02 <0f> 0b 49 8b 1e 49 03 5e 09 48 8b [48665.197388] RIP [<ffffffffa0350d05>] unpin_extent_range+0x6a/0x1ba [btrfs] [48665.197388] RSP <ffff8801b56a7b88> [48665.272246] ---[ end trace b9c6ab9957521376 ]--- Fix this by ensuring that unpining the block group's range in btrfs_finish_extent_commit() is done in a synchronized fashion with removing the block group's range from freed_extents[] in btrfs_delete_unused_bgs() This race got introduced with the change: Btrfs: remove empty block groups automatically commit 47ab2a6c689913db23ccae38349714edf8365e0a Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c21
3 files changed, 22 insertions, 1 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54a66fa6beb1..d3562dd43c66 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1744,6 +1744,7 @@ struct btrfs_fs_info {
1744 1744
1745 spinlock_t unused_bgs_lock; 1745 spinlock_t unused_bgs_lock;
1746 struct list_head unused_bgs; 1746 struct list_head unused_bgs;
1747 struct mutex unused_bg_unpin_mutex;
1747 1748
1748 /* For btrfs to record security options */ 1749 /* For btrfs to record security options */
1749 struct security_mnt_opts security_opts; 1750 struct security_mnt_opts security_opts;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 263d1471d01a..41b320e235d7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2242,6 +2242,7 @@ int open_ctree(struct super_block *sb,
2242 spin_lock_init(&fs_info->qgroup_op_lock); 2242 spin_lock_init(&fs_info->qgroup_op_lock);
2243 spin_lock_init(&fs_info->buffer_lock); 2243 spin_lock_init(&fs_info->buffer_lock);
2244 spin_lock_init(&fs_info->unused_bgs_lock); 2244 spin_lock_init(&fs_info->unused_bgs_lock);
2245 mutex_init(&fs_info->unused_bg_unpin_mutex);
2245 rwlock_init(&fs_info->tree_mod_log_lock); 2246 rwlock_init(&fs_info->tree_mod_log_lock);
2246 mutex_init(&fs_info->reloc_mutex); 2247 mutex_init(&fs_info->reloc_mutex);
2247 mutex_init(&fs_info->delalloc_root_mutex); 2248 mutex_init(&fs_info->delalloc_root_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 53294da0749d..857a859948a3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5735,10 +5735,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5735 unpin = &fs_info->freed_extents[0]; 5735 unpin = &fs_info->freed_extents[0];
5736 5736
5737 while (1) { 5737 while (1) {
5738 mutex_lock(&fs_info->unused_bg_unpin_mutex);
5738 ret = find_first_extent_bit(unpin, 0, &start, &end, 5739 ret = find_first_extent_bit(unpin, 0, &start, &end,
5739 EXTENT_DIRTY, NULL); 5740 EXTENT_DIRTY, NULL);
5740 if (ret) 5741 if (ret) {
5742 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
5741 break; 5743 break;
5744 }
5742 5745
5743 if (btrfs_test_opt(root, DISCARD)) 5746 if (btrfs_test_opt(root, DISCARD))
5744 ret = btrfs_discard_extent(root, start, 5747 ret = btrfs_discard_extent(root, start,
@@ -5746,6 +5749,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5746 5749
5747 clear_extent_dirty(unpin, start, end, GFP_NOFS); 5750 clear_extent_dirty(unpin, start, end, GFP_NOFS);
5748 unpin_extent_range(root, start, end, true); 5751 unpin_extent_range(root, start, end, true);
5752 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
5749 cond_resched(); 5753 cond_resched();
5750 } 5754 }
5751 5755
@@ -9561,18 +9565,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9561 */ 9565 */
9562 start = block_group->key.objectid; 9566 start = block_group->key.objectid;
9563 end = start + block_group->key.offset - 1; 9567 end = start + block_group->key.offset - 1;
9568 /*
9569 * Hold the unused_bg_unpin_mutex lock to avoid racing with
9570 * btrfs_finish_extent_commit(). If we are at transaction N,
9571 * another task might be running finish_extent_commit() for the
9572 * previous transaction N - 1, and have seen a range belonging
9573 * to the block group in freed_extents[] before we were able to
9574 * clear the whole block group range from freed_extents[]. This
9575 * means that task can lookup for the block group after we
9576 * unpinned it from freed_extents[] and removed it, leading to
9577 * a BUG_ON() at btrfs_unpin_extent_range().
9578 */
9579 mutex_lock(&fs_info->unused_bg_unpin_mutex);
9564 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, 9580 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
9565 EXTENT_DIRTY, GFP_NOFS); 9581 EXTENT_DIRTY, GFP_NOFS);
9566 if (ret) { 9582 if (ret) {
9583 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9567 btrfs_set_block_group_rw(root, block_group); 9584 btrfs_set_block_group_rw(root, block_group);
9568 goto end_trans; 9585 goto end_trans;
9569 } 9586 }
9570 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, 9587 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
9571 EXTENT_DIRTY, GFP_NOFS); 9588 EXTENT_DIRTY, GFP_NOFS);
9572 if (ret) { 9589 if (ret) {
9590 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9573 btrfs_set_block_group_rw(root, block_group); 9591 btrfs_set_block_group_rw(root, block_group);
9574 goto end_trans; 9592 goto end_trans;
9575 } 9593 }
9594 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9576 9595
9577 /* Reset pinned so btrfs_put_block_group doesn't complain */ 9596 /* Reset pinned so btrfs_put_block_group doesn't complain */
9578 block_group->pinned = 0; 9597 block_group->pinned = 0;