aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2015-04-25 13:29:16 -0400
committerChris Mason <clm@fb.com>2015-04-26 09:26:37 -0400
commitb58d1a9ef92031a6fc2f418c01abafb4458ba009 (patch)
treeedf1cf7d38effb41856f836cec5250192b68c5e7 /fs
parenta3bdccc4e683f0ac69230707ed3fa20e7cf73a79 (diff)
Btrfs: fix race between start dirty bg cache writeout and bg deletion
While running xfstests I ran into the following: [20892.242791] ------------[ cut here ]------------ [20892.243776] WARNING: CPU: 0 PID: 13299 at fs/btrfs/super.c:260 __btrfs_abort_transaction+0x52/0x114 [btrfs]() [20892.245874] BTRFS: Transaction aborted (error -2) [20892.247329] Modules linked in: btrfs dm_snapshot dm_bufio dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse$ [20892.258488] CPU: 0 PID: 13299 Comm: fsstress Tainted: G W 4.0.0-rc5-btrfs-next-9+ #2 [20892.262011] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [20892.264738] 0000000000000009 ffff880427f8bc18 ffffffff8142fa46 ffffffff8108b6a2 [20892.266244] ffff880427f8bc68 ffff880427f8bc58 ffffffff81045ea5 ffff880427f8bc48 [20892.267761] ffffffffa0509a6d 00000000fffffffe ffff8803545d6f40 ffffffffa05a15a0 [20892.269378] Call Trace: [20892.269915] [<ffffffff8142fa46>] dump_stack+0x4f/0x7b [20892.271097] [<ffffffff8108b6a2>] ? console_unlock+0x361/0x3ad [20892.272173] [<ffffffff81045ea5>] warn_slowpath_common+0xa1/0xbb [20892.273386] [<ffffffffa0509a6d>] ? __btrfs_abort_transaction+0x52/0x114 [btrfs] [20892.274857] [<ffffffff81045f05>] warn_slowpath_fmt+0x46/0x48 [20892.275851] [<ffffffffa0509a6d>] __btrfs_abort_transaction+0x52/0x114 [btrfs] [20892.277341] [<ffffffffa0515e10>] write_one_cache_group+0x68/0xaf [btrfs] [20892.278628] [<ffffffffa052088a>] btrfs_start_dirty_block_groups+0x18d/0x29b [btrfs] [20892.280191] [<ffffffffa052f077>] btrfs_commit_transaction+0x130/0x9c9 [btrfs] [20892.281781] [<ffffffff8107d33d>] ? trace_hardirqs_on+0xd/0xf [20892.282873] [<ffffffffa054163b>] btrfs_sync_file+0x313/0x387 [btrfs] [20892.284111] [<ffffffff8117acad>] vfs_fsync_range+0x95/0xa4 [20892.285203] [<ffffffff810e603f>] ? time_hardirqs_on+0x15/0x28 [20892.286290] [<ffffffff8123960b>] ? trace_hardirqs_on_thunk+0x3a/0x3f [20892.287469] [<ffffffff8117acd8>] vfs_fsync+0x1c/0x1e [20892.288412] [<ffffffff8117ae54>] do_fsync+0x34/0x4e [20892.289348] [<ffffffff8117b07c>] SyS_fsync+0x10/0x14 [20892.290255] [<ffffffff81435b32>] system_call_fastpath+0x12/0x17 [20892.291316] ---[ end trace 597f77e664245373 ]--- [20892.293955] BTRFS: error (device sdg) in write_one_cache_group:3184: errno=-2 No such entry [20892.297390] BTRFS info (device sdg): forced readonly This happens because in btrfs_start_dirty_block_groups() we splice the transaction's list of dirty block groups into a local list and then we keep extracting the first element of the list without holding the cache_write_mutex mutex. This means that before we acquire that mutex the first block group on the list might be removed by a conurrent task running btrfs_remove_block_group(). So make sure we extract the first element (and test the list emptyness) while holding that mutex. Fixes: 1bbc621ef284 ("Btrfs: allow block group cache writeout outside critical section in commit") Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent-tree.c44
1 files changed, 27 insertions, 17 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1eef4ee01d1a..fc0db9887c0e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3408,17 +3408,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3408 int loops = 0; 3408 int loops = 0;
3409 3409
3410 spin_lock(&cur_trans->dirty_bgs_lock); 3410 spin_lock(&cur_trans->dirty_bgs_lock);
3411 if (!list_empty(&cur_trans->dirty_bgs)) { 3411 if (list_empty(&cur_trans->dirty_bgs)) {
3412 list_splice_init(&cur_trans->dirty_bgs, &dirty); 3412 spin_unlock(&cur_trans->dirty_bgs_lock);
3413 return 0;
3413 } 3414 }
3415 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3414 spin_unlock(&cur_trans->dirty_bgs_lock); 3416 spin_unlock(&cur_trans->dirty_bgs_lock);
3415 3417
3416again: 3418again:
3417 if (list_empty(&dirty)) {
3418 btrfs_free_path(path);
3419 return 0;
3420 }
3421
3422 /* 3419 /*
3423 * make sure all the block groups on our dirty list actually 3420 * make sure all the block groups on our dirty list actually
3424 * exist 3421 * exist
@@ -3431,18 +3428,16 @@ again:
3431 return -ENOMEM; 3428 return -ENOMEM;
3432 } 3429 }
3433 3430
3431 /*
3432 * cache_write_mutex is here only to save us from balance or automatic
3433 * removal of empty block groups deleting this block group while we are
3434 * writing out the cache
3435 */
3436 mutex_lock(&trans->transaction->cache_write_mutex);
3434 while (!list_empty(&dirty)) { 3437 while (!list_empty(&dirty)) {
3435 cache = list_first_entry(&dirty, 3438 cache = list_first_entry(&dirty,
3436 struct btrfs_block_group_cache, 3439 struct btrfs_block_group_cache,
3437 dirty_list); 3440 dirty_list);
3438
3439 /*
3440 * cache_write_mutex is here only to save us from balance
3441 * deleting this block group while we are writing out the
3442 * cache
3443 */
3444 mutex_lock(&trans->transaction->cache_write_mutex);
3445
3446 /* 3441 /*
3447 * this can happen if something re-dirties a block 3442 * this can happen if something re-dirties a block
3448 * group that is already under IO. Just wait for it to 3443 * group that is already under IO. Just wait for it to
@@ -3495,7 +3490,6 @@ again:
3495 } 3490 }
3496 if (!ret) 3491 if (!ret)
3497 ret = write_one_cache_group(trans, root, path, cache); 3492 ret = write_one_cache_group(trans, root, path, cache);
3498 mutex_unlock(&trans->transaction->cache_write_mutex);
3499 3493
3500 /* if its not on the io list, we need to put the block group */ 3494 /* if its not on the io list, we need to put the block group */
3501 if (should_put) 3495 if (should_put)
@@ -3503,7 +3497,16 @@ again:
3503 3497
3504 if (ret) 3498 if (ret)
3505 break; 3499 break;
3500
3501 /*
3502 * Avoid blocking other tasks for too long. It might even save
3503 * us from writing caches for block groups that are going to be
3504 * removed.
3505 */
3506 mutex_unlock(&trans->transaction->cache_write_mutex);
3507 mutex_lock(&trans->transaction->cache_write_mutex);
3506 } 3508 }
3509 mutex_unlock(&trans->transaction->cache_write_mutex);
3507 3510
3508 /* 3511 /*
3509 * go through delayed refs for all the stuff we've just kicked off 3512 * go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3517,15 @@ again:
3514 loops++; 3517 loops++;
3515 spin_lock(&cur_trans->dirty_bgs_lock); 3518 spin_lock(&cur_trans->dirty_bgs_lock);
3516 list_splice_init(&cur_trans->dirty_bgs, &dirty); 3519 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3520 /*
3521 * dirty_bgs_lock protects us from concurrent block group
3522 * deletes too (not just cache_write_mutex).
3523 */
3524 if (!list_empty(&dirty)) {
3525 spin_unlock(&cur_trans->dirty_bgs_lock);
3526 goto again;
3527 }
3517 spin_unlock(&cur_trans->dirty_bgs_lock); 3528 spin_unlock(&cur_trans->dirty_bgs_lock);
3518 goto again;
3519 } 3529 }
3520 3530
3521 btrfs_free_path(path); 3531 btrfs_free_path(path);