author     Filipe Manana <fdmanana@suse.com>    2015-12-17 22:02:48 -0500
committer  Filipe Manana <fdmanana@suse.com>    2015-12-21 12:51:22 -0500
commit     e44081ef611832b47a86abf4e36dc0ed2e950884 (patch)
tree       d30d0dd196f66f9ebc56233312f41668cf875381
parent     0376374a98abd533fb49c6db12967bddc2f4b4b3 (diff)
Btrfs: fix unprotected list operations at btrfs_write_dirty_block_groups
We call btrfs_write_dirty_block_groups() in the critical section of a transaction's commit, when no other tasks can join the transaction and add more block groups to the transaction's list of dirty block groups, so we are not taking the dirty block groups spinlock when checking for the list's emptiness, grabbing its first element or deleting elements from it.

However, there is a special and rare case where we can have a concurrent task adding elements to this list. We trigger writeback for space caches beforehand, at btrfs_start_dirty_block_groups(), and in past iterations of the loop at btrfs_write_dirty_block_groups(). This means that when the writeback finishes (which happens asynchronously), it creates a task for the endio free space work queue that executes btrfs_finish_ordered_io() - this function is able to join the transaction, through btrfs_join_transaction_nolock(), and update the free space cache's inode item in the root tree. That can result in COWing nodes of this tree and therefore in the allocation of a new block group, which gets added to the transaction's list of dirty block groups while the transaction commit task is operating on the list concurrently.

So fix this by taking the dirty block groups spinlock before doing operations on the dirty block groups list at btrfs_write_dirty_block_groups().

Signed-off-by: Filipe Manana <fdmanana@suse.com>
-rw-r--r--  fs/btrfs/extent-tree.c  19
1 file changed, 17 insertions(+), 2 deletions(-)
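For readers less familiar with this corner of the transaction commit, the pattern the patch adopts can be illustrated outside the kernel. The sketch below is a minimal user-space analogue using POSIX threads, not btrfs code: the names (struct entry, dirty_list, producer, consumer) and the workload are invented for illustration. The producer plays the role of the endio free space worker that can still append entries to the list, and the consumer plays btrfs_write_dirty_block_groups(): every emptiness check and every unlink happens under the lock, and the lock is dropped around the slow per-entry work, mirroring how the patch releases dirty_bgs_lock around btrfs_wait_cache_io() and cache_save_setup().

/* Minimal user-space analogue of the locking pattern in this patch.
 * All names are invented for illustration; this is not kernel code.
 * Build with: gcc -pthread sketch.c -o sketch */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct entry {
        int id;
        struct entry *next;
};

static struct entry *dirty_list;        /* stand-in for cur_trans->dirty_bgs */
static pthread_mutex_t dirty_lock = PTHREAD_MUTEX_INITIALIZER;
static int producer_done;

/* Stand-in for the endio free space worker: it may keep appending
 * entries while the "commit" thread is draining the list. */
static void *producer(void *arg)
{
        for (int i = 0; i < 1000; i++) {
                struct entry *e = malloc(sizeof(*e));

                e->id = i;
                pthread_mutex_lock(&dirty_lock);
                e->next = dirty_list;
                dirty_list = e;
                pthread_mutex_unlock(&dirty_lock);
        }
        __atomic_store_n(&producer_done, 1, __ATOMIC_RELEASE);
        return NULL;
}

/* Stand-in for btrfs_write_dirty_block_groups(): list checks and unlinks
 * are done under the lock, and the lock is released around the slow
 * per-entry work, as the patch does around the blocking calls. */
static void *consumer(void *arg)
{
        long flushed = 0;

        pthread_mutex_lock(&dirty_lock);
        while (dirty_list || !__atomic_load_n(&producer_done, __ATOMIC_ACQUIRE)) {
                struct entry *e = dirty_list;

                if (!e) {
                        /* Nothing queued yet; give the producer a chance. */
                        pthread_mutex_unlock(&dirty_lock);
                        sched_yield();
                        pthread_mutex_lock(&dirty_lock);
                        continue;
                }
                dirty_list = e->next;           /* unlink while holding the lock */
                pthread_mutex_unlock(&dirty_lock);

                usleep(10);                     /* slow "writeback" done unlocked */
                free(e);
                flushed++;

                pthread_mutex_lock(&dirty_lock);
        }
        pthread_mutex_unlock(&dirty_lock);
        printf("flushed %ld entries\n", flushed);
        return NULL;
}

int main(void)
{
        pthread_t p, c;

        pthread_create(&p, NULL, producer, NULL);
        pthread_create(&c, NULL, consumer, NULL);
        pthread_join(p, NULL);
        pthread_join(c, NULL);
        return 0;
}

The reason the lock cannot simply be held across the whole loop body is that btrfs_wait_cache_io() and cache_save_setup() can block, and sleeping while holding a spinlock is not allowed in the kernel; releasing it around those calls also lets the endio workers add their new block groups without stalling on the commit task.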
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c4661db2b72a..e6993f687953 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3684,11 +3684,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                 return -ENOMEM;
 
         /*
-         * We don't need the lock here since we are protected by the transaction
-         * commit. We want to do the cache_save_setup first and then run the
+         * Even though we are in the critical section of the transaction commit,
+         * we can still have concurrent tasks adding elements to this
+         * transaction's list of dirty block groups. These tasks correspond to
+         * endio free space workers started when writeback finishes for a
+         * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
+         * allocate new block groups as a result of COWing nodes of the root
+         * tree when updating the free space inode. The writeback for the space
+         * caches is triggered by an earlier call to
+         * btrfs_start_dirty_block_groups() and iterations of the following
+         * loop.
+         * Also we want to do the cache_save_setup first and then run the
          * delayed refs to make sure we have the best chance at doing this all
          * in one shot.
          */
+        spin_lock(&cur_trans->dirty_bgs_lock);
         while (!list_empty(&cur_trans->dirty_bgs)) {
                 cache = list_first_entry(&cur_trans->dirty_bgs,
                                          struct btrfs_block_group_cache,
@@ -3700,11 +3710,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                  * finish and then do it all again
                  */
                 if (!list_empty(&cache->io_list)) {
+                        spin_unlock(&cur_trans->dirty_bgs_lock);
                         list_del_init(&cache->io_list);
                         btrfs_wait_cache_io(root, trans, cache,
                                             &cache->io_ctl, path,
                                             cache->key.objectid);
                         btrfs_put_block_group(cache);
+                        spin_lock(&cur_trans->dirty_bgs_lock);
                 }
 
                 /*
@@ -3712,6 +3724,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                  * on any pending IO
                  */
                 list_del_init(&cache->dirty_list);
+                spin_unlock(&cur_trans->dirty_bgs_lock);
                 should_put = 1;
 
                 cache_save_setup(cache, trans, path);
@@ -3743,7 +3756,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                 /* if its not on the io list, we need to put the block group */
                 if (should_put)
                         btrfs_put_block_group(cache);
+                spin_lock(&cur_trans->dirty_bgs_lock);
         }
+        spin_unlock(&cur_trans->dirty_bgs_lock);
 
         while (!list_empty(io)) {
                 cache = list_first_entry(io, struct btrfs_block_group_cache,