aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Qu Wenruo <quwenruo@cn.fujitsu.com> 2016-10-17 21:31:29 -0400
committer David Sterba <dsterba@suse.com> 2016-11-30 07:45:21 -0500
commit 824d8dff8846533c9f1f9b1eabb0c03959e989ca (patch)
tree a3d9d7e292fce8c02d945d5e774e0f754a255114
parent 33d1f05ccb698aa92db3e64a639ce523cf18a408 (diff)
btrfs: qgroup: Fix qgroup data leaking by using subtree tracing
Commit 62b99540a1d91e464 (btrfs: relocation: Fix leaking qgroups numbers on data extents) only partly fixes the problem.

The previous fix traces all new data extents at transaction commit time, when balance finishes.

However, balance is not done in one large transaction; every path replacement can happen in its own transaction. This makes the fix useless if a transaction commits during relocation.

For example:

relocate_block_group()
|-merge_reloc_roots()
|  |- merge_reloc_root()
|     |- btrfs_start_transaction()         <- Trans X
|     |- replace_path()                    <- Cause leak
|     |- btrfs_end_transaction_throttle()  <- Trans X commits here
|     |                                       Leak not fixed
|     |
|     |- btrfs_start_transaction()         <- Trans Y
|     |- replace_path()                    <- Cause leak
|     |- btrfs_end_transaction_throttle()  <- Trans Y ends
|                                             but not committed
|-btrfs_join_transaction()                 <- Still trans Y
|-qgroup_fix()                             <- Only fixes data leak
|                                             in trans Y
|-btrfs_commit_transaction()               <- Trans Y commits

In that case, the qgroup fixup can only fix the data leak in trans Y; the data leak in trans X is never fixed.

So the correct fix should happen in the same transaction as replace_path().

This patch fixes it by tracing both subtrees involved in the tree block swap, so that every leak and its fix happen in the same transaction and nothing leaks again.

Reported-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-and-Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/relocation.c | 119
1 files changed, 23 insertions, 96 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c430f2f5be24..3dc7232aa038 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1901,6 +1901,29 @@ again:
1901 BUG_ON(ret); 1901 BUG_ON(ret);
1902 1902
1903 /* 1903 /*
1904 * Info qgroup to trace both subtrees.
1905 *
1906 * We must trace both trees.
1907 * 1) Tree reloc subtree
1908 * If not traced, we will leak data numbers
1909 * 2) Fs subtree
1910 * If not traced, we will double count old data
1911 * and tree block numbers, if current trans doesn't free
1912 * data reloc tree inode.
1913 */
1914 ret = btrfs_qgroup_trace_subtree(trans, src, parent,
1915 btrfs_header_generation(parent),
1916 btrfs_header_level(parent));
1917 if (ret < 0)
1918 break;
1919 ret = btrfs_qgroup_trace_subtree(trans, dest,
1920 path->nodes[level],
1921 btrfs_header_generation(path->nodes[level]),
1922 btrfs_header_level(path->nodes[level]));
1923 if (ret < 0)
1924 break;
1925
1926 /*
1904 * swap blocks in fs tree and reloc tree. 1927 * swap blocks in fs tree and reloc tree.
1905 */ 1928 */
1906 btrfs_set_node_blockptr(parent, slot, new_bytenr); 1929 btrfs_set_node_blockptr(parent, slot, new_bytenr);
@@ -3949,90 +3972,6 @@ int prepare_to_relocate(struct reloc_control *rc)
3949 return 0; 3972 return 0;
3950} 3973}
3951 3974
3952/*
3953 * Qgroup fixer for data chunk relocation.
3954 * The data relocation is done in the following steps
3955 * 1) Copy data extents into data reloc tree
3956 * 2) Create tree reloc tree(special snapshot) for related subvolumes
3957 * 3) Modify file extents in tree reloc tree
3958 * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
3959 *
3960 * The problem is, data and tree reloc tree are not accounted to qgroup,
3961 * and 4) will only info qgroup to track tree blocks change, not file extents
3962 * in the tree blocks.
3963 *
3964 * The good news is, related data extents are all in data reloc tree, so we
3965 * only need to info qgroup to track all file extents in data reloc tree
3966 * before commit trans.
3967 */
3968static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
3969 struct reloc_control *rc)
3970{
3971 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3972 struct inode *inode = rc->data_inode;
3973 struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
3974 struct btrfs_path *path;
3975 struct btrfs_key key;
3976 int ret = 0;
3977
3978 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
3979 return 0;
3980
3981 /*
3982 * Only for stage where we update data pointers the qgroup fix is
3983 * valid.
3984 * For MOVING_DATA stage, we will miss the timing of swapping tree
3985 * blocks, and won't fix it.
3986 */
3987 if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
3988 return 0;
3989
3990 path = btrfs_alloc_path();
3991 if (!path)
3992 return -ENOMEM;
3993 key.objectid = btrfs_ino(inode);
3994 key.type = BTRFS_EXTENT_DATA_KEY;
3995 key.offset = 0;
3996
3997 ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
3998 if (ret < 0)
3999 goto out;
4000
4001 lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
4002 while (1) {
4003 struct btrfs_file_extent_item *fi;
4004
4005 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4006 if (key.objectid > btrfs_ino(inode))
4007 break;
4008 if (key.type != BTRFS_EXTENT_DATA_KEY)
4009 goto next;
4010 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4011 struct btrfs_file_extent_item);
4012 if (btrfs_file_extent_type(path->nodes[0], fi) !=
4013 BTRFS_FILE_EXTENT_REG)
4014 goto next;
4015 ret = btrfs_qgroup_trace_extent(trans, fs_info,
4016 btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
4017 btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
4018 GFP_NOFS);
4019 if (ret < 0)
4020 break;
4021next:
4022 ret = btrfs_next_item(data_reloc_root, path);
4023 if (ret < 0)
4024 break;
4025 if (ret > 0) {
4026 ret = 0;
4027 break;
4028 }
4029 }
4030 unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
4031out:
4032 btrfs_free_path(path);
4033 return ret;
4034}
4035
4036static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3975static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
4037{ 3976{
4038 struct rb_root blocks = RB_ROOT; 3977 struct rb_root blocks = RB_ROOT;
@@ -4223,13 +4162,6 @@ restart:
4223 err = PTR_ERR(trans); 4162 err = PTR_ERR(trans);
4224 goto out_free; 4163 goto out_free;
4225 } 4164 }
4226 ret = qgroup_fix_relocated_data_extents(trans, rc);
4227 if (ret < 0) {
4228 btrfs_abort_transaction(trans, ret);
4229 if (!err)
4230 err = ret;
4231 goto out_free;
4232 }
4233 btrfs_commit_transaction(trans, rc->extent_root); 4165 btrfs_commit_transaction(trans, rc->extent_root);
4234out_free: 4166out_free:
4235 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4167 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
@@ -4635,11 +4567,6 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4635 err = PTR_ERR(trans); 4567 err = PTR_ERR(trans);
4636 goto out_free; 4568 goto out_free;
4637 } 4569 }
4638 err = qgroup_fix_relocated_data_extents(trans, rc);
4639 if (err < 0) {
4640 btrfs_abort_transaction(trans, err);
4641 goto out_free;
4642 }
4643 err = btrfs_commit_transaction(trans, rc->extent_root); 4570 err = btrfs_commit_transaction(trans, rc->extent_root);
4644out_free: 4571out_free:
4645 kfree(rc); 4572 kfree(rc);