summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQu Wenruo <quwenruo@cn.fujitsu.com>2016-08-14 22:36:51 -0400
committerChris Mason <clm@fb.com>2016-08-25 06:58:22 -0400
commit62b99540a1d91e46422f0e04de50fc723812c421 (patch)
tree0233ea0a121889a80dd0b6769af22cfdc3c7458a
parentcb93b52cc005ba0e470845b519c662e661d5113c (diff)
btrfs: relocation: Fix leaking qgroups numbers on data extents
This patch fixes a REGRESSION introduced in 4.2, caused by the big quota rework.

When balancing data extents, qgroup will leak all its numbers for relocated data extents.

The relocation is done in the following steps for data extents:
1) Create data reloc tree and inode
2) Copy all data extents to data reloc tree
   And commit transaction
3) Create tree reloc tree (special snapshot) for any related subvolumes
4) Replace file extent in tree reloc tree with new extents in data reloc tree
   And commit transaction
5) Merge tree reloc tree with original fs, by swapping tree blocks

For 1)~4), since tree reloc tree and data reloc tree don't count toward qgroup, everything is OK.

But for 5), the swapping of tree blocks will only inform qgroup to track metadata extents. If metadata extents contain file extents, the qgroup numbers for those file extents will get lost, leading to corrupted qgroup accounting.

The fix is, before the commit transaction of step 5), to manually inform qgroup to track all file extents in data reloc tree. At commit transaction time the tree swapping is done, so qgroup will account these data extents correctly.

Cc: Mark Fasheh <mfasheh@suse.de>
Reported-by: Mark Fasheh <mfasheh@suse.de>
Reported-by: Filipe Manana <fdmanana@gmail.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/relocation.c109
1 files changed, 103 insertions, 6 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b26a5aea41b4..27480ef9813c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h" 33#include "inode-map.h"
34#include "qgroup.h"
34 35
35/* 36/*
36 * backref_node, mapping_node and tree_block start with this 37 * backref_node, mapping_node and tree_block start with this
@@ -3916,6 +3917,90 @@ int prepare_to_relocate(struct reloc_control *rc)
3916 return 0; 3917 return 0;
3917} 3918}
3918 3919
3920/*
3921 * Qgroup fixer for data chunk relocation.
3922 * The data relocation is done in the following steps:
3923 * 1) Copy data extents into data reloc tree
3924 * 2) Create tree reloc tree(special snapshot) for related subvolumes
3925 * 3) Modify file extents in tree reloc tree
3926 * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
3927 *
3928 * The problem is, data and tree reloc tree are not accounted to qgroup,
3929 * and 4) will only inform qgroup to track tree block changes, not file extents
3930 * in the tree blocks.
3931 *
3932 * The good news is, related data extents are all in data reloc tree, so we
3933 * only need to inform qgroup to track all file extents in data reloc tree
3934 * before committing the transaction. Returns 0 on success or when there is nothing to do, a negative value on failure.
3935 */
3936static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
3937 struct reloc_control *rc)
3938{
3939 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3940 struct inode *inode = rc->data_inode;
3941 struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
3942 struct btrfs_path *path;
3943 struct btrfs_key key;
3944 int ret = 0;
3945
3946 if (!fs_info->quota_enabled)
3947 return 0; /* quota is not enabled, nothing to fix */
3948
3949 /*
3950 * The qgroup fix is only valid for the stage where we update data
3951 * pointers.
3952 * For MOVING_DATA stage, we will miss the timing of swapping tree
3953 * blocks, and won't fix it.
3954 */
3955 if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
3956 return 0; /* other stage, or no extents were found */
3957
3958 path = btrfs_alloc_path();
3959 if (!path)
3960 return -ENOMEM;
3961 key.objectid = btrfs_ino(inode); /* start at the lowest file extent key of the data reloc inode */
3962 key.type = BTRFS_EXTENT_DATA_KEY;
3963 key.offset = 0;
3964
3965 ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
3966 if (ret < 0)
3967 goto out; /* the extent range is not locked yet, only free the path */
3968
3969 lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); /* whole range of the inode is locked for the walk */
3970 while (1) {
3971 struct btrfs_file_extent_item *fi;
3972
3973 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); /* NOTE(review): a positive search result may leave the slot past the last item of the leaf - confirm this first read is safe */
3974 if (key.objectid > btrfs_ino(inode))
3975 break; /* walked past the items of this inode */
3976 if (key.type != BTRFS_EXTENT_DATA_KEY)
3977 goto next; /* not a file extent item, skip it */
3978 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3979 struct btrfs_file_extent_item);
3980 if (btrfs_file_extent_type(path->nodes[0], fi) !=
3981 BTRFS_FILE_EXTENT_REG)
3982 goto next; /* only regular file extents are reported */
3983 ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
3984 btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
3985 btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
3986 GFP_NOFS);
3987 if (ret < 0)
3988 break; /* error is returned after the range is unlocked */
3989next:
3990 ret = btrfs_next_item(data_reloc_root, path);
3991 if (ret < 0)
3992 break;
3993 if (ret > 0) {
3994 ret = 0; /* a positive return ends the walk and is not reported as an error */
3995 break;
3996 }
3997 }
3998 unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
3999out:
4000 btrfs_free_path(path);
4001 return ret;
4002}
4003
3919static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 4004static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3920{ 4005{
3921 struct rb_root blocks = RB_ROOT; 4006 struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4187,16 @@ restart:
4102 4187
4103 /* get rid of pinned extents */ 4188 /* get rid of pinned extents */
4104 trans = btrfs_join_transaction(rc->extent_root); 4189 trans = btrfs_join_transaction(rc->extent_root);
4105 if (IS_ERR(trans)) 4190 if (IS_ERR(trans)) {
4106 err = PTR_ERR(trans); 4191 err = PTR_ERR(trans);
4107 else 4192 goto out_free;
4108 btrfs_commit_transaction(trans, rc->extent_root); 4193 }
4194 err = qgroup_fix_relocated_data_extents(trans, rc);
4195 if (err < 0) {
4196 btrfs_abort_transaction(trans, err);
4197 goto out_free;
4198 }
4199 btrfs_commit_transaction(trans, rc->extent_root);
4109out_free: 4200out_free:
4110 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4201 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
4111 btrfs_free_path(path); 4202 btrfs_free_path(path);
@@ -4468,10 +4559,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4468 unset_reloc_control(rc); 4559 unset_reloc_control(rc);
4469 4560
4470 trans = btrfs_join_transaction(rc->extent_root); 4561 trans = btrfs_join_transaction(rc->extent_root);
4471 if (IS_ERR(trans)) 4562 if (IS_ERR(trans)) {
4472 err = PTR_ERR(trans); 4563 err = PTR_ERR(trans);
4473 else 4564 goto out_free;
4474 err = btrfs_commit_transaction(trans, rc->extent_root); 4565 }
4566 err = qgroup_fix_relocated_data_extents(trans, rc);
4567 if (err < 0) {
4568 btrfs_abort_transaction(trans, err);
4569 goto out_free;
4570 }
4571 err = btrfs_commit_transaction(trans, rc->extent_root);
4475out_free: 4572out_free:
4476 kfree(rc); 4573 kfree(rc);
4477out: 4574out: