aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2018-09-27 02:42:32 -0400
committerDavid Sterba <dsterba@suse.com>2018-10-15 11:23:36 -0400
commit5f527822be40104e9056c981ff06c7750153a10a (patch)
treeea6af553bc7641740a00177779020e271ecd69d4
parentea49f3e73c4b7252c1569906c1b2cd54605af3c9 (diff)
btrfs: qgroup: Use generation-aware subtree swap to mark dirty extents
Before this patch, with quota enabled during balance, we need to mark
the whole subtree dirty for quota.

E.g.
OO = Old tree blocks (from file tree)
NN = New tree blocks (from reloc tree)

        File tree (src)           Reloc tree (dst)
            OO (a)                     NN (a)
           /    \                     /    \
     (b) OO      OO (c)         (b) NN      NN (c)
        /  \    /  \               /  \    /  \
       OO  OO  OO  OO (d)         OO  OO  OO  NN (d)

For old balance + quota case, quota will mark the whole src and dst
tree dirty, including all the 3 old tree blocks in reloc tree.

It's doable for small file tree or new tree blocks are all located at
lower level.

But for large file tree or new tree blocks are all located at higher
level, this will lead to mark the whole tree dirty, and be unbelievably
slow.

This patch will change how we handle such balance with quota enabled
case.

Now we will search from (b) and (c) for any new tree blocks whose
generation is equal to @last_snapshot, and only mark them dirty.

In above case, we only need to trace tree blocks NN(b), NN(c) and
NN(d). (NN(a) will be traced when COW happens for nodeptr
modification). And also for tree blocks OO(b), OO(c), OO(d). (OO(a)
will be traced when COW happens for nodeptr modification.)

For above case, we could skip 3 tree blocks, but for larger tree, we
can skip tons of unmodified tree blocks, and hugely speed up balance.

This patch will introduce a new function,
btrfs_qgroup_trace_subtree_swap(), which will do the following main
work:

1) Read out real root eb
   And setup basic dst_path for later calls

2) Call qgroup_trace_new_subtree_blocks()
   To trace all new tree blocks in reloc tree and their counterparts
   in the file tree.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/qgroup.c104
-rw-r--r--fs/btrfs/qgroup.h5
-rw-r--r--fs/btrfs/relocation.c11
3 files changed, 112 insertions, 8 deletions
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 0b49575698da..6b35b3481085 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2009,6 +2009,110 @@ out:
2009 return ret; 2009 return ret;
2010} 2010}
2011 2011
2012/*
2013 * Inform qgroup to trace subtree swap used in balance.
2014 *
2015 * Unlike btrfs_qgroup_trace_subtree(), this function will only trace
2016 * new tree blocks whose generation is equal to (or larger than) @last_snapshot.
2017 *
2018 * Will go down the tree block pointed by @dst_eb (pointed by @dst_parent and
2019 * @dst_slot), and find any tree blocks whose generation is at @last_snapshot,
2020 * and then go down @src_eb (pointed by @src_parent and @src_slot) to find
2021 * the counterpart of the tree block, then mark both tree blocks as qgroup dirty,
2022 * and skip all tree blocks whose generation is smaller than last_snapshot.
2023 *
2024 * This would skip tons of tree blocks of original btrfs_qgroup_trace_subtree(),
2025 * which could be the cause of very slow balance if the file tree is large.
2026 *
2027 * @src_parent, @src_slot: pointer to src (file tree) eb.
2028 * @dst_parent, @dst_slot: pointer to dst (reloc tree) eb.
2029 */
2030int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
2031 struct extent_buffer *src_parent, int src_slot,
2032 struct extent_buffer *dst_parent, int dst_slot,
2033 u64 last_snapshot)
2034{
2035 struct btrfs_fs_info *fs_info = trans->fs_info;
2036 struct btrfs_path *dst_path = NULL;
2037 struct btrfs_key first_key;
2038 struct extent_buffer *src_eb = NULL;
2039 struct extent_buffer *dst_eb = NULL;
2040 u64 child_gen;
2041 u64 child_bytenr;
2042 int level;
2043 int ret;
2044
2045 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2046 return 0;
2047
2048 /* Check parameter order */
2049 if (btrfs_node_ptr_generation(src_parent, src_slot) >
2050 btrfs_node_ptr_generation(dst_parent, dst_slot)) {
2051 btrfs_err_rl(fs_info,
2052 "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
2053 btrfs_node_ptr_generation(src_parent, src_slot),
2054 btrfs_node_ptr_generation(dst_parent, dst_slot));
2055 return -EUCLEAN;
2056 }
2057
2058 /* Read out real @src_eb, pointed by @src_parent and @src_slot */
2059 child_bytenr = btrfs_node_blockptr(src_parent, src_slot);
2060 child_gen = btrfs_node_ptr_generation(src_parent, src_slot);
2061 btrfs_node_key_to_cpu(src_parent, &first_key, src_slot);
2062
2063 src_eb = read_tree_block(fs_info, child_bytenr, child_gen,
2064 btrfs_header_level(src_parent) - 1, &first_key);
2065 if (IS_ERR(src_eb)) {
2066 ret = PTR_ERR(src_eb);
2067 goto out;
2068 }
2069
2070 /* Read out real @dst_eb, pointed by @dst_parent and @dst_slot */
2071 child_bytenr = btrfs_node_blockptr(dst_parent, dst_slot);
2072 child_gen = btrfs_node_ptr_generation(dst_parent, dst_slot);
2073 btrfs_node_key_to_cpu(dst_parent, &first_key, dst_slot);
2074
2075 dst_eb = read_tree_block(fs_info, child_bytenr, child_gen,
2076 btrfs_header_level(dst_parent) - 1, &first_key);
2077 if (IS_ERR(dst_eb)) {
2078 ret = PTR_ERR(dst_eb);
2079 goto out;
2080 }
2081
2082 if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
2083 ret = -EINVAL;
2084 goto out;
2085 }
2086
2087 level = btrfs_header_level(dst_eb);
2088 dst_path = btrfs_alloc_path();
2089 if (!dst_path) {
2090 ret = -ENOMEM;
2091 goto out;
2092 }
2093
2094 /* For dst_path */
2095 extent_buffer_get(dst_eb);
2096 dst_path->nodes[level] = dst_eb;
2097 dst_path->slots[level] = 0;
2098 dst_path->locks[level] = 0;
2099
2100 /* Do the generation-aware breadth-first search */
2101 ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
2102 level, last_snapshot);
2103 if (ret < 0)
2104 goto out;
2105 ret = 0;
2106
2107out:
2108 free_extent_buffer(src_eb);
2109 free_extent_buffer(dst_eb);
2110 btrfs_free_path(dst_path);
2111 if (ret < 0)
2112 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2113 return ret;
2114}
2115
2012int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 2116int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
2013 struct extent_buffer *root_eb, 2117 struct extent_buffer *root_eb,
2014 u64 root_gen, int root_level) 2118 u64 root_gen, int root_level)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 54b8bb282c0e..1aaf4c276900 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -236,6 +236,11 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
236int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 236int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
237 struct extent_buffer *root_eb, 237 struct extent_buffer *root_eb,
238 u64 root_gen, int root_level); 238 u64 root_gen, int root_level);
239
240int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
241 struct extent_buffer *src_parent, int src_slot,
242 struct extent_buffer *dst_parent, int dst_slot,
243 u64 last_snapshot);
239int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, 244int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
240 u64 num_bytes, struct ulist *old_roots, 245 u64 num_bytes, struct ulist *old_roots,
241 struct ulist *new_roots); 246 struct ulist *new_roots);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a5c5e9b3aceb..d10357122aa1 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1888,14 +1888,9 @@ again:
1888 * and tree block numbers, if current trans doesn't free 1888 * and tree block numbers, if current trans doesn't free
1889 * data reloc tree inode. 1889 * data reloc tree inode.
1890 */ 1890 */
1891 ret = btrfs_qgroup_trace_subtree(trans, parent, 1891 ret = btrfs_qgroup_trace_subtree_swap(trans, parent, slot,
1892 btrfs_header_generation(parent), 1892 path->nodes[level], path->slots[level],
1893 btrfs_header_level(parent)); 1893 last_snapshot);
1894 if (ret < 0)
1895 break;
1896 ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level],
1897 btrfs_header_generation(path->nodes[level]),
1898 btrfs_header_level(path->nodes[level]));
1899 if (ret < 0) 1894 if (ret < 0)
1900 break; 1895 break;
1901 1896