diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2008-10-29 14:49:05 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-10-29 14:49:05 -0400 |
commit | f82d02d9d8222183b7945e893111a6d1bf67ae4a (patch) | |
tree | 70be1bb231f4cc2e673920774e759359f3dcf1a5 /fs | |
parent | c8b978188c9a0fd3d535c13debd19d522b726f1f (diff) |
Btrfs: Improve space balancing code
This patch improves the space balancing code to keep more sharing
of tree blocks. The only case that breaks sharing of tree blocks is
data extents get fragmented during balancing. The main changes in
this patch are:
Add a 'drop sub-tree' function. This solves the problem in old code
that BTRFS_HEADER_FLAG_WRITTEN check breaks sharing of tree block.
Remove relocation mapping tree. Relocation mappings are stored in
struct btrfs_ref_path and updated dynamically during walking up/down
the reference path. This reduces CPU usage and simplifies code.
This patch also fixes a bug. Root items for reloc trees should be
updated in btrfs_free_reloc_root.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.c | 93 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 13 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 364 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 4 |
5 files changed, 277 insertions, 199 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9caeb377de63..73899d0f9d8f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -287,7 +287,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
287 | /* | 287 | /* |
288 | * There are only two places that can drop reference to | 288 | * There are only two places that can drop reference to |
289 | * tree blocks owned by living reloc trees, one is here, | 289 | * tree blocks owned by living reloc trees, one is here, |
290 | * the other place is btrfs_merge_path. In both places, | 290 | * the other place is btrfs_drop_subtree. In both places, |
291 | * we check reference count while tree block is locked. | 291 | * we check reference count while tree block is locked. |
292 | * Furthermore, if reference count is one, it won't get | 292 | * Furthermore, if reference count is one, it won't get |
293 | * increased by someone else. | 293 | * increased by someone else. |
@@ -312,9 +312,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
312 | } | 312 | } |
313 | 313 | ||
314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
315 | ret = btrfs_add_reloc_mapping(root, buf->start, | ||
316 | buf->len, cow->start); | ||
317 | BUG_ON(ret); | ||
318 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); | 315 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); |
319 | WARN_ON(ret); | 316 | WARN_ON(ret); |
320 | } | 317 | } |
@@ -1627,61 +1624,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1627 | btrfs_node_key_to_cpu(eb, &key, slot); | 1624 | btrfs_node_key_to_cpu(eb, &key, slot); |
1628 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); | 1625 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); |
1629 | 1626 | ||
1627 | if (generation == trans->transid) { | ||
1628 | eb = read_tree_block(root, bytenr, blocksize, | ||
1629 | generation); | ||
1630 | btrfs_tree_lock(eb); | ||
1631 | } | ||
1632 | |||
1630 | /* | 1633 | /* |
1631 | * if node keys match and node pointer hasn't been modified | 1634 | * if node keys match and node pointer hasn't been modified |
1632 | * in the running transaction, we can merge the path. for | 1635 | * in the running transaction, we can merge the path. for |
1633 | * reloc trees, the node pointer check is skipped, this is | 1636 | * blocks owened by reloc trees, the node pointer check is |
1634 | * because the reloc trees are fully controlled by the space | 1637 | * skipped, this is because these blocks are fully controlled |
1635 | * balance code, no one else can modify them. | 1638 | * by the space balance code, no one else can modify them. |
1636 | */ | 1639 | */ |
1637 | if (!nodes[level - 1] || !key_match || | 1640 | if (!nodes[level - 1] || !key_match || |
1638 | (generation == trans->transid && | 1641 | (generation == trans->transid && |
1639 | root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { | 1642 | btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { |
1640 | next_level: | 1643 | if (level == 1 || level == lowest_level + 1) { |
1641 | if (level == 1 || level == lowest_level + 1) | 1644 | if (generation == trans->transid) { |
1645 | btrfs_tree_unlock(eb); | ||
1646 | free_extent_buffer(eb); | ||
1647 | } | ||
1642 | break; | 1648 | break; |
1649 | } | ||
1643 | 1650 | ||
1644 | eb = read_tree_block(root, bytenr, blocksize, | 1651 | if (generation != trans->transid) { |
1645 | generation); | 1652 | eb = read_tree_block(root, bytenr, blocksize, |
1646 | btrfs_tree_lock(eb); | 1653 | generation); |
1654 | btrfs_tree_lock(eb); | ||
1655 | } | ||
1647 | 1656 | ||
1648 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | 1657 | ret = btrfs_cow_block(trans, root, eb, parent, slot, |
1649 | &eb, 0); | 1658 | &eb, 0); |
1650 | BUG_ON(ret); | 1659 | BUG_ON(ret); |
1651 | 1660 | ||
1661 | if (root->root_key.objectid == | ||
1662 | BTRFS_TREE_RELOC_OBJECTID) { | ||
1663 | if (!nodes[level - 1]) { | ||
1664 | nodes[level - 1] = eb->start; | ||
1665 | memcpy(&node_keys[level - 1], &key, | ||
1666 | sizeof(node_keys[0])); | ||
1667 | } else { | ||
1668 | WARN_ON(1); | ||
1669 | } | ||
1670 | } | ||
1671 | |||
1652 | btrfs_tree_unlock(parent); | 1672 | btrfs_tree_unlock(parent); |
1653 | free_extent_buffer(parent); | 1673 | free_extent_buffer(parent); |
1654 | parent = eb; | 1674 | parent = eb; |
1655 | continue; | 1675 | continue; |
1656 | } | 1676 | } |
1657 | 1677 | ||
1658 | if (generation == trans->transid) { | ||
1659 | u32 refs; | ||
1660 | BUG_ON(btrfs_header_owner(eb) != | ||
1661 | BTRFS_TREE_RELOC_OBJECTID); | ||
1662 | /* | ||
1663 | * lock the block to keep __btrfs_cow_block from | ||
1664 | * changing the reference count. | ||
1665 | */ | ||
1666 | eb = read_tree_block(root, bytenr, blocksize, | ||
1667 | generation); | ||
1668 | btrfs_tree_lock(eb); | ||
1669 | |||
1670 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, | ||
1671 | blocksize, &refs); | ||
1672 | BUG_ON(ret); | ||
1673 | /* | ||
1674 | * if replace block whose reference count is one, | ||
1675 | * we have to "drop the subtree". so skip it for | ||
1676 | * simplicity | ||
1677 | */ | ||
1678 | if (refs == 1) { | ||
1679 | btrfs_tree_unlock(eb); | ||
1680 | free_extent_buffer(eb); | ||
1681 | goto next_level; | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1685 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); | 1678 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); |
1686 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); | 1679 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); |
1687 | btrfs_mark_buffer_dirty(parent); | 1680 | btrfs_mark_buffer_dirty(parent); |
@@ -1693,16 +1686,24 @@ next_level: | |||
1693 | btrfs_header_generation(parent), | 1686 | btrfs_header_generation(parent), |
1694 | level - 1); | 1687 | level - 1); |
1695 | BUG_ON(ret); | 1688 | BUG_ON(ret); |
1696 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1697 | blocksize, parent->start, | ||
1698 | btrfs_header_owner(parent), | ||
1699 | btrfs_header_generation(parent), | ||
1700 | level - 1, 1); | ||
1701 | BUG_ON(ret); | ||
1702 | 1689 | ||
1690 | /* | ||
1691 | * If the block was created in the running transaction, | ||
1692 | * it's possible this is the last reference to it, so we | ||
1693 | * should drop the subtree. | ||
1694 | */ | ||
1703 | if (generation == trans->transid) { | 1695 | if (generation == trans->transid) { |
1696 | ret = btrfs_drop_subtree(trans, root, eb, parent); | ||
1697 | BUG_ON(ret); | ||
1704 | btrfs_tree_unlock(eb); | 1698 | btrfs_tree_unlock(eb); |
1705 | free_extent_buffer(eb); | 1699 | free_extent_buffer(eb); |
1700 | } else { | ||
1701 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1702 | blocksize, parent->start, | ||
1703 | btrfs_header_owner(parent), | ||
1704 | btrfs_header_generation(parent), | ||
1705 | level - 1, 1); | ||
1706 | BUG_ON(ret); | ||
1706 | } | 1707 | } |
1707 | break; | 1708 | break; |
1708 | } | 1709 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 793d8fdda244..117090995e7c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -684,7 +684,6 @@ struct btrfs_fs_info { | |||
684 | int thread_pool_size; | 684 | int thread_pool_size; |
685 | 685 | ||
686 | /* tree relocation relocated fields */ | 686 | /* tree relocation relocated fields */ |
687 | struct extent_io_tree reloc_mapping_tree; | ||
688 | struct list_head dead_reloc_roots; | 687 | struct list_head dead_reloc_roots; |
689 | struct btrfs_leaf_ref_tree reloc_ref_tree; | 688 | struct btrfs_leaf_ref_tree reloc_ref_tree; |
690 | struct btrfs_leaf_ref_tree shared_ref_tree; | 689 | struct btrfs_leaf_ref_tree shared_ref_tree; |
@@ -1636,13 +1635,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
1636 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 1635 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
1637 | struct btrfs_root *root, u64 group_start); | 1636 | struct btrfs_root *root, u64 group_start); |
1638 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | 1637 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); |
1639 | int btrfs_free_reloc_root(struct btrfs_root *root); | 1638 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, |
1639 | struct btrfs_root *root); | ||
1640 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); | 1640 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); |
1641 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
1642 | u64 num_bytes, u64 new_bytenr); | ||
1643 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
1644 | u64 num_bytes, u64 *new_bytenr); | ||
1645 | void btrfs_free_reloc_mappings(struct btrfs_root *root); | ||
1646 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | 1641 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, |
1647 | struct btrfs_root *root, | 1642 | struct btrfs_root *root, |
1648 | struct extent_buffer *buf, u64 orig_start); | 1643 | struct extent_buffer *buf, u64 orig_start); |
@@ -1726,6 +1721,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | |||
1726 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 1721 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
1727 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | 1722 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root |
1728 | *root); | 1723 | *root); |
1724 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | ||
1725 | struct btrfs_root *root, | ||
1726 | struct extent_buffer *node, | ||
1727 | struct extent_buffer *parent); | ||
1729 | /* root-item.c */ | 1728 | /* root-item.c */ |
1730 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1729 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1731 | struct btrfs_key *key); | 1730 | struct btrfs_key *key); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dc95f636a11b..796256440dfa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1448,8 +1448,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1448 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1448 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1449 | fs_info->do_barriers = 1; | 1449 | fs_info->do_barriers = 1; |
1450 | 1450 | ||
1451 | extent_io_tree_init(&fs_info->reloc_mapping_tree, | ||
1452 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1453 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1451 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
1454 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); | 1452 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); |
1455 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); | 1453 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bbf04e80a1a3..56e41369d713 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3032,13 +3032,103 @@ out: | |||
3032 | } | 3032 | } |
3033 | 3033 | ||
3034 | /* | 3034 | /* |
3035 | * helper function for drop_subtree, this function is similar to | ||
3036 | * walk_down_tree. The main difference is that it checks reference | ||
3037 | * counts while tree blocks are locked. | ||
3038 | */ | ||
3039 | static int noinline walk_down_subtree(struct btrfs_trans_handle *trans, | ||
3040 | struct btrfs_root *root, | ||
3041 | struct btrfs_path *path, int *level) | ||
3042 | { | ||
3043 | struct extent_buffer *next; | ||
3044 | struct extent_buffer *cur; | ||
3045 | struct extent_buffer *parent; | ||
3046 | u64 bytenr; | ||
3047 | u64 ptr_gen; | ||
3048 | u32 blocksize; | ||
3049 | u32 refs; | ||
3050 | int ret; | ||
3051 | |||
3052 | cur = path->nodes[*level]; | ||
3053 | ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, | ||
3054 | &refs); | ||
3055 | BUG_ON(ret); | ||
3056 | if (refs > 1) | ||
3057 | goto out; | ||
3058 | |||
3059 | while (*level >= 0) { | ||
3060 | cur = path->nodes[*level]; | ||
3061 | if (*level == 0) { | ||
3062 | ret = btrfs_drop_leaf_ref(trans, root, cur); | ||
3063 | BUG_ON(ret); | ||
3064 | clean_tree_block(trans, root, cur); | ||
3065 | break; | ||
3066 | } | ||
3067 | if (path->slots[*level] >= btrfs_header_nritems(cur)) { | ||
3068 | clean_tree_block(trans, root, cur); | ||
3069 | break; | ||
3070 | } | ||
3071 | |||
3072 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
3073 | blocksize = btrfs_level_size(root, *level - 1); | ||
3074 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
3075 | |||
3076 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
3077 | btrfs_tree_lock(next); | ||
3078 | |||
3079 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, | ||
3080 | &refs); | ||
3081 | BUG_ON(ret); | ||
3082 | if (refs > 1) { | ||
3083 | parent = path->nodes[*level]; | ||
3084 | ret = btrfs_free_extent(trans, root, bytenr, | ||
3085 | blocksize, parent->start, | ||
3086 | btrfs_header_owner(parent), | ||
3087 | btrfs_header_generation(parent), | ||
3088 | *level - 1, 1); | ||
3089 | BUG_ON(ret); | ||
3090 | path->slots[*level]++; | ||
3091 | btrfs_tree_unlock(next); | ||
3092 | free_extent_buffer(next); | ||
3093 | continue; | ||
3094 | } | ||
3095 | |||
3096 | *level = btrfs_header_level(next); | ||
3097 | path->nodes[*level] = next; | ||
3098 | path->slots[*level] = 0; | ||
3099 | path->locks[*level] = 1; | ||
3100 | cond_resched(); | ||
3101 | } | ||
3102 | out: | ||
3103 | parent = path->nodes[*level + 1]; | ||
3104 | bytenr = path->nodes[*level]->start; | ||
3105 | blocksize = path->nodes[*level]->len; | ||
3106 | |||
3107 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
3108 | parent->start, btrfs_header_owner(parent), | ||
3109 | btrfs_header_generation(parent), *level, 1); | ||
3110 | BUG_ON(ret); | ||
3111 | |||
3112 | if (path->locks[*level]) { | ||
3113 | btrfs_tree_unlock(path->nodes[*level]); | ||
3114 | path->locks[*level] = 0; | ||
3115 | } | ||
3116 | free_extent_buffer(path->nodes[*level]); | ||
3117 | path->nodes[*level] = NULL; | ||
3118 | *level += 1; | ||
3119 | cond_resched(); | ||
3120 | return 0; | ||
3121 | } | ||
3122 | |||
3123 | /* | ||
3035 | * helper for dropping snapshots. This walks back up the tree in the path | 3124 | * helper for dropping snapshots. This walks back up the tree in the path |
3036 | * to find the first node higher up where we haven't yet gone through | 3125 | * to find the first node higher up where we haven't yet gone through |
3037 | * all the slots | 3126 | * all the slots |
3038 | */ | 3127 | */ |
3039 | static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | 3128 | static int noinline walk_up_tree(struct btrfs_trans_handle *trans, |
3040 | struct btrfs_root *root, | 3129 | struct btrfs_root *root, |
3041 | struct btrfs_path *path, int *level) | 3130 | struct btrfs_path *path, |
3131 | int *level, int max_level) | ||
3042 | { | 3132 | { |
3043 | u64 root_owner; | 3133 | u64 root_owner; |
3044 | u64 root_gen; | 3134 | u64 root_gen; |
@@ -3047,7 +3137,7 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | |||
3047 | int slot; | 3137 | int slot; |
3048 | int ret; | 3138 | int ret; |
3049 | 3139 | ||
3050 | for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 3140 | for (i = *level; i < max_level && path->nodes[i]; i++) { |
3051 | slot = path->slots[i]; | 3141 | slot = path->slots[i]; |
3052 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 3142 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { |
3053 | struct extent_buffer *node; | 3143 | struct extent_buffer *node; |
@@ -3070,12 +3160,18 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | |||
3070 | 3160 | ||
3071 | root_owner = btrfs_header_owner(parent); | 3161 | root_owner = btrfs_header_owner(parent); |
3072 | root_gen = btrfs_header_generation(parent); | 3162 | root_gen = btrfs_header_generation(parent); |
3163 | |||
3164 | clean_tree_block(trans, root, path->nodes[*level]); | ||
3073 | ret = btrfs_free_extent(trans, root, | 3165 | ret = btrfs_free_extent(trans, root, |
3074 | path->nodes[*level]->start, | 3166 | path->nodes[*level]->start, |
3075 | path->nodes[*level]->len, | 3167 | path->nodes[*level]->len, |
3076 | parent->start, root_owner, | 3168 | parent->start, root_owner, |
3077 | root_gen, *level, 1); | 3169 | root_gen, *level, 1); |
3078 | BUG_ON(ret); | 3170 | BUG_ON(ret); |
3171 | if (path->locks[*level]) { | ||
3172 | btrfs_tree_unlock(path->nodes[*level]); | ||
3173 | path->locks[*level] = 0; | ||
3174 | } | ||
3079 | free_extent_buffer(path->nodes[*level]); | 3175 | free_extent_buffer(path->nodes[*level]); |
3080 | path->nodes[*level] = NULL; | 3176 | path->nodes[*level] = NULL; |
3081 | *level = i + 1; | 3177 | *level = i + 1; |
@@ -3145,7 +3241,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3145 | if (wret < 0) | 3241 | if (wret < 0) |
3146 | ret = wret; | 3242 | ret = wret; |
3147 | 3243 | ||
3148 | wret = walk_up_tree(trans, root, path, &level); | 3244 | wret = walk_up_tree(trans, root, path, &level, |
3245 | BTRFS_MAX_LEVEL); | ||
3149 | if (wret > 0) | 3246 | if (wret > 0) |
3150 | break; | 3247 | break; |
3151 | if (wret < 0) | 3248 | if (wret < 0) |
@@ -3168,6 +3265,50 @@ out: | |||
3168 | return ret; | 3265 | return ret; |
3169 | } | 3266 | } |
3170 | 3267 | ||
3268 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | ||
3269 | struct btrfs_root *root, | ||
3270 | struct extent_buffer *node, | ||
3271 | struct extent_buffer *parent) | ||
3272 | { | ||
3273 | struct btrfs_path *path; | ||
3274 | int level; | ||
3275 | int parent_level; | ||
3276 | int ret = 0; | ||
3277 | int wret; | ||
3278 | |||
3279 | path = btrfs_alloc_path(); | ||
3280 | BUG_ON(!path); | ||
3281 | |||
3282 | BUG_ON(!btrfs_tree_locked(parent)); | ||
3283 | parent_level = btrfs_header_level(parent); | ||
3284 | extent_buffer_get(parent); | ||
3285 | path->nodes[parent_level] = parent; | ||
3286 | path->slots[parent_level] = btrfs_header_nritems(parent); | ||
3287 | |||
3288 | BUG_ON(!btrfs_tree_locked(node)); | ||
3289 | level = btrfs_header_level(node); | ||
3290 | extent_buffer_get(node); | ||
3291 | path->nodes[level] = node; | ||
3292 | path->slots[level] = 0; | ||
3293 | |||
3294 | while (1) { | ||
3295 | wret = walk_down_subtree(trans, root, path, &level); | ||
3296 | if (wret < 0) | ||
3297 | ret = wret; | ||
3298 | if (wret != 0) | ||
3299 | break; | ||
3300 | |||
3301 | wret = walk_up_tree(trans, root, path, &level, parent_level); | ||
3302 | if (wret < 0) | ||
3303 | ret = wret; | ||
3304 | if (wret != 0) | ||
3305 | break; | ||
3306 | } | ||
3307 | |||
3308 | btrfs_free_path(path); | ||
3309 | return ret; | ||
3310 | } | ||
3311 | |||
3171 | static unsigned long calc_ra(unsigned long start, unsigned long last, | 3312 | static unsigned long calc_ra(unsigned long start, unsigned long last, |
3172 | unsigned long nr) | 3313 | unsigned long nr) |
3173 | { | 3314 | { |
@@ -3312,6 +3453,10 @@ struct btrfs_ref_path { | |||
3312 | u32 num_refs; | 3453 | u32 num_refs; |
3313 | int lowest_level; | 3454 | int lowest_level; |
3314 | int current_level; | 3455 | int current_level; |
3456 | int shared_level; | ||
3457 | |||
3458 | struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; | ||
3459 | u64 new_nodes[BTRFS_MAX_LEVEL]; | ||
3315 | }; | 3460 | }; |
3316 | 3461 | ||
3317 | struct disk_extent { | 3462 | struct disk_extent { |
@@ -3360,6 +3505,7 @@ static int noinline __next_ref_path(struct btrfs_trans_handle *trans, | |||
3360 | if (first_time) { | 3505 | if (first_time) { |
3361 | ref_path->lowest_level = -1; | 3506 | ref_path->lowest_level = -1; |
3362 | ref_path->current_level = -1; | 3507 | ref_path->current_level = -1; |
3508 | ref_path->shared_level = -1; | ||
3363 | goto walk_up; | 3509 | goto walk_up; |
3364 | } | 3510 | } |
3365 | walk_down: | 3511 | walk_down: |
@@ -3403,8 +3549,11 @@ walk_down: | |||
3403 | 3549 | ||
3404 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 3550 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
3405 | if (found_key.objectid == bytenr && | 3551 | if (found_key.objectid == bytenr && |
3406 | found_key.type == BTRFS_EXTENT_REF_KEY) | 3552 | found_key.type == BTRFS_EXTENT_REF_KEY) { |
3553 | if (level < ref_path->shared_level) | ||
3554 | ref_path->shared_level = level; | ||
3407 | goto found; | 3555 | goto found; |
3556 | } | ||
3408 | next: | 3557 | next: |
3409 | level--; | 3558 | level--; |
3410 | btrfs_release_path(extent_root, path); | 3559 | btrfs_release_path(extent_root, path); |
@@ -3992,51 +4141,6 @@ out: | |||
3992 | return ret; | 4141 | return ret; |
3993 | } | 4142 | } |
3994 | 4143 | ||
3995 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
3996 | u64 num_bytes, u64 new_bytenr) | ||
3997 | { | ||
3998 | set_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
3999 | orig_bytenr, orig_bytenr + num_bytes - 1, | ||
4000 | EXTENT_LOCKED, GFP_NOFS); | ||
4001 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
4002 | orig_bytenr, new_bytenr); | ||
4003 | return 0; | ||
4004 | } | ||
4005 | |||
4006 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
4007 | u64 num_bytes, u64 *new_bytenr) | ||
4008 | { | ||
4009 | u64 bytenr; | ||
4010 | u64 cur_bytenr = orig_bytenr; | ||
4011 | u64 prev_bytenr = orig_bytenr; | ||
4012 | int ret; | ||
4013 | |||
4014 | while (1) { | ||
4015 | ret = get_state_private(&root->fs_info->reloc_mapping_tree, | ||
4016 | cur_bytenr, &bytenr); | ||
4017 | if (ret) | ||
4018 | break; | ||
4019 | prev_bytenr = cur_bytenr; | ||
4020 | cur_bytenr = bytenr; | ||
4021 | } | ||
4022 | |||
4023 | if (orig_bytenr == cur_bytenr) | ||
4024 | return -ENOENT; | ||
4025 | |||
4026 | if (prev_bytenr != orig_bytenr) { | ||
4027 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
4028 | orig_bytenr, cur_bytenr); | ||
4029 | } | ||
4030 | *new_bytenr = cur_bytenr; | ||
4031 | return 0; | ||
4032 | } | ||
4033 | |||
4034 | void btrfs_free_reloc_mappings(struct btrfs_root *root) | ||
4035 | { | ||
4036 | clear_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
4037 | 0, (u64)-1, -1, GFP_NOFS); | ||
4038 | } | ||
4039 | |||
4040 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | 4144 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, |
4041 | struct btrfs_root *root, | 4145 | struct btrfs_root *root, |
4042 | struct extent_buffer *buf, u64 orig_start) | 4146 | struct extent_buffer *buf, u64 orig_start) |
@@ -4222,15 +4326,30 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
4222 | return 0; | 4326 | return 0; |
4223 | } | 4327 | } |
4224 | 4328 | ||
4225 | int btrfs_free_reloc_root(struct btrfs_root *root) | 4329 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, |
4330 | struct btrfs_root *root) | ||
4226 | { | 4331 | { |
4227 | struct btrfs_root *reloc_root; | 4332 | struct btrfs_root *reloc_root; |
4333 | int ret; | ||
4228 | 4334 | ||
4229 | if (root->reloc_root) { | 4335 | if (root->reloc_root) { |
4230 | reloc_root = root->reloc_root; | 4336 | reloc_root = root->reloc_root; |
4231 | root->reloc_root = NULL; | 4337 | root->reloc_root = NULL; |
4232 | list_add(&reloc_root->dead_list, | 4338 | list_add(&reloc_root->dead_list, |
4233 | &root->fs_info->dead_reloc_roots); | 4339 | &root->fs_info->dead_reloc_roots); |
4340 | |||
4341 | btrfs_set_root_bytenr(&reloc_root->root_item, | ||
4342 | reloc_root->node->start); | ||
4343 | btrfs_set_root_level(&root->root_item, | ||
4344 | btrfs_header_level(reloc_root->node)); | ||
4345 | memset(&reloc_root->root_item.drop_progress, 0, | ||
4346 | sizeof(struct btrfs_disk_key)); | ||
4347 | reloc_root->root_item.drop_level = 0; | ||
4348 | |||
4349 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
4350 | &reloc_root->root_key, | ||
4351 | &reloc_root->root_item); | ||
4352 | BUG_ON(ret); | ||
4234 | } | 4353 | } |
4235 | return 0; | 4354 | return 0; |
4236 | } | 4355 | } |
@@ -4356,8 +4475,6 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, | |||
4356 | btrfs_set_root_refs(root_item, 0); | 4475 | btrfs_set_root_refs(root_item, 0); |
4357 | btrfs_set_root_bytenr(root_item, eb->start); | 4476 | btrfs_set_root_bytenr(root_item, eb->start); |
4358 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 4477 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
4359 | memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); | ||
4360 | root_item->drop_level = 0; | ||
4361 | 4478 | ||
4362 | btrfs_tree_unlock(eb); | 4479 | btrfs_tree_unlock(eb); |
4363 | free_extent_buffer(eb); | 4480 | free_extent_buffer(eb); |
@@ -4382,15 +4499,19 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, | |||
4382 | * Core function of space balance. | 4499 | * Core function of space balance. |
4383 | * | 4500 | * |
4384 | * The idea is using reloc trees to relocate tree blocks in reference | 4501 | * The idea is using reloc trees to relocate tree blocks in reference |
4385 | * counted roots. There is one reloc tree for each subvol, all reloc | 4502 | * counted roots. There is one reloc tree for each subvol, and all |
4386 | * trees share same key objectid. Reloc trees are snapshots of the | 4503 | * reloc trees share same root key objectid. Reloc trees are snapshots |
4387 | * latest committed roots (subvol root->commit_root). To relocate a tree | 4504 | * of the latest committed roots of subvols (root->commit_root). |
4388 | * block referenced by a subvol, the code COW the block through the reloc | 4505 | * |
4389 | * tree, then update pointer in the subvol to point to the new block. | 4506 | * To relocate a tree block referenced by a subvol, there are two steps. |
4390 | * Since all reloc trees share same key objectid, we can easily do special | 4507 | * COW the block through subvol's reloc tree, then update block pointer |
4391 | * handing to share tree blocks between reloc trees. Once a tree block has | 4508 | * in the subvol to point to the new block. Since all reloc trees share |
4392 | * been COWed in one reloc tree, we can use the result when the same block | 4509 | * same root key objectid, doing special handing for tree blocks owned |
4393 | * is COWed again through other reloc trees. | 4510 | * by them is easy. Once a tree block has been COWed in one reloc tree, |
4511 | * we can use the resulting new block directly when the same block is | ||
4512 | * required to COW again through other reloc trees. By this way, relocated | ||
4513 | * tree blocks are shared between reloc trees, so they are also shared | ||
4514 | * between subvols. | ||
4394 | */ | 4515 | */ |
4395 | static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | 4516 | static int noinline relocate_one_path(struct btrfs_trans_handle *trans, |
4396 | struct btrfs_root *root, | 4517 | struct btrfs_root *root, |
@@ -4405,15 +4526,14 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | |||
4405 | struct btrfs_key *keys; | 4526 | struct btrfs_key *keys; |
4406 | u64 *nodes; | 4527 | u64 *nodes; |
4407 | int level; | 4528 | int level; |
4408 | int lowest_merge; | 4529 | int shared_level; |
4409 | int lowest_level = 0; | 4530 | int lowest_level = 0; |
4410 | int update_refs; | ||
4411 | int ret; | 4531 | int ret; |
4412 | 4532 | ||
4413 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 4533 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
4414 | lowest_level = ref_path->owner_objectid; | 4534 | lowest_level = ref_path->owner_objectid; |
4415 | 4535 | ||
4416 | if (is_cowonly_root(ref_path->root_objectid)) { | 4536 | if (!root->ref_cows) { |
4417 | path->lowest_level = lowest_level; | 4537 | path->lowest_level = lowest_level; |
4418 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); | 4538 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); |
4419 | BUG_ON(ret < 0); | 4539 | BUG_ON(ret < 0); |
@@ -4422,91 +4542,49 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | |||
4422 | return 0; | 4542 | return 0; |
4423 | } | 4543 | } |
4424 | 4544 | ||
4425 | keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); | ||
4426 | BUG_ON(!keys); | ||
4427 | nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); | ||
4428 | BUG_ON(!nodes); | ||
4429 | |||
4430 | mutex_lock(&root->fs_info->tree_reloc_mutex); | 4545 | mutex_lock(&root->fs_info->tree_reloc_mutex); |
4431 | ret = init_reloc_tree(trans, root); | 4546 | ret = init_reloc_tree(trans, root); |
4432 | BUG_ON(ret); | 4547 | BUG_ON(ret); |
4433 | reloc_root = root->reloc_root; | 4548 | reloc_root = root->reloc_root; |
4434 | 4549 | ||
4435 | path->lowest_level = lowest_level; | 4550 | shared_level = ref_path->shared_level; |
4436 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); | 4551 | ref_path->shared_level = BTRFS_MAX_LEVEL - 1; |
4437 | BUG_ON(ret); | ||
4438 | /* | ||
4439 | * get relocation mapping for tree blocks in the path | ||
4440 | */ | ||
4441 | lowest_merge = BTRFS_MAX_LEVEL; | ||
4442 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
4443 | u64 new_bytenr; | ||
4444 | eb = path->nodes[level]; | ||
4445 | if (!eb || eb == reloc_root->node) | ||
4446 | continue; | ||
4447 | ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, | ||
4448 | &new_bytenr); | ||
4449 | if (ret) | ||
4450 | continue; | ||
4451 | if (level == 0) | ||
4452 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4453 | else | ||
4454 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4455 | nodes[level] = new_bytenr; | ||
4456 | lowest_merge = level; | ||
4457 | } | ||
4458 | 4552 | ||
4459 | update_refs = 0; | 4553 | keys = ref_path->node_keys; |
4460 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 4554 | nodes = ref_path->new_nodes; |
4461 | eb = path->nodes[0]; | 4555 | memset(&keys[shared_level + 1], 0, |
4462 | if (btrfs_header_generation(eb) < trans->transid) | 4556 | sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); |
4463 | update_refs = 1; | 4557 | memset(&nodes[shared_level + 1], 0, |
4464 | } | 4558 | sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); |
4465 | 4559 | ||
4466 | btrfs_release_path(reloc_root, path); | 4560 | if (nodes[lowest_level] == 0) { |
4467 | /* | 4561 | path->lowest_level = lowest_level; |
4468 | * merge tree blocks that already relocated in other reloc trees | 4562 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, |
4469 | */ | 4563 | 0, 1); |
4470 | if (lowest_merge != BTRFS_MAX_LEVEL) { | 4564 | BUG_ON(ret); |
4565 | for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { | ||
4566 | eb = path->nodes[level]; | ||
4567 | if (!eb || eb == reloc_root->node) | ||
4568 | break; | ||
4569 | nodes[level] = eb->start; | ||
4570 | if (level == 0) | ||
4571 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4572 | else | ||
4573 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4574 | } | ||
4575 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4576 | eb = path->nodes[0]; | ||
4577 | ret = replace_extents_in_leaf(trans, reloc_root, eb, | ||
4578 | group, reloc_inode); | ||
4579 | BUG_ON(ret); | ||
4580 | } | ||
4581 | btrfs_release_path(reloc_root, path); | ||
4582 | } else { | ||
4471 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, | 4583 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, |
4472 | lowest_merge); | 4584 | lowest_level); |
4473 | BUG_ON(ret < 0); | ||
4474 | } | ||
4475 | /* | ||
4476 | * cow any tree blocks that still haven't been relocated | ||
4477 | */ | ||
4478 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); | ||
4479 | BUG_ON(ret); | ||
4480 | /* | ||
4481 | * if we are relocating data block group, update extent pointers | ||
4482 | * in the newly created tree leaf. | ||
4483 | */ | ||
4484 | eb = path->nodes[0]; | ||
4485 | if (update_refs && nodes[0] != eb->start) { | ||
4486 | ret = replace_extents_in_leaf(trans, reloc_root, eb, group, | ||
4487 | reloc_inode); | ||
4488 | BUG_ON(ret); | 4585 | BUG_ON(ret); |
4489 | } | 4586 | } |
4490 | 4587 | ||
4491 | memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); | ||
4492 | memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); | ||
4493 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
4494 | eb = path->nodes[level]; | ||
4495 | if (!eb || eb == reloc_root->node) | ||
4496 | continue; | ||
4497 | BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); | ||
4498 | nodes[level] = eb->start; | ||
4499 | if (level == 0) | ||
4500 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4501 | else | ||
4502 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4503 | } | ||
4504 | |||
4505 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4506 | eb = path->nodes[0]; | ||
4507 | extent_buffer_get(eb); | ||
4508 | } | ||
4509 | btrfs_release_path(reloc_root, path); | ||
4510 | /* | 4588 | /* |
4511 | * replace tree blocks in the fs tree with tree blocks in | 4589 | * replace tree blocks in the fs tree with tree blocks in |
4512 | * the reloc tree. | 4590 | * the reloc tree. |
@@ -4515,15 +4593,19 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | |||
4515 | BUG_ON(ret < 0); | 4593 | BUG_ON(ret < 0); |
4516 | 4594 | ||
4517 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 4595 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
4596 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, | ||
4597 | 0, 0); | ||
4598 | BUG_ON(ret); | ||
4599 | extent_buffer_get(path->nodes[0]); | ||
4600 | eb = path->nodes[0]; | ||
4601 | btrfs_release_path(reloc_root, path); | ||
4518 | ret = invalidate_extent_cache(reloc_root, eb, group, root); | 4602 | ret = invalidate_extent_cache(reloc_root, eb, group, root); |
4519 | BUG_ON(ret); | 4603 | BUG_ON(ret); |
4520 | free_extent_buffer(eb); | 4604 | free_extent_buffer(eb); |
4521 | } | 4605 | } |
4522 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
4523 | 4606 | ||
4607 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
4524 | path->lowest_level = 0; | 4608 | path->lowest_level = 0; |
4525 | kfree(nodes); | ||
4526 | kfree(keys); | ||
4527 | return 0; | 4609 | return 0; |
4528 | } | 4610 | } |
4529 | 4611 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5ecc24d634a2..1df67129cc3d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -521,7 +521,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | |||
521 | dirty = root->dirty_root; | 521 | dirty = root->dirty_root; |
522 | 522 | ||
523 | btrfs_free_log(trans, root); | 523 | btrfs_free_log(trans, root); |
524 | btrfs_free_reloc_root(root); | 524 | btrfs_free_reloc_root(trans, root); |
525 | 525 | ||
526 | if (root->commit_root == root->node) { | 526 | if (root->commit_root == root->node) { |
527 | WARN_ON(root->node->start != | 527 | WARN_ON(root->node->start != |
@@ -930,8 +930,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
930 | */ | 930 | */ |
931 | btrfs_free_log_root_tree(trans, root->fs_info); | 931 | btrfs_free_log_root_tree(trans, root->fs_info); |
932 | 932 | ||
933 | btrfs_free_reloc_mappings(root); | ||
934 | |||
935 | ret = btrfs_commit_tree_roots(trans, root); | 933 | ret = btrfs_commit_tree_roots(trans, root); |
936 | BUG_ON(ret); | 934 | BUG_ON(ret); |
937 | 935 | ||