diff options
-rw-r--r-- | fs/btrfs/ctree.c | 93 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 13 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 364 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 4 |
5 files changed, 277 insertions, 199 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9caeb377de6..73899d0f9d8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -287,7 +287,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
287 | /* | 287 | /* |
288 | * There are only two places that can drop reference to | 288 | * There are only two places that can drop reference to |
289 | * tree blocks owned by living reloc trees, one is here, | 289 | * tree blocks owned by living reloc trees, one is here, |
290 | * the other place is btrfs_merge_path. In both places, | 290 | * the other place is btrfs_drop_subtree. In both places, |
291 | * we check reference count while tree block is locked. | 291 | * we check reference count while tree block is locked. |
292 | * Furthermore, if reference count is one, it won't get | 292 | * Furthermore, if reference count is one, it won't get |
293 | * increased by someone else. | 293 | * increased by someone else. |
@@ -312,9 +312,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
312 | } | 312 | } |
313 | 313 | ||
314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
315 | ret = btrfs_add_reloc_mapping(root, buf->start, | ||
316 | buf->len, cow->start); | ||
317 | BUG_ON(ret); | ||
318 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); | 315 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); |
319 | WARN_ON(ret); | 316 | WARN_ON(ret); |
320 | } | 317 | } |
@@ -1627,61 +1624,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1627 | btrfs_node_key_to_cpu(eb, &key, slot); | 1624 | btrfs_node_key_to_cpu(eb, &key, slot); |
1628 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); | 1625 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); |
1629 | 1626 | ||
1627 | if (generation == trans->transid) { | ||
1628 | eb = read_tree_block(root, bytenr, blocksize, | ||
1629 | generation); | ||
1630 | btrfs_tree_lock(eb); | ||
1631 | } | ||
1632 | |||
1630 | /* | 1633 | /* |
1631 | * if node keys match and node pointer hasn't been modified | 1634 | * if node keys match and node pointer hasn't been modified |
1632 | * in the running transaction, we can merge the path. for | 1635 | * in the running transaction, we can merge the path. for |
1633 | * reloc trees, the node pointer check is skipped, this is | 1636 | * blocks owened by reloc trees, the node pointer check is |
1634 | * because the reloc trees are fully controlled by the space | 1637 | * skipped, this is because these blocks are fully controlled |
1635 | * balance code, no one else can modify them. | 1638 | * by the space balance code, no one else can modify them. |
1636 | */ | 1639 | */ |
1637 | if (!nodes[level - 1] || !key_match || | 1640 | if (!nodes[level - 1] || !key_match || |
1638 | (generation == trans->transid && | 1641 | (generation == trans->transid && |
1639 | root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { | 1642 | btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { |
1640 | next_level: | 1643 | if (level == 1 || level == lowest_level + 1) { |
1641 | if (level == 1 || level == lowest_level + 1) | 1644 | if (generation == trans->transid) { |
1645 | btrfs_tree_unlock(eb); | ||
1646 | free_extent_buffer(eb); | ||
1647 | } | ||
1642 | break; | 1648 | break; |
1649 | } | ||
1643 | 1650 | ||
1644 | eb = read_tree_block(root, bytenr, blocksize, | 1651 | if (generation != trans->transid) { |
1645 | generation); | 1652 | eb = read_tree_block(root, bytenr, blocksize, |
1646 | btrfs_tree_lock(eb); | 1653 | generation); |
1654 | btrfs_tree_lock(eb); | ||
1655 | } | ||
1647 | 1656 | ||
1648 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | 1657 | ret = btrfs_cow_block(trans, root, eb, parent, slot, |
1649 | &eb, 0); | 1658 | &eb, 0); |
1650 | BUG_ON(ret); | 1659 | BUG_ON(ret); |
1651 | 1660 | ||
1661 | if (root->root_key.objectid == | ||
1662 | BTRFS_TREE_RELOC_OBJECTID) { | ||
1663 | if (!nodes[level - 1]) { | ||
1664 | nodes[level - 1] = eb->start; | ||
1665 | memcpy(&node_keys[level - 1], &key, | ||
1666 | sizeof(node_keys[0])); | ||
1667 | } else { | ||
1668 | WARN_ON(1); | ||
1669 | } | ||
1670 | } | ||
1671 | |||
1652 | btrfs_tree_unlock(parent); | 1672 | btrfs_tree_unlock(parent); |
1653 | free_extent_buffer(parent); | 1673 | free_extent_buffer(parent); |
1654 | parent = eb; | 1674 | parent = eb; |
1655 | continue; | 1675 | continue; |
1656 | } | 1676 | } |
1657 | 1677 | ||
1658 | if (generation == trans->transid) { | ||
1659 | u32 refs; | ||
1660 | BUG_ON(btrfs_header_owner(eb) != | ||
1661 | BTRFS_TREE_RELOC_OBJECTID); | ||
1662 | /* | ||
1663 | * lock the block to keep __btrfs_cow_block from | ||
1664 | * changing the reference count. | ||
1665 | */ | ||
1666 | eb = read_tree_block(root, bytenr, blocksize, | ||
1667 | generation); | ||
1668 | btrfs_tree_lock(eb); | ||
1669 | |||
1670 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, | ||
1671 | blocksize, &refs); | ||
1672 | BUG_ON(ret); | ||
1673 | /* | ||
1674 | * if replace block whose reference count is one, | ||
1675 | * we have to "drop the subtree". so skip it for | ||
1676 | * simplicity | ||
1677 | */ | ||
1678 | if (refs == 1) { | ||
1679 | btrfs_tree_unlock(eb); | ||
1680 | free_extent_buffer(eb); | ||
1681 | goto next_level; | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1685 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); | 1678 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); |
1686 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); | 1679 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); |
1687 | btrfs_mark_buffer_dirty(parent); | 1680 | btrfs_mark_buffer_dirty(parent); |
@@ -1693,16 +1686,24 @@ next_level: | |||
1693 | btrfs_header_generation(parent), | 1686 | btrfs_header_generation(parent), |
1694 | level - 1); | 1687 | level - 1); |
1695 | BUG_ON(ret); | 1688 | BUG_ON(ret); |
1696 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1697 | blocksize, parent->start, | ||
1698 | btrfs_header_owner(parent), | ||
1699 | btrfs_header_generation(parent), | ||
1700 | level - 1, 1); | ||
1701 | BUG_ON(ret); | ||
1702 | 1689 | ||
1690 | /* | ||
1691 | * If the block was created in the running transaction, | ||
1692 | * it's possible this is the last reference to it, so we | ||
1693 | * should drop the subtree. | ||
1694 | */ | ||
1703 | if (generation == trans->transid) { | 1695 | if (generation == trans->transid) { |
1696 | ret = btrfs_drop_subtree(trans, root, eb, parent); | ||
1697 | BUG_ON(ret); | ||
1704 | btrfs_tree_unlock(eb); | 1698 | btrfs_tree_unlock(eb); |
1705 | free_extent_buffer(eb); | 1699 | free_extent_buffer(eb); |
1700 | } else { | ||
1701 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1702 | blocksize, parent->start, | ||
1703 | btrfs_header_owner(parent), | ||
1704 | btrfs_header_generation(parent), | ||
1705 | level - 1, 1); | ||
1706 | BUG_ON(ret); | ||
1706 | } | 1707 | } |
1707 | break; | 1708 | break; |
1708 | } | 1709 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 793d8fdda24..117090995e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -684,7 +684,6 @@ struct btrfs_fs_info { | |||
684 | int thread_pool_size; | 684 | int thread_pool_size; |
685 | 685 | ||
686 | /* tree relocation relocated fields */ | 686 | /* tree relocation relocated fields */ |
687 | struct extent_io_tree reloc_mapping_tree; | ||
688 | struct list_head dead_reloc_roots; | 687 | struct list_head dead_reloc_roots; |
689 | struct btrfs_leaf_ref_tree reloc_ref_tree; | 688 | struct btrfs_leaf_ref_tree reloc_ref_tree; |
690 | struct btrfs_leaf_ref_tree shared_ref_tree; | 689 | struct btrfs_leaf_ref_tree shared_ref_tree; |
@@ -1636,13 +1635,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
1636 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 1635 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
1637 | struct btrfs_root *root, u64 group_start); | 1636 | struct btrfs_root *root, u64 group_start); |
1638 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | 1637 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); |
1639 | int btrfs_free_reloc_root(struct btrfs_root *root); | 1638 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, |
1639 | struct btrfs_root *root); | ||
1640 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); | 1640 | int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); |
1641 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
1642 | u64 num_bytes, u64 new_bytenr); | ||
1643 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
1644 | u64 num_bytes, u64 *new_bytenr); | ||
1645 | void btrfs_free_reloc_mappings(struct btrfs_root *root); | ||
1646 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | 1641 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, |
1647 | struct btrfs_root *root, | 1642 | struct btrfs_root *root, |
1648 | struct extent_buffer *buf, u64 orig_start); | 1643 | struct extent_buffer *buf, u64 orig_start); |
@@ -1726,6 +1721,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | |||
1726 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 1721 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
1727 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | 1722 | int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root |
1728 | *root); | 1723 | *root); |
1724 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | ||
1725 | struct btrfs_root *root, | ||
1726 | struct extent_buffer *node, | ||
1727 | struct extent_buffer *parent); | ||
1729 | /* root-item.c */ | 1728 | /* root-item.c */ |
1730 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1729 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1731 | struct btrfs_key *key); | 1730 | struct btrfs_key *key); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dc95f636a11..796256440df 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1448,8 +1448,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1448 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1448 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1449 | fs_info->do_barriers = 1; | 1449 | fs_info->do_barriers = 1; |
1450 | 1450 | ||
1451 | extent_io_tree_init(&fs_info->reloc_mapping_tree, | ||
1452 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1453 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1451 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
1454 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); | 1452 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); |
1455 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); | 1453 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bbf04e80a1a..56e41369d71 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3032,13 +3032,103 @@ out: | |||
3032 | } | 3032 | } |
3033 | 3033 | ||
3034 | /* | 3034 | /* |
3035 | * helper function for drop_subtree, this function is similar to | ||
3036 | * walk_down_tree. The main difference is that it checks reference | ||
3037 | * counts while tree blocks are locked. | ||
3038 | */ | ||
3039 | static int noinline walk_down_subtree(struct btrfs_trans_handle *trans, | ||
3040 | struct btrfs_root *root, | ||
3041 | struct btrfs_path *path, int *level) | ||
3042 | { | ||
3043 | struct extent_buffer *next; | ||
3044 | struct extent_buffer *cur; | ||
3045 | struct extent_buffer *parent; | ||
3046 | u64 bytenr; | ||
3047 | u64 ptr_gen; | ||
3048 | u32 blocksize; | ||
3049 | u32 refs; | ||
3050 | int ret; | ||
3051 | |||
3052 | cur = path->nodes[*level]; | ||
3053 | ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, | ||
3054 | &refs); | ||
3055 | BUG_ON(ret); | ||
3056 | if (refs > 1) | ||
3057 | goto out; | ||
3058 | |||
3059 | while (*level >= 0) { | ||
3060 | cur = path->nodes[*level]; | ||
3061 | if (*level == 0) { | ||
3062 | ret = btrfs_drop_leaf_ref(trans, root, cur); | ||
3063 | BUG_ON(ret); | ||
3064 | clean_tree_block(trans, root, cur); | ||
3065 | break; | ||
3066 | } | ||
3067 | if (path->slots[*level] >= btrfs_header_nritems(cur)) { | ||
3068 | clean_tree_block(trans, root, cur); | ||
3069 | break; | ||
3070 | } | ||
3071 | |||
3072 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
3073 | blocksize = btrfs_level_size(root, *level - 1); | ||
3074 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
3075 | |||
3076 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
3077 | btrfs_tree_lock(next); | ||
3078 | |||
3079 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, | ||
3080 | &refs); | ||
3081 | BUG_ON(ret); | ||
3082 | if (refs > 1) { | ||
3083 | parent = path->nodes[*level]; | ||
3084 | ret = btrfs_free_extent(trans, root, bytenr, | ||
3085 | blocksize, parent->start, | ||
3086 | btrfs_header_owner(parent), | ||
3087 | btrfs_header_generation(parent), | ||
3088 | *level - 1, 1); | ||
3089 | BUG_ON(ret); | ||
3090 | path->slots[*level]++; | ||
3091 | btrfs_tree_unlock(next); | ||
3092 | free_extent_buffer(next); | ||
3093 | continue; | ||
3094 | } | ||
3095 | |||
3096 | *level = btrfs_header_level(next); | ||
3097 | path->nodes[*level] = next; | ||
3098 | path->slots[*level] = 0; | ||
3099 | path->locks[*level] = 1; | ||
3100 | cond_resched(); | ||
3101 | } | ||
3102 | out: | ||
3103 | parent = path->nodes[*level + 1]; | ||
3104 | bytenr = path->nodes[*level]->start; | ||
3105 | blocksize = path->nodes[*level]->len; | ||
3106 | |||
3107 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
3108 | parent->start, btrfs_header_owner(parent), | ||
3109 | btrfs_header_generation(parent), *level, 1); | ||
3110 | BUG_ON(ret); | ||
3111 | |||
3112 | if (path->locks[*level]) { | ||
3113 | btrfs_tree_unlock(path->nodes[*level]); | ||
3114 | path->locks[*level] = 0; | ||
3115 | } | ||
3116 | free_extent_buffer(path->nodes[*level]); | ||
3117 | path->nodes[*level] = NULL; | ||
3118 | *level += 1; | ||
3119 | cond_resched(); | ||
3120 | return 0; | ||
3121 | } | ||
3122 | |||
3123 | /* | ||
3035 | * helper for dropping snapshots. This walks back up the tree in the path | 3124 | * helper for dropping snapshots. This walks back up the tree in the path |
3036 | * to find the first node higher up where we haven't yet gone through | 3125 | * to find the first node higher up where we haven't yet gone through |
3037 | * all the slots | 3126 | * all the slots |
3038 | */ | 3127 | */ |
3039 | static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | 3128 | static int noinline walk_up_tree(struct btrfs_trans_handle *trans, |
3040 | struct btrfs_root *root, | 3129 | struct btrfs_root *root, |
3041 | struct btrfs_path *path, int *level) | 3130 | struct btrfs_path *path, |
3131 | int *level, int max_level) | ||
3042 | { | 3132 | { |
3043 | u64 root_owner; | 3133 | u64 root_owner; |
3044 | u64 root_gen; | 3134 | u64 root_gen; |
@@ -3047,7 +3137,7 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | |||
3047 | int slot; | 3137 | int slot; |
3048 | int ret; | 3138 | int ret; |
3049 | 3139 | ||
3050 | for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 3140 | for (i = *level; i < max_level && path->nodes[i]; i++) { |
3051 | slot = path->slots[i]; | 3141 | slot = path->slots[i]; |
3052 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 3142 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { |
3053 | struct extent_buffer *node; | 3143 | struct extent_buffer *node; |
@@ -3070,12 +3160,18 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, | |||
3070 | 3160 | ||
3071 | root_owner = btrfs_header_owner(parent); | 3161 | root_owner = btrfs_header_owner(parent); |
3072 | root_gen = btrfs_header_generation(parent); | 3162 | root_gen = btrfs_header_generation(parent); |
3163 | |||
3164 | clean_tree_block(trans, root, path->nodes[*level]); | ||
3073 | ret = btrfs_free_extent(trans, root, | 3165 | ret = btrfs_free_extent(trans, root, |
3074 | path->nodes[*level]->start, | 3166 | path->nodes[*level]->start, |
3075 | path->nodes[*level]->len, | 3167 | path->nodes[*level]->len, |
3076 | parent->start, root_owner, | 3168 | parent->start, root_owner, |
3077 | root_gen, *level, 1); | 3169 | root_gen, *level, 1); |
3078 | BUG_ON(ret); | 3170 | BUG_ON(ret); |
3171 | if (path->locks[*level]) { | ||
3172 | btrfs_tree_unlock(path->nodes[*level]); | ||
3173 | path->locks[*level] = 0; | ||
3174 | } | ||
3079 | free_extent_buffer(path->nodes[*level]); | 3175 | free_extent_buffer(path->nodes[*level]); |
3080 | path->nodes[*level] = NULL; | 3176 | path->nodes[*level] = NULL; |
3081 | *level = i + 1; | 3177 | *level = i + 1; |
@@ -3145,7 +3241,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3145 | if (wret < 0) | 3241 | if (wret < 0) |
3146 | ret = wret; | 3242 | ret = wret; |
3147 | 3243 | ||
3148 | wret = walk_up_tree(trans, root, path, &level); | 3244 | wret = walk_up_tree(trans, root, path, &level, |
3245 | BTRFS_MAX_LEVEL); | ||
3149 | if (wret > 0) | 3246 | if (wret > 0) |
3150 | break; | 3247 | break; |
3151 | if (wret < 0) | 3248 | if (wret < 0) |
@@ -3168,6 +3265,50 @@ out: | |||
3168 | return ret; | 3265 | return ret; |
3169 | } | 3266 | } |
3170 | 3267 | ||
3268 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | ||
3269 | struct btrfs_root *root, | ||
3270 | struct extent_buffer *node, | ||
3271 | struct extent_buffer *parent) | ||
3272 | { | ||
3273 | struct btrfs_path *path; | ||
3274 | int level; | ||
3275 | int parent_level; | ||
3276 | int ret = 0; | ||
3277 | int wret; | ||
3278 | |||
3279 | path = btrfs_alloc_path(); | ||
3280 | BUG_ON(!path); | ||
3281 | |||
3282 | BUG_ON(!btrfs_tree_locked(parent)); | ||
3283 | parent_level = btrfs_header_level(parent); | ||
3284 | extent_buffer_get(parent); | ||
3285 | path->nodes[parent_level] = parent; | ||
3286 | path->slots[parent_level] = btrfs_header_nritems(parent); | ||
3287 | |||
3288 | BUG_ON(!btrfs_tree_locked(node)); | ||
3289 | level = btrfs_header_level(node); | ||
3290 | extent_buffer_get(node); | ||
3291 | path->nodes[level] = node; | ||
3292 | path->slots[level] = 0; | ||
3293 | |||
3294 | while (1) { | ||
3295 | wret = walk_down_subtree(trans, root, path, &level); | ||
3296 | if (wret < 0) | ||
3297 | ret = wret; | ||
3298 | if (wret != 0) | ||
3299 | break; | ||
3300 | |||
3301 | wret = walk_up_tree(trans, root, path, &level, parent_level); | ||
3302 | if (wret < 0) | ||
3303 | ret = wret; | ||
3304 | if (wret != 0) | ||
3305 | break; | ||
3306 | } | ||
3307 | |||
3308 | btrfs_free_path(path); | ||
3309 | return ret; | ||
3310 | } | ||
3311 | |||
3171 | static unsigned long calc_ra(unsigned long start, unsigned long last, | 3312 | static unsigned long calc_ra(unsigned long start, unsigned long last, |
3172 | unsigned long nr) | 3313 | unsigned long nr) |
3173 | { | 3314 | { |
@@ -3312,6 +3453,10 @@ struct btrfs_ref_path { | |||
3312 | u32 num_refs; | 3453 | u32 num_refs; |
3313 | int lowest_level; | 3454 | int lowest_level; |
3314 | int current_level; | 3455 | int current_level; |
3456 | int shared_level; | ||
3457 | |||
3458 | struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; | ||
3459 | u64 new_nodes[BTRFS_MAX_LEVEL]; | ||
3315 | }; | 3460 | }; |
3316 | 3461 | ||
3317 | struct disk_extent { | 3462 | struct disk_extent { |
@@ -3360,6 +3505,7 @@ static int noinline __next_ref_path(struct btrfs_trans_handle *trans, | |||
3360 | if (first_time) { | 3505 | if (first_time) { |
3361 | ref_path->lowest_level = -1; | 3506 | ref_path->lowest_level = -1; |
3362 | ref_path->current_level = -1; | 3507 | ref_path->current_level = -1; |
3508 | ref_path->shared_level = -1; | ||
3363 | goto walk_up; | 3509 | goto walk_up; |
3364 | } | 3510 | } |
3365 | walk_down: | 3511 | walk_down: |
@@ -3403,8 +3549,11 @@ walk_down: | |||
3403 | 3549 | ||
3404 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 3550 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
3405 | if (found_key.objectid == bytenr && | 3551 | if (found_key.objectid == bytenr && |
3406 | found_key.type == BTRFS_EXTENT_REF_KEY) | 3552 | found_key.type == BTRFS_EXTENT_REF_KEY) { |
3553 | if (level < ref_path->shared_level) | ||
3554 | ref_path->shared_level = level; | ||
3407 | goto found; | 3555 | goto found; |
3556 | } | ||
3408 | next: | 3557 | next: |
3409 | level--; | 3558 | level--; |
3410 | btrfs_release_path(extent_root, path); | 3559 | btrfs_release_path(extent_root, path); |
@@ -3992,51 +4141,6 @@ out: | |||
3992 | return ret; | 4141 | return ret; |
3993 | } | 4142 | } |
3994 | 4143 | ||
3995 | int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
3996 | u64 num_bytes, u64 new_bytenr) | ||
3997 | { | ||
3998 | set_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
3999 | orig_bytenr, orig_bytenr + num_bytes - 1, | ||
4000 | EXTENT_LOCKED, GFP_NOFS); | ||
4001 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
4002 | orig_bytenr, new_bytenr); | ||
4003 | return 0; | ||
4004 | } | ||
4005 | |||
4006 | int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, | ||
4007 | u64 num_bytes, u64 *new_bytenr) | ||
4008 | { | ||
4009 | u64 bytenr; | ||
4010 | u64 cur_bytenr = orig_bytenr; | ||
4011 | u64 prev_bytenr = orig_bytenr; | ||
4012 | int ret; | ||
4013 | |||
4014 | while (1) { | ||
4015 | ret = get_state_private(&root->fs_info->reloc_mapping_tree, | ||
4016 | cur_bytenr, &bytenr); | ||
4017 | if (ret) | ||
4018 | break; | ||
4019 | prev_bytenr = cur_bytenr; | ||
4020 | cur_bytenr = bytenr; | ||
4021 | } | ||
4022 | |||
4023 | if (orig_bytenr == cur_bytenr) | ||
4024 | return -ENOENT; | ||
4025 | |||
4026 | if (prev_bytenr != orig_bytenr) { | ||
4027 | set_state_private(&root->fs_info->reloc_mapping_tree, | ||
4028 | orig_bytenr, cur_bytenr); | ||
4029 | } | ||
4030 | *new_bytenr = cur_bytenr; | ||
4031 | return 0; | ||
4032 | } | ||
4033 | |||
4034 | void btrfs_free_reloc_mappings(struct btrfs_root *root) | ||
4035 | { | ||
4036 | clear_extent_bits(&root->fs_info->reloc_mapping_tree, | ||
4037 | 0, (u64)-1, -1, GFP_NOFS); | ||
4038 | } | ||
4039 | |||
4040 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | 4144 | int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, |
4041 | struct btrfs_root *root, | 4145 | struct btrfs_root *root, |
4042 | struct extent_buffer *buf, u64 orig_start) | 4146 | struct extent_buffer *buf, u64 orig_start) |
@@ -4222,15 +4326,30 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
4222 | return 0; | 4326 | return 0; |
4223 | } | 4327 | } |
4224 | 4328 | ||
4225 | int btrfs_free_reloc_root(struct btrfs_root *root) | 4329 | int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, |
4330 | struct btrfs_root *root) | ||
4226 | { | 4331 | { |
4227 | struct btrfs_root *reloc_root; | 4332 | struct btrfs_root *reloc_root; |
4333 | int ret; | ||
4228 | 4334 | ||
4229 | if (root->reloc_root) { | 4335 | if (root->reloc_root) { |
4230 | reloc_root = root->reloc_root; | 4336 | reloc_root = root->reloc_root; |
4231 | root->reloc_root = NULL; | 4337 | root->reloc_root = NULL; |
4232 | list_add(&reloc_root->dead_list, | 4338 | list_add(&reloc_root->dead_list, |
4233 | &root->fs_info->dead_reloc_roots); | 4339 | &root->fs_info->dead_reloc_roots); |
4340 | |||
4341 | btrfs_set_root_bytenr(&reloc_root->root_item, | ||
4342 | reloc_root->node->start); | ||
4343 | btrfs_set_root_level(&root->root_item, | ||
4344 | btrfs_header_level(reloc_root->node)); | ||
4345 | memset(&reloc_root->root_item.drop_progress, 0, | ||
4346 | sizeof(struct btrfs_disk_key)); | ||
4347 | reloc_root->root_item.drop_level = 0; | ||
4348 | |||
4349 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
4350 | &reloc_root->root_key, | ||
4351 | &reloc_root->root_item); | ||
4352 | BUG_ON(ret); | ||
4234 | } | 4353 | } |
4235 | return 0; | 4354 | return 0; |
4236 | } | 4355 | } |
@@ -4356,8 +4475,6 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, | |||
4356 | btrfs_set_root_refs(root_item, 0); | 4475 | btrfs_set_root_refs(root_item, 0); |
4357 | btrfs_set_root_bytenr(root_item, eb->start); | 4476 | btrfs_set_root_bytenr(root_item, eb->start); |
4358 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 4477 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
4359 | memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); | ||
4360 | root_item->drop_level = 0; | ||
4361 | 4478 | ||
4362 | btrfs_tree_unlock(eb); | 4479 | btrfs_tree_unlock(eb); |
4363 | free_extent_buffer(eb); | 4480 | free_extent_buffer(eb); |
@@ -4382,15 +4499,19 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, | |||
4382 | * Core function of space balance. | 4499 | * Core function of space balance. |
4383 | * | 4500 | * |
4384 | * The idea is using reloc trees to relocate tree blocks in reference | 4501 | * The idea is using reloc trees to relocate tree blocks in reference |
4385 | * counted roots. There is one reloc tree for each subvol, all reloc | 4502 | * counted roots. There is one reloc tree for each subvol, and all |
4386 | * trees share same key objectid. Reloc trees are snapshots of the | 4503 | * reloc trees share same root key objectid. Reloc trees are snapshots |
4387 | * latest committed roots (subvol root->commit_root). To relocate a tree | 4504 | * of the latest committed roots of subvols (root->commit_root). |
4388 | * block referenced by a subvol, the code COW the block through the reloc | 4505 | * |
4389 | * tree, then update pointer in the subvol to point to the new block. | 4506 | * To relocate a tree block referenced by a subvol, there are two steps. |
4390 | * Since all reloc trees share same key objectid, we can easily do special | 4507 | * COW the block through subvol's reloc tree, then update block pointer |
4391 | * handing to share tree blocks between reloc trees. Once a tree block has | 4508 | * in the subvol to point to the new block. Since all reloc trees share |
4392 | * been COWed in one reloc tree, we can use the result when the same block | 4509 | * same root key objectid, doing special handing for tree blocks owned |
4393 | * is COWed again through other reloc trees. | 4510 | * by them is easy. Once a tree block has been COWed in one reloc tree, |
4511 | * we can use the resulting new block directly when the same block is | ||
4512 | * required to COW again through other reloc trees. By this way, relocated | ||
4513 | * tree blocks are shared between reloc trees, so they are also shared | ||
4514 | * between subvols. | ||
4394 | */ | 4515 | */ |
4395 | static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | 4516 | static int noinline relocate_one_path(struct btrfs_trans_handle *trans, |
4396 | struct btrfs_root *root, | 4517 | struct btrfs_root *root, |
@@ -4405,15 +4526,14 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | |||
4405 | struct btrfs_key *keys; | 4526 | struct btrfs_key *keys; |
4406 | u64 *nodes; | 4527 | u64 *nodes; |
4407 | int level; | 4528 | int level; |
4408 | int lowest_merge; | 4529 | int shared_level; |
4409 | int lowest_level = 0; | 4530 | int lowest_level = 0; |
4410 | int update_refs; | ||
4411 | int ret; | 4531 | int ret; |
4412 | 4532 | ||
4413 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 4533 | if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
4414 | lowest_level = ref_path->owner_objectid; | 4534 | lowest_level = ref_path->owner_objectid; |
4415 | 4535 | ||
4416 | if (is_cowonly_root(ref_path->root_objectid)) { | 4536 | if (!root->ref_cows) { |
4417 | path->lowest_level = lowest_level; | 4537 | path->lowest_level = lowest_level; |
4418 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); | 4538 | ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); |
4419 | BUG_ON(ret < 0); | 4539 | BUG_ON(ret < 0); |
@@ -4422,91 +4542,49 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | |||
4422 | return 0; | 4542 | return 0; |
4423 | } | 4543 | } |
4424 | 4544 | ||
4425 | keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); | ||
4426 | BUG_ON(!keys); | ||
4427 | nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); | ||
4428 | BUG_ON(!nodes); | ||
4429 | |||
4430 | mutex_lock(&root->fs_info->tree_reloc_mutex); | 4545 | mutex_lock(&root->fs_info->tree_reloc_mutex); |
4431 | ret = init_reloc_tree(trans, root); | 4546 | ret = init_reloc_tree(trans, root); |
4432 | BUG_ON(ret); | 4547 | BUG_ON(ret); |
4433 | reloc_root = root->reloc_root; | 4548 | reloc_root = root->reloc_root; |
4434 | 4549 | ||
4435 | path->lowest_level = lowest_level; | 4550 | shared_level = ref_path->shared_level; |
4436 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); | 4551 | ref_path->shared_level = BTRFS_MAX_LEVEL - 1; |
4437 | BUG_ON(ret); | ||
4438 | /* | ||
4439 | * get relocation mapping for tree blocks in the path | ||
4440 | */ | ||
4441 | lowest_merge = BTRFS_MAX_LEVEL; | ||
4442 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
4443 | u64 new_bytenr; | ||
4444 | eb = path->nodes[level]; | ||
4445 | if (!eb || eb == reloc_root->node) | ||
4446 | continue; | ||
4447 | ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, | ||
4448 | &new_bytenr); | ||
4449 | if (ret) | ||
4450 | continue; | ||
4451 | if (level == 0) | ||
4452 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4453 | else | ||
4454 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4455 | nodes[level] = new_bytenr; | ||
4456 | lowest_merge = level; | ||
4457 | } | ||
4458 | 4552 | ||
4459 | update_refs = 0; | 4553 | keys = ref_path->node_keys; |
4460 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 4554 | nodes = ref_path->new_nodes; |
4461 | eb = path->nodes[0]; | 4555 | memset(&keys[shared_level + 1], 0, |
4462 | if (btrfs_header_generation(eb) < trans->transid) | 4556 | sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); |
4463 | update_refs = 1; | 4557 | memset(&nodes[shared_level + 1], 0, |
4464 | } | 4558 | sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); |
4465 | 4559 | ||
4466 | btrfs_release_path(reloc_root, path); | 4560 | if (nodes[lowest_level] == 0) { |
4467 | /* | 4561 | path->lowest_level = lowest_level; |
4468 | * merge tree blocks that already relocated in other reloc trees | 4562 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, |
4469 | */ | 4563 | 0, 1); |
4470 | if (lowest_merge != BTRFS_MAX_LEVEL) { | 4564 | BUG_ON(ret); |
4565 | for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { | ||
4566 | eb = path->nodes[level]; | ||
4567 | if (!eb || eb == reloc_root->node) | ||
4568 | break; | ||
4569 | nodes[level] = eb->start; | ||
4570 | if (level == 0) | ||
4571 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4572 | else | ||
4573 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4574 | } | ||
4575 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4576 | eb = path->nodes[0]; | ||
4577 | ret = replace_extents_in_leaf(trans, reloc_root, eb, | ||
4578 | group, reloc_inode); | ||
4579 | BUG_ON(ret); | ||
4580 | } | ||
4581 | btrfs_release_path(reloc_root, path); | ||
4582 | } else { | ||
4471 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, | 4583 | ret = btrfs_merge_path(trans, reloc_root, keys, nodes, |
4472 | lowest_merge); | 4584 | lowest_level); |
4473 | BUG_ON(ret < 0); | ||
4474 | } | ||
4475 | /* | ||
4476 | * cow any tree blocks that still haven't been relocated | ||
4477 | */ | ||
4478 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); | ||
4479 | BUG_ON(ret); | ||
4480 | /* | ||
4481 | * if we are relocating data block group, update extent pointers | ||
4482 | * in the newly created tree leaf. | ||
4483 | */ | ||
4484 | eb = path->nodes[0]; | ||
4485 | if (update_refs && nodes[0] != eb->start) { | ||
4486 | ret = replace_extents_in_leaf(trans, reloc_root, eb, group, | ||
4487 | reloc_inode); | ||
4488 | BUG_ON(ret); | 4585 | BUG_ON(ret); |
4489 | } | 4586 | } |
4490 | 4587 | ||
4491 | memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); | ||
4492 | memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); | ||
4493 | for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { | ||
4494 | eb = path->nodes[level]; | ||
4495 | if (!eb || eb == reloc_root->node) | ||
4496 | continue; | ||
4497 | BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); | ||
4498 | nodes[level] = eb->start; | ||
4499 | if (level == 0) | ||
4500 | btrfs_item_key_to_cpu(eb, &keys[level], 0); | ||
4501 | else | ||
4502 | btrfs_node_key_to_cpu(eb, &keys[level], 0); | ||
4503 | } | ||
4504 | |||
4505 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | ||
4506 | eb = path->nodes[0]; | ||
4507 | extent_buffer_get(eb); | ||
4508 | } | ||
4509 | btrfs_release_path(reloc_root, path); | ||
4510 | /* | 4588 | /* |
4511 | * replace tree blocks in the fs tree with tree blocks in | 4589 | * replace tree blocks in the fs tree with tree blocks in |
4512 | * the reloc tree. | 4590 | * the reloc tree. |
@@ -4515,15 +4593,19 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, | |||
4515 | BUG_ON(ret < 0); | 4593 | BUG_ON(ret < 0); |
4516 | 4594 | ||
4517 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 4595 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
4596 | ret = btrfs_search_slot(trans, reloc_root, first_key, path, | ||
4597 | 0, 0); | ||
4598 | BUG_ON(ret); | ||
4599 | extent_buffer_get(path->nodes[0]); | ||
4600 | eb = path->nodes[0]; | ||
4601 | btrfs_release_path(reloc_root, path); | ||
4518 | ret = invalidate_extent_cache(reloc_root, eb, group, root); | 4602 | ret = invalidate_extent_cache(reloc_root, eb, group, root); |
4519 | BUG_ON(ret); | 4603 | BUG_ON(ret); |
4520 | free_extent_buffer(eb); | 4604 | free_extent_buffer(eb); |
4521 | } | 4605 | } |
4522 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
4523 | 4606 | ||
4607 | mutex_unlock(&root->fs_info->tree_reloc_mutex); | ||
4524 | path->lowest_level = 0; | 4608 | path->lowest_level = 0; |
4525 | kfree(nodes); | ||
4526 | kfree(keys); | ||
4527 | return 0; | 4609 | return 0; |
4528 | } | 4610 | } |
4529 | 4611 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5ecc24d634a..1df67129cc3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -521,7 +521,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | |||
521 | dirty = root->dirty_root; | 521 | dirty = root->dirty_root; |
522 | 522 | ||
523 | btrfs_free_log(trans, root); | 523 | btrfs_free_log(trans, root); |
524 | btrfs_free_reloc_root(root); | 524 | btrfs_free_reloc_root(trans, root); |
525 | 525 | ||
526 | if (root->commit_root == root->node) { | 526 | if (root->commit_root == root->node) { |
527 | WARN_ON(root->node->start != | 527 | WARN_ON(root->node->start != |
@@ -930,8 +930,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
930 | */ | 930 | */ |
931 | btrfs_free_log_root_tree(trans, root->fs_info); | 931 | btrfs_free_log_root_tree(trans, root->fs_info); |
932 | 932 | ||
933 | btrfs_free_reloc_mappings(root); | ||
934 | |||
935 | ret = btrfs_commit_tree_roots(trans, root); | 933 | ret = btrfs_commit_tree_roots(trans, root); |
936 | BUG_ON(ret); | 934 | BUG_ON(ret); |
937 | 935 | ||