aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/ctree.c93
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c364
-rw-r--r--fs/btrfs/transaction.c4
5 files changed, 277 insertions, 199 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9caeb377de6..73899d0f9d8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -287,7 +287,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
287 /* 287 /*
288 * There are only two places that can drop reference to 288 * There are only two places that can drop reference to
289 * tree blocks owned by living reloc trees, one is here, 289 * tree blocks owned by living reloc trees, one is here,
290 * the other place is btrfs_merge_path. In both places, 290 * the other place is btrfs_drop_subtree. In both places,
291 * we check reference count while tree block is locked. 291 * we check reference count while tree block is locked.
292 * Furthermore, if reference count is one, it won't get 292 * Furthermore, if reference count is one, it won't get
293 * increased by someone else. 293 * increased by someone else.
@@ -312,9 +312,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
312 } 312 }
313 313
314 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 314 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
315 ret = btrfs_add_reloc_mapping(root, buf->start,
316 buf->len, cow->start);
317 BUG_ON(ret);
318 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); 315 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
319 WARN_ON(ret); 316 WARN_ON(ret);
320 } 317 }
@@ -1627,61 +1624,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
1627 btrfs_node_key_to_cpu(eb, &key, slot); 1624 btrfs_node_key_to_cpu(eb, &key, slot);
1628 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); 1625 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1629 1626
1627 if (generation == trans->transid) {
1628 eb = read_tree_block(root, bytenr, blocksize,
1629 generation);
1630 btrfs_tree_lock(eb);
1631 }
1632
1630 /* 1633 /*
1631 * if node keys match and node pointer hasn't been modified 1634 * if node keys match and node pointer hasn't been modified
1632 * in the running transaction, we can merge the path. for 1635 * in the running transaction, we can merge the path. for
1633 * reloc trees, the node pointer check is skipped, this is 1636 * blocks owened by reloc trees, the node pointer check is
1634 * because the reloc trees are fully controlled by the space 1637 * skipped, this is because these blocks are fully controlled
1635 * balance code, no one else can modify them. 1638 * by the space balance code, no one else can modify them.
1636 */ 1639 */
1637 if (!nodes[level - 1] || !key_match || 1640 if (!nodes[level - 1] || !key_match ||
1638 (generation == trans->transid && 1641 (generation == trans->transid &&
1639 root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { 1642 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1640next_level: 1643 if (level == 1 || level == lowest_level + 1) {
1641 if (level == 1 || level == lowest_level + 1) 1644 if (generation == trans->transid) {
1645 btrfs_tree_unlock(eb);
1646 free_extent_buffer(eb);
1647 }
1642 break; 1648 break;
1649 }
1643 1650
1644 eb = read_tree_block(root, bytenr, blocksize, 1651 if (generation != trans->transid) {
1645 generation); 1652 eb = read_tree_block(root, bytenr, blocksize,
1646 btrfs_tree_lock(eb); 1653 generation);
1654 btrfs_tree_lock(eb);
1655 }
1647 1656
1648 ret = btrfs_cow_block(trans, root, eb, parent, slot, 1657 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1649 &eb, 0); 1658 &eb, 0);
1650 BUG_ON(ret); 1659 BUG_ON(ret);
1651 1660
1661 if (root->root_key.objectid ==
1662 BTRFS_TREE_RELOC_OBJECTID) {
1663 if (!nodes[level - 1]) {
1664 nodes[level - 1] = eb->start;
1665 memcpy(&node_keys[level - 1], &key,
1666 sizeof(node_keys[0]));
1667 } else {
1668 WARN_ON(1);
1669 }
1670 }
1671
1652 btrfs_tree_unlock(parent); 1672 btrfs_tree_unlock(parent);
1653 free_extent_buffer(parent); 1673 free_extent_buffer(parent);
1654 parent = eb; 1674 parent = eb;
1655 continue; 1675 continue;
1656 } 1676 }
1657 1677
1658 if (generation == trans->transid) {
1659 u32 refs;
1660 BUG_ON(btrfs_header_owner(eb) !=
1661 BTRFS_TREE_RELOC_OBJECTID);
1662 /*
1663 * lock the block to keep __btrfs_cow_block from
1664 * changing the reference count.
1665 */
1666 eb = read_tree_block(root, bytenr, blocksize,
1667 generation);
1668 btrfs_tree_lock(eb);
1669
1670 ret = btrfs_lookup_extent_ref(trans, root, bytenr,
1671 blocksize, &refs);
1672 BUG_ON(ret);
1673 /*
1674 * if replace block whose reference count is one,
1675 * we have to "drop the subtree". so skip it for
1676 * simplicity
1677 */
1678 if (refs == 1) {
1679 btrfs_tree_unlock(eb);
1680 free_extent_buffer(eb);
1681 goto next_level;
1682 }
1683 }
1684
1685 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); 1678 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1686 btrfs_set_node_ptr_generation(parent, slot, trans->transid); 1679 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1687 btrfs_mark_buffer_dirty(parent); 1680 btrfs_mark_buffer_dirty(parent);
@@ -1693,16 +1686,24 @@ next_level:
1693 btrfs_header_generation(parent), 1686 btrfs_header_generation(parent),
1694 level - 1); 1687 level - 1);
1695 BUG_ON(ret); 1688 BUG_ON(ret);
1696 ret = btrfs_free_extent(trans, root, bytenr,
1697 blocksize, parent->start,
1698 btrfs_header_owner(parent),
1699 btrfs_header_generation(parent),
1700 level - 1, 1);
1701 BUG_ON(ret);
1702 1689
1690 /*
1691 * If the block was created in the running transaction,
1692 * it's possible this is the last reference to it, so we
1693 * should drop the subtree.
1694 */
1703 if (generation == trans->transid) { 1695 if (generation == trans->transid) {
1696 ret = btrfs_drop_subtree(trans, root, eb, parent);
1697 BUG_ON(ret);
1704 btrfs_tree_unlock(eb); 1698 btrfs_tree_unlock(eb);
1705 free_extent_buffer(eb); 1699 free_extent_buffer(eb);
1700 } else {
1701 ret = btrfs_free_extent(trans, root, bytenr,
1702 blocksize, parent->start,
1703 btrfs_header_owner(parent),
1704 btrfs_header_generation(parent),
1705 level - 1, 1);
1706 BUG_ON(ret);
1706 } 1707 }
1707 break; 1708 break;
1708 } 1709 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 793d8fdda24..117090995e7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -684,7 +684,6 @@ struct btrfs_fs_info {
684 int thread_pool_size; 684 int thread_pool_size;
685 685
686 /* tree relocation relocated fields */ 686 /* tree relocation relocated fields */
687 struct extent_io_tree reloc_mapping_tree;
688 struct list_head dead_reloc_roots; 687 struct list_head dead_reloc_roots;
689 struct btrfs_leaf_ref_tree reloc_ref_tree; 688 struct btrfs_leaf_ref_tree reloc_ref_tree;
690 struct btrfs_leaf_ref_tree shared_ref_tree; 689 struct btrfs_leaf_ref_tree shared_ref_tree;
@@ -1636,13 +1635,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1636int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 1635int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1637 struct btrfs_root *root, u64 group_start); 1636 struct btrfs_root *root, u64 group_start);
1638int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 1637int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
1639int btrfs_free_reloc_root(struct btrfs_root *root); 1638int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
1639 struct btrfs_root *root);
1640int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); 1640int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
1641int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
1642 u64 num_bytes, u64 new_bytenr);
1643int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
1644 u64 num_bytes, u64 *new_bytenr);
1645void btrfs_free_reloc_mappings(struct btrfs_root *root);
1646int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 1641int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
1647 struct btrfs_root *root, 1642 struct btrfs_root *root,
1648 struct extent_buffer *buf, u64 orig_start); 1643 struct extent_buffer *buf, u64 orig_start);
@@ -1726,6 +1721,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
1726int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 1721int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
1727int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 1722int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1728 *root); 1723 *root);
1724int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
1725 struct btrfs_root *root,
1726 struct extent_buffer *node,
1727 struct extent_buffer *parent);
1729/* root-item.c */ 1728/* root-item.c */
1730int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1729int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1731 struct btrfs_key *key); 1730 struct btrfs_key *key);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dc95f636a11..796256440df 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1448,8 +1448,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1448 fs_info->btree_inode->i_mapping, GFP_NOFS); 1448 fs_info->btree_inode->i_mapping, GFP_NOFS);
1449 fs_info->do_barriers = 1; 1449 fs_info->do_barriers = 1;
1450 1450
1451 extent_io_tree_init(&fs_info->reloc_mapping_tree,
1452 fs_info->btree_inode->i_mapping, GFP_NOFS);
1453 INIT_LIST_HEAD(&fs_info->dead_reloc_roots); 1451 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1454 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); 1452 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1455 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); 1453 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bbf04e80a1a..56e41369d71 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3032,13 +3032,103 @@ out:
3032} 3032}
3033 3033
3034/* 3034/*
3035 * helper function for drop_subtree, this function is similar to
3036 * walk_down_tree. The main difference is that it checks reference
3037 * counts while tree blocks are locked.
3038 */
3039static int noinline walk_down_subtree(struct btrfs_trans_handle *trans,
3040 struct btrfs_root *root,
3041 struct btrfs_path *path, int *level)
3042{
3043 struct extent_buffer *next;
3044 struct extent_buffer *cur;
3045 struct extent_buffer *parent;
3046 u64 bytenr;
3047 u64 ptr_gen;
3048 u32 blocksize;
3049 u32 refs;
3050 int ret;
3051
3052 cur = path->nodes[*level];
3053 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
3054 &refs);
3055 BUG_ON(ret);
3056 if (refs > 1)
3057 goto out;
3058
3059 while (*level >= 0) {
3060 cur = path->nodes[*level];
3061 if (*level == 0) {
3062 ret = btrfs_drop_leaf_ref(trans, root, cur);
3063 BUG_ON(ret);
3064 clean_tree_block(trans, root, cur);
3065 break;
3066 }
3067 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
3068 clean_tree_block(trans, root, cur);
3069 break;
3070 }
3071
3072 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3073 blocksize = btrfs_level_size(root, *level - 1);
3074 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3075
3076 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
3077 btrfs_tree_lock(next);
3078
3079 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
3080 &refs);
3081 BUG_ON(ret);
3082 if (refs > 1) {
3083 parent = path->nodes[*level];
3084 ret = btrfs_free_extent(trans, root, bytenr,
3085 blocksize, parent->start,
3086 btrfs_header_owner(parent),
3087 btrfs_header_generation(parent),
3088 *level - 1, 1);
3089 BUG_ON(ret);
3090 path->slots[*level]++;
3091 btrfs_tree_unlock(next);
3092 free_extent_buffer(next);
3093 continue;
3094 }
3095
3096 *level = btrfs_header_level(next);
3097 path->nodes[*level] = next;
3098 path->slots[*level] = 0;
3099 path->locks[*level] = 1;
3100 cond_resched();
3101 }
3102out:
3103 parent = path->nodes[*level + 1];
3104 bytenr = path->nodes[*level]->start;
3105 blocksize = path->nodes[*level]->len;
3106
3107 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3108 parent->start, btrfs_header_owner(parent),
3109 btrfs_header_generation(parent), *level, 1);
3110 BUG_ON(ret);
3111
3112 if (path->locks[*level]) {
3113 btrfs_tree_unlock(path->nodes[*level]);
3114 path->locks[*level] = 0;
3115 }
3116 free_extent_buffer(path->nodes[*level]);
3117 path->nodes[*level] = NULL;
3118 *level += 1;
3119 cond_resched();
3120 return 0;
3121}
3122
3123/*
3035 * helper for dropping snapshots. This walks back up the tree in the path 3124 * helper for dropping snapshots. This walks back up the tree in the path
3036 * to find the first node higher up where we haven't yet gone through 3125 * to find the first node higher up where we haven't yet gone through
3037 * all the slots 3126 * all the slots
3038 */ 3127 */
3039static int noinline walk_up_tree(struct btrfs_trans_handle *trans, 3128static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3040 struct btrfs_root *root, 3129 struct btrfs_root *root,
3041 struct btrfs_path *path, int *level) 3130 struct btrfs_path *path,
3131 int *level, int max_level)
3042{ 3132{
3043 u64 root_owner; 3133 u64 root_owner;
3044 u64 root_gen; 3134 u64 root_gen;
@@ -3047,7 +3137,7 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3047 int slot; 3137 int slot;
3048 int ret; 3138 int ret;
3049 3139
3050 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 3140 for (i = *level; i < max_level && path->nodes[i]; i++) {
3051 slot = path->slots[i]; 3141 slot = path->slots[i];
3052 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 3142 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3053 struct extent_buffer *node; 3143 struct extent_buffer *node;
@@ -3070,12 +3160,18 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3070 3160
3071 root_owner = btrfs_header_owner(parent); 3161 root_owner = btrfs_header_owner(parent);
3072 root_gen = btrfs_header_generation(parent); 3162 root_gen = btrfs_header_generation(parent);
3163
3164 clean_tree_block(trans, root, path->nodes[*level]);
3073 ret = btrfs_free_extent(trans, root, 3165 ret = btrfs_free_extent(trans, root,
3074 path->nodes[*level]->start, 3166 path->nodes[*level]->start,
3075 path->nodes[*level]->len, 3167 path->nodes[*level]->len,
3076 parent->start, root_owner, 3168 parent->start, root_owner,
3077 root_gen, *level, 1); 3169 root_gen, *level, 1);
3078 BUG_ON(ret); 3170 BUG_ON(ret);
3171 if (path->locks[*level]) {
3172 btrfs_tree_unlock(path->nodes[*level]);
3173 path->locks[*level] = 0;
3174 }
3079 free_extent_buffer(path->nodes[*level]); 3175 free_extent_buffer(path->nodes[*level]);
3080 path->nodes[*level] = NULL; 3176 path->nodes[*level] = NULL;
3081 *level = i + 1; 3177 *level = i + 1;
@@ -3145,7 +3241,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3145 if (wret < 0) 3241 if (wret < 0)
3146 ret = wret; 3242 ret = wret;
3147 3243
3148 wret = walk_up_tree(trans, root, path, &level); 3244 wret = walk_up_tree(trans, root, path, &level,
3245 BTRFS_MAX_LEVEL);
3149 if (wret > 0) 3246 if (wret > 0)
3150 break; 3247 break;
3151 if (wret < 0) 3248 if (wret < 0)
@@ -3168,6 +3265,50 @@ out:
3168 return ret; 3265 return ret;
3169} 3266}
3170 3267
3268int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3269 struct btrfs_root *root,
3270 struct extent_buffer *node,
3271 struct extent_buffer *parent)
3272{
3273 struct btrfs_path *path;
3274 int level;
3275 int parent_level;
3276 int ret = 0;
3277 int wret;
3278
3279 path = btrfs_alloc_path();
3280 BUG_ON(!path);
3281
3282 BUG_ON(!btrfs_tree_locked(parent));
3283 parent_level = btrfs_header_level(parent);
3284 extent_buffer_get(parent);
3285 path->nodes[parent_level] = parent;
3286 path->slots[parent_level] = btrfs_header_nritems(parent);
3287
3288 BUG_ON(!btrfs_tree_locked(node));
3289 level = btrfs_header_level(node);
3290 extent_buffer_get(node);
3291 path->nodes[level] = node;
3292 path->slots[level] = 0;
3293
3294 while (1) {
3295 wret = walk_down_subtree(trans, root, path, &level);
3296 if (wret < 0)
3297 ret = wret;
3298 if (wret != 0)
3299 break;
3300
3301 wret = walk_up_tree(trans, root, path, &level, parent_level);
3302 if (wret < 0)
3303 ret = wret;
3304 if (wret != 0)
3305 break;
3306 }
3307
3308 btrfs_free_path(path);
3309 return ret;
3310}
3311
3171static unsigned long calc_ra(unsigned long start, unsigned long last, 3312static unsigned long calc_ra(unsigned long start, unsigned long last,
3172 unsigned long nr) 3313 unsigned long nr)
3173{ 3314{
@@ -3312,6 +3453,10 @@ struct btrfs_ref_path {
3312 u32 num_refs; 3453 u32 num_refs;
3313 int lowest_level; 3454 int lowest_level;
3314 int current_level; 3455 int current_level;
3456 int shared_level;
3457
3458 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
3459 u64 new_nodes[BTRFS_MAX_LEVEL];
3315}; 3460};
3316 3461
3317struct disk_extent { 3462struct disk_extent {
@@ -3360,6 +3505,7 @@ static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
3360 if (first_time) { 3505 if (first_time) {
3361 ref_path->lowest_level = -1; 3506 ref_path->lowest_level = -1;
3362 ref_path->current_level = -1; 3507 ref_path->current_level = -1;
3508 ref_path->shared_level = -1;
3363 goto walk_up; 3509 goto walk_up;
3364 } 3510 }
3365walk_down: 3511walk_down:
@@ -3403,8 +3549,11 @@ walk_down:
3403 3549
3404 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 3550 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3405 if (found_key.objectid == bytenr && 3551 if (found_key.objectid == bytenr &&
3406 found_key.type == BTRFS_EXTENT_REF_KEY) 3552 found_key.type == BTRFS_EXTENT_REF_KEY) {
3553 if (level < ref_path->shared_level)
3554 ref_path->shared_level = level;
3407 goto found; 3555 goto found;
3556 }
3408next: 3557next:
3409 level--; 3558 level--;
3410 btrfs_release_path(extent_root, path); 3559 btrfs_release_path(extent_root, path);
@@ -3992,51 +4141,6 @@ out:
3992 return ret; 4141 return ret;
3993} 4142}
3994 4143
3995int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
3996 u64 num_bytes, u64 new_bytenr)
3997{
3998 set_extent_bits(&root->fs_info->reloc_mapping_tree,
3999 orig_bytenr, orig_bytenr + num_bytes - 1,
4000 EXTENT_LOCKED, GFP_NOFS);
4001 set_state_private(&root->fs_info->reloc_mapping_tree,
4002 orig_bytenr, new_bytenr);
4003 return 0;
4004}
4005
4006int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
4007 u64 num_bytes, u64 *new_bytenr)
4008{
4009 u64 bytenr;
4010 u64 cur_bytenr = orig_bytenr;
4011 u64 prev_bytenr = orig_bytenr;
4012 int ret;
4013
4014 while (1) {
4015 ret = get_state_private(&root->fs_info->reloc_mapping_tree,
4016 cur_bytenr, &bytenr);
4017 if (ret)
4018 break;
4019 prev_bytenr = cur_bytenr;
4020 cur_bytenr = bytenr;
4021 }
4022
4023 if (orig_bytenr == cur_bytenr)
4024 return -ENOENT;
4025
4026 if (prev_bytenr != orig_bytenr) {
4027 set_state_private(&root->fs_info->reloc_mapping_tree,
4028 orig_bytenr, cur_bytenr);
4029 }
4030 *new_bytenr = cur_bytenr;
4031 return 0;
4032}
4033
4034void btrfs_free_reloc_mappings(struct btrfs_root *root)
4035{
4036 clear_extent_bits(&root->fs_info->reloc_mapping_tree,
4037 0, (u64)-1, -1, GFP_NOFS);
4038}
4039
4040int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 4144int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4041 struct btrfs_root *root, 4145 struct btrfs_root *root,
4042 struct extent_buffer *buf, u64 orig_start) 4146 struct extent_buffer *buf, u64 orig_start)
@@ -4222,15 +4326,30 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4222 return 0; 4326 return 0;
4223} 4327}
4224 4328
4225int btrfs_free_reloc_root(struct btrfs_root *root) 4329int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4330 struct btrfs_root *root)
4226{ 4331{
4227 struct btrfs_root *reloc_root; 4332 struct btrfs_root *reloc_root;
4333 int ret;
4228 4334
4229 if (root->reloc_root) { 4335 if (root->reloc_root) {
4230 reloc_root = root->reloc_root; 4336 reloc_root = root->reloc_root;
4231 root->reloc_root = NULL; 4337 root->reloc_root = NULL;
4232 list_add(&reloc_root->dead_list, 4338 list_add(&reloc_root->dead_list,
4233 &root->fs_info->dead_reloc_roots); 4339 &root->fs_info->dead_reloc_roots);
4340
4341 btrfs_set_root_bytenr(&reloc_root->root_item,
4342 reloc_root->node->start);
4343 btrfs_set_root_level(&root->root_item,
4344 btrfs_header_level(reloc_root->node));
4345 memset(&reloc_root->root_item.drop_progress, 0,
4346 sizeof(struct btrfs_disk_key));
4347 reloc_root->root_item.drop_level = 0;
4348
4349 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4350 &reloc_root->root_key,
4351 &reloc_root->root_item);
4352 BUG_ON(ret);
4234 } 4353 }
4235 return 0; 4354 return 0;
4236} 4355}
@@ -4356,8 +4475,6 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
4356 btrfs_set_root_refs(root_item, 0); 4475 btrfs_set_root_refs(root_item, 0);
4357 btrfs_set_root_bytenr(root_item, eb->start); 4476 btrfs_set_root_bytenr(root_item, eb->start);
4358 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 4477 btrfs_set_root_level(root_item, btrfs_header_level(eb));
4359 memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress));
4360 root_item->drop_level = 0;
4361 4478
4362 btrfs_tree_unlock(eb); 4479 btrfs_tree_unlock(eb);
4363 free_extent_buffer(eb); 4480 free_extent_buffer(eb);
@@ -4382,15 +4499,19 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
4382 * Core function of space balance. 4499 * Core function of space balance.
4383 * 4500 *
4384 * The idea is using reloc trees to relocate tree blocks in reference 4501 * The idea is using reloc trees to relocate tree blocks in reference
4385 * counted roots. There is one reloc tree for each subvol, all reloc 4502 * counted roots. There is one reloc tree for each subvol, and all
4386 * trees share same key objectid. Reloc trees are snapshots of the 4503 * reloc trees share same root key objectid. Reloc trees are snapshots
4387 * latest committed roots (subvol root->commit_root). To relocate a tree 4504 * of the latest committed roots of subvols (root->commit_root).
4388 * block referenced by a subvol, the code COW the block through the reloc 4505 *
4389 * tree, then update pointer in the subvol to point to the new block. 4506 * To relocate a tree block referenced by a subvol, there are two steps.
4390 * Since all reloc trees share same key objectid, we can easily do special 4507 * COW the block through subvol's reloc tree, then update block pointer
4391 * handing to share tree blocks between reloc trees. Once a tree block has 4508 * in the subvol to point to the new block. Since all reloc trees share
4392 * been COWed in one reloc tree, we can use the result when the same block 4509 * same root key objectid, doing special handing for tree blocks owned
4393 * is COWed again through other reloc trees. 4510 * by them is easy. Once a tree block has been COWed in one reloc tree,
4511 * we can use the resulting new block directly when the same block is
4512 * required to COW again through other reloc trees. By this way, relocated
4513 * tree blocks are shared between reloc trees, so they are also shared
4514 * between subvols.
4394 */ 4515 */
4395static int noinline relocate_one_path(struct btrfs_trans_handle *trans, 4516static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4396 struct btrfs_root *root, 4517 struct btrfs_root *root,
@@ -4405,15 +4526,14 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4405 struct btrfs_key *keys; 4526 struct btrfs_key *keys;
4406 u64 *nodes; 4527 u64 *nodes;
4407 int level; 4528 int level;
4408 int lowest_merge; 4529 int shared_level;
4409 int lowest_level = 0; 4530 int lowest_level = 0;
4410 int update_refs;
4411 int ret; 4531 int ret;
4412 4532
4413 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 4533 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
4414 lowest_level = ref_path->owner_objectid; 4534 lowest_level = ref_path->owner_objectid;
4415 4535
4416 if (is_cowonly_root(ref_path->root_objectid)) { 4536 if (!root->ref_cows) {
4417 path->lowest_level = lowest_level; 4537 path->lowest_level = lowest_level;
4418 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 4538 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
4419 BUG_ON(ret < 0); 4539 BUG_ON(ret < 0);
@@ -4422,91 +4542,49 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4422 return 0; 4542 return 0;
4423 } 4543 }
4424 4544
4425 keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS);
4426 BUG_ON(!keys);
4427 nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS);
4428 BUG_ON(!nodes);
4429
4430 mutex_lock(&root->fs_info->tree_reloc_mutex); 4545 mutex_lock(&root->fs_info->tree_reloc_mutex);
4431 ret = init_reloc_tree(trans, root); 4546 ret = init_reloc_tree(trans, root);
4432 BUG_ON(ret); 4547 BUG_ON(ret);
4433 reloc_root = root->reloc_root; 4548 reloc_root = root->reloc_root;
4434 4549
4435 path->lowest_level = lowest_level; 4550 shared_level = ref_path->shared_level;
4436 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); 4551 ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
4437 BUG_ON(ret);
4438 /*
4439 * get relocation mapping for tree blocks in the path
4440 */
4441 lowest_merge = BTRFS_MAX_LEVEL;
4442 for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
4443 u64 new_bytenr;
4444 eb = path->nodes[level];
4445 if (!eb || eb == reloc_root->node)
4446 continue;
4447 ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len,
4448 &new_bytenr);
4449 if (ret)
4450 continue;
4451 if (level == 0)
4452 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4453 else
4454 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4455 nodes[level] = new_bytenr;
4456 lowest_merge = level;
4457 }
4458 4552
4459 update_refs = 0; 4553 keys = ref_path->node_keys;
4460 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 4554 nodes = ref_path->new_nodes;
4461 eb = path->nodes[0]; 4555 memset(&keys[shared_level + 1], 0,
4462 if (btrfs_header_generation(eb) < trans->transid) 4556 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
4463 update_refs = 1; 4557 memset(&nodes[shared_level + 1], 0,
4464 } 4558 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
4465 4559
4466 btrfs_release_path(reloc_root, path); 4560 if (nodes[lowest_level] == 0) {
4467 /* 4561 path->lowest_level = lowest_level;
4468 * merge tree blocks that already relocated in other reloc trees 4562 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
4469 */ 4563 0, 1);
4470 if (lowest_merge != BTRFS_MAX_LEVEL) { 4564 BUG_ON(ret);
4565 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
4566 eb = path->nodes[level];
4567 if (!eb || eb == reloc_root->node)
4568 break;
4569 nodes[level] = eb->start;
4570 if (level == 0)
4571 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4572 else
4573 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4574 }
4575 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4576 eb = path->nodes[0];
4577 ret = replace_extents_in_leaf(trans, reloc_root, eb,
4578 group, reloc_inode);
4579 BUG_ON(ret);
4580 }
4581 btrfs_release_path(reloc_root, path);
4582 } else {
4471 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 4583 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
4472 lowest_merge); 4584 lowest_level);
4473 BUG_ON(ret < 0);
4474 }
4475 /*
4476 * cow any tree blocks that still haven't been relocated
4477 */
4478 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1);
4479 BUG_ON(ret);
4480 /*
4481 * if we are relocating data block group, update extent pointers
4482 * in the newly created tree leaf.
4483 */
4484 eb = path->nodes[0];
4485 if (update_refs && nodes[0] != eb->start) {
4486 ret = replace_extents_in_leaf(trans, reloc_root, eb, group,
4487 reloc_inode);
4488 BUG_ON(ret); 4585 BUG_ON(ret);
4489 } 4586 }
4490 4587
4491 memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL);
4492 memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL);
4493 for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
4494 eb = path->nodes[level];
4495 if (!eb || eb == reloc_root->node)
4496 continue;
4497 BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID);
4498 nodes[level] = eb->start;
4499 if (level == 0)
4500 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4501 else
4502 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4503 }
4504
4505 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4506 eb = path->nodes[0];
4507 extent_buffer_get(eb);
4508 }
4509 btrfs_release_path(reloc_root, path);
4510 /* 4588 /*
4511 * replace tree blocks in the fs tree with tree blocks in 4589 * replace tree blocks in the fs tree with tree blocks in
4512 * the reloc tree. 4590 * the reloc tree.
@@ -4515,15 +4593,19 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4515 BUG_ON(ret < 0); 4593 BUG_ON(ret < 0);
4516 4594
4517 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 4595 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4596 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
4597 0, 0);
4598 BUG_ON(ret);
4599 extent_buffer_get(path->nodes[0]);
4600 eb = path->nodes[0];
4601 btrfs_release_path(reloc_root, path);
4518 ret = invalidate_extent_cache(reloc_root, eb, group, root); 4602 ret = invalidate_extent_cache(reloc_root, eb, group, root);
4519 BUG_ON(ret); 4603 BUG_ON(ret);
4520 free_extent_buffer(eb); 4604 free_extent_buffer(eb);
4521 } 4605 }
4522 mutex_unlock(&root->fs_info->tree_reloc_mutex);
4523 4606
4607 mutex_unlock(&root->fs_info->tree_reloc_mutex);
4524 path->lowest_level = 0; 4608 path->lowest_level = 0;
4525 kfree(nodes);
4526 kfree(keys);
4527 return 0; 4609 return 0;
4528} 4610}
4529 4611
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5ecc24d634a..1df67129cc3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -521,7 +521,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
521 dirty = root->dirty_root; 521 dirty = root->dirty_root;
522 522
523 btrfs_free_log(trans, root); 523 btrfs_free_log(trans, root);
524 btrfs_free_reloc_root(root); 524 btrfs_free_reloc_root(trans, root);
525 525
526 if (root->commit_root == root->node) { 526 if (root->commit_root == root->node) {
527 WARN_ON(root->node->start != 527 WARN_ON(root->node->start !=
@@ -930,8 +930,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
930 */ 930 */
931 btrfs_free_log_root_tree(trans, root->fs_info); 931 btrfs_free_log_root_tree(trans, root->fs_info);
932 932
933 btrfs_free_reloc_mappings(root);
934
935 ret = btrfs_commit_tree_roots(trans, root); 933 ret = btrfs_commit_tree_roots(trans, root);
936 BUG_ON(ret); 934 BUG_ON(ret);
937 935