aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZheng Yan <zheng.yan@oracle.com>2008-09-26 10:09:34 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-26 10:09:34 -0400
commit1a40e23b95da45051ee4d74374c58ae87a14051c (patch)
tree77faffd3f9d3a26c22e6cf03b83762c95d687596
parent5b21f2ed3f2947b5195b65c9fdbdd9e52904cc03 (diff)
Btrfs: update space balancing code
This patch updates the space balancing code to utilize the new backref format. Before, btrfs-vol -b would break any COW links on data blocks or metadata. This was slow and caused the amount of space used to explode if a large number of snapshots were present. The new code can keeps the sharing of all data extents and most of the tree blocks. To maintain the sharing of data extents, the space balance code uses a seperate inode hold data extent pointers, then updates the references to point to the new location. To maintain the sharing of tree blocks, the space balance code uses reloc trees to relocate tree blocks in reference counted roots. There is one reloc tree for each subvol, and all reloc trees share same root key objectid. Reloc trees are snapshots of the latest committed roots of subvols (root->commit_root). To relocate a tree block referenced by a subvol, there are two steps. COW the block through subvol's reloc tree, then update block pointer in the subvol to point to the new block. Since all reloc trees share same root key objectid, doing special handing for tree blocks owned by them is easy. Once a tree block has been COWed in one reloc tree, we can use the resulting new block directly when the same block is required to COW again through other reloc trees. In this way, relocated tree blocks are shared between reloc trees, so they are also shared between subvols. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/ctree.c155
-rw-r--r--fs/btrfs/ctree.h26
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent-tree.c2034
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/transaction.c15
-rw-r--r--fs/btrfs/volumes.c9
7 files changed, 1828 insertions, 425 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f9cd40967d04..50e81f43e6d4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -179,7 +179,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
179 struct extent_buffer *cow; 179 struct extent_buffer *cow;
180 u32 nritems; 180 u32 nritems;
181 int ret = 0; 181 int ret = 0;
182 int different_trans = 0;
183 int level; 182 int level;
184 int unlock_orig = 0; 183 int unlock_orig = 0;
185 184
@@ -233,13 +232,33 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
233 WARN_ON(btrfs_header_generation(buf) > trans->transid); 232 WARN_ON(btrfs_header_generation(buf) > trans->transid);
234 if (btrfs_header_generation(buf) != trans->transid) { 233 if (btrfs_header_generation(buf) != trans->transid) {
235 u32 nr_extents; 234 u32 nr_extents;
236 different_trans = 1;
237 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); 235 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
238 if (ret) 236 if (ret)
239 return ret; 237 return ret;
240 238
241 ret = btrfs_cache_ref(trans, root, buf, nr_extents); 239 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
242 WARN_ON(ret); 240 WARN_ON(ret);
241 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
242 /*
243 * There are only two places that can drop reference to
244 * tree blocks owned by living reloc trees, one is here,
245 * the other place is btrfs_merge_path. In both places,
246 * we check reference count while tree block is locked.
247 * Furthermore, if reference count is one, it won't get
248 * increased by someone else.
249 */
250 u32 refs;
251 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
252 buf->len, &refs);
253 BUG_ON(ret);
254 if (refs == 1) {
255 ret = btrfs_update_ref(trans, root, buf, cow,
256 0, nritems);
257 clean_tree_block(trans, root, buf);
258 } else {
259 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
260 }
261 BUG_ON(ret);
243 } else { 262 } else {
244 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); 263 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
245 if (ret) 264 if (ret)
@@ -247,6 +266,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
247 clean_tree_block(trans, root, buf); 266 clean_tree_block(trans, root, buf);
248 } 267 }
249 268
269 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
270 ret = btrfs_add_reloc_mapping(root, buf->start,
271 buf->len, cow->start);
272 BUG_ON(ret);
273 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
274 WARN_ON(ret);
275 }
276
250 if (buf == root->node) { 277 if (buf == root->node) {
251 WARN_ON(parent && parent != buf); 278 WARN_ON(parent && parent != buf);
252 279
@@ -1466,6 +1493,130 @@ done:
1466 return ret; 1493 return ret;
1467} 1494}
1468 1495
1496int btrfs_merge_path(struct btrfs_trans_handle *trans,
1497 struct btrfs_root *root,
1498 struct btrfs_key *node_keys,
1499 u64 *nodes, int lowest_level)
1500{
1501 struct extent_buffer *eb;
1502 struct extent_buffer *parent;
1503 struct btrfs_key key;
1504 u64 bytenr;
1505 u64 generation;
1506 u32 blocksize;
1507 int level;
1508 int slot;
1509 int key_match;
1510 int ret;
1511
1512 eb = btrfs_lock_root_node(root);
1513 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1514 BUG_ON(ret);
1515
1516 parent = eb;
1517 while (1) {
1518 level = btrfs_header_level(parent);
1519 if (level == 0 || level <= lowest_level)
1520 break;
1521
1522 ret = bin_search(parent, &node_keys[lowest_level], level,
1523 &slot);
1524 if (ret && slot > 0)
1525 slot--;
1526
1527 bytenr = btrfs_node_blockptr(parent, slot);
1528 if (nodes[level - 1] == bytenr)
1529 break;
1530
1531 blocksize = btrfs_level_size(root, level - 1);
1532 generation = btrfs_node_ptr_generation(parent, slot);
1533 btrfs_node_key_to_cpu(eb, &key, slot);
1534 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1535
1536 /*
1537 * if node keys match and node pointer hasn't been modified
1538 * in the running transaction, we can merge the path. for
1539 * reloc trees, the node pointer check is skipped, this is
1540 * because the reloc trees are fully controlled by the space
1541 * balance code, no one else can modify them.
1542 */
1543 if (!nodes[level - 1] || !key_match ||
1544 (generation == trans->transid &&
1545 root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) {
1546next_level:
1547 if (level == 1 || level == lowest_level + 1)
1548 break;
1549
1550 eb = read_tree_block(root, bytenr, blocksize,
1551 generation);
1552 btrfs_tree_lock(eb);
1553
1554 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1555 &eb, 0);
1556 BUG_ON(ret);
1557
1558 btrfs_tree_unlock(parent);
1559 free_extent_buffer(parent);
1560 parent = eb;
1561 continue;
1562 }
1563
1564 if (generation == trans->transid) {
1565 u32 refs;
1566 BUG_ON(btrfs_header_owner(eb) !=
1567 BTRFS_TREE_RELOC_OBJECTID);
1568 /*
1569 * lock the block to keep __btrfs_cow_block from
1570 * changing the reference count.
1571 */
1572 eb = read_tree_block(root, bytenr, blocksize,
1573 generation);
1574 btrfs_tree_lock(eb);
1575
1576 ret = btrfs_lookup_extent_ref(trans, root, bytenr,
1577 blocksize, &refs);
1578 BUG_ON(ret);
1579 /*
1580 * if replace block whose reference count is one,
1581 * we have to "drop the subtree". so skip it for
1582 * simplicity
1583 */
1584 if (refs == 1) {
1585 btrfs_tree_unlock(eb);
1586 free_extent_buffer(eb);
1587 goto next_level;
1588 }
1589 }
1590
1591 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1592 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1593 btrfs_mark_buffer_dirty(parent);
1594
1595 ret = btrfs_inc_extent_ref(trans, root,
1596 nodes[level - 1],
1597 blocksize, parent->start,
1598 btrfs_header_owner(parent),
1599 btrfs_header_generation(parent),
1600 level - 1, 0);
1601 BUG_ON(ret);
1602 ret = btrfs_free_extent(trans, root, bytenr,
1603 blocksize, parent->start,
1604 btrfs_header_owner(parent),
1605 btrfs_header_generation(parent),
1606 level - 1, 0, 1);
1607 BUG_ON(ret);
1608
1609 if (generation == trans->transid) {
1610 btrfs_tree_unlock(eb);
1611 free_extent_buffer(eb);
1612 }
1613 break;
1614 }
1615 btrfs_tree_unlock(parent);
1616 free_extent_buffer(parent);
1617 return 0;
1618}
1619
1469/* 1620/*
1470 * adjust the pointers going up the tree, starting at level 1621 * adjust the pointers going up the tree, starting at level
1471 * making sure the right key of each node is points to 'key'. 1622 * making sure the right key of each node is points to 'key'.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3e62a1b0a1f7..2775e270881e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -604,6 +604,7 @@ struct btrfs_fs_info {
604 struct mutex chunk_mutex; 604 struct mutex chunk_mutex;
605 struct mutex drop_mutex; 605 struct mutex drop_mutex;
606 struct mutex volume_mutex; 606 struct mutex volume_mutex;
607 struct mutex tree_reloc_mutex;
607 struct list_head trans_list; 608 struct list_head trans_list;
608 struct list_head hashers; 609 struct list_head hashers;
609 struct list_head dead_roots; 610 struct list_head dead_roots;
@@ -647,6 +648,10 @@ struct btrfs_fs_info {
647 struct task_struct *cleaner_kthread; 648 struct task_struct *cleaner_kthread;
648 int thread_pool_size; 649 int thread_pool_size;
649 650
651 /* tree relocation relocated fields */
652 struct extent_io_tree reloc_mapping_tree;
653 struct list_head dead_reloc_roots;
654 struct btrfs_leaf_ref_tree reloc_ref_tree;
650 struct btrfs_leaf_ref_tree shared_ref_tree; 655 struct btrfs_leaf_ref_tree shared_ref_tree;
651 656
652 struct kobject super_kobj; 657 struct kobject super_kobj;
@@ -698,6 +703,7 @@ struct btrfs_root {
698 struct btrfs_leaf_ref_tree ref_tree_struct; 703 struct btrfs_leaf_ref_tree ref_tree_struct;
699 struct btrfs_dirty_root *dirty_root; 704 struct btrfs_dirty_root *dirty_root;
700 struct btrfs_root *log_root; 705 struct btrfs_root *log_root;
706 struct btrfs_root *reloc_root;
701 707
702 struct btrfs_root_item root_item; 708 struct btrfs_root_item root_item;
703 struct btrfs_key root_key; 709 struct btrfs_key root_key;
@@ -1517,7 +1523,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1517struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1523struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1518 struct btrfs_root *root, 1524 struct btrfs_root *root,
1519 u64 bytenr, u32 blocksize); 1525 u64 bytenr, u32 blocksize);
1520int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
1521int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 1526int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
1522 struct btrfs_root *root, 1527 struct btrfs_root *root,
1523 struct btrfs_path *path, 1528 struct btrfs_path *path,
@@ -1582,10 +1587,29 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1582 struct btrfs_root *root, u64 bytes_used, 1587 struct btrfs_root *root, u64 bytes_used,
1583 u64 type, u64 chunk_objectid, u64 chunk_offset, 1588 u64 type, u64 chunk_objectid, u64 chunk_offset,
1584 u64 size); 1589 u64 size);
1590int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1591 struct btrfs_root *root, u64 group_start);
1592int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
1593int btrfs_free_reloc_root(struct btrfs_root *root);
1594int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
1595int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
1596 u64 num_bytes, u64 new_bytenr);
1597int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
1598 u64 num_bytes, u64 *new_bytenr);
1599void btrfs_free_reloc_mappings(struct btrfs_root *root);
1600int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
1601 struct btrfs_root *root,
1602 struct extent_buffer *buf, u64 orig_start);
1603int btrfs_add_dead_reloc_root(struct btrfs_root *root);
1604int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1585/* ctree.c */ 1605/* ctree.c */
1586int btrfs_previous_item(struct btrfs_root *root, 1606int btrfs_previous_item(struct btrfs_root *root,
1587 struct btrfs_path *path, u64 min_objectid, 1607 struct btrfs_path *path, u64 min_objectid,
1588 int type); 1608 int type);
1609int btrfs_merge_path(struct btrfs_trans_handle *trans,
1610 struct btrfs_root *root,
1611 struct btrfs_key *node_keys,
1612 u64 *nodes, int lowest_level);
1589int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 1613int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1590 struct btrfs_root *root, struct btrfs_path *path, 1614 struct btrfs_root *root, struct btrfs_path *path,
1591 struct btrfs_key *new_key); 1615 struct btrfs_key *new_key);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8969fee23318..45bc3132b054 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1406,6 +1406,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1406 fs_info->btree_inode->i_mapping, GFP_NOFS); 1406 fs_info->btree_inode->i_mapping, GFP_NOFS);
1407 fs_info->do_barriers = 1; 1407 fs_info->do_barriers = 1;
1408 1408
1409 extent_io_tree_init(&fs_info->reloc_mapping_tree,
1410 fs_info->btree_inode->i_mapping, GFP_NOFS);
1411 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1412 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1409 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); 1413 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
1410 1414
1411 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1415 BTRFS_I(fs_info->btree_inode)->root = tree_root;
@@ -1421,6 +1425,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1421 mutex_init(&fs_info->transaction_kthread_mutex); 1425 mutex_init(&fs_info->transaction_kthread_mutex);
1422 mutex_init(&fs_info->cleaner_mutex); 1426 mutex_init(&fs_info->cleaner_mutex);
1423 mutex_init(&fs_info->volume_mutex); 1427 mutex_init(&fs_info->volume_mutex);
1428 mutex_init(&fs_info->tree_reloc_mutex);
1424 init_waitqueue_head(&fs_info->transaction_throttle); 1429 init_waitqueue_head(&fs_info->transaction_throttle);
1425 init_waitqueue_head(&fs_info->transaction_wait); 1430 init_waitqueue_head(&fs_info->transaction_wait);
1426 init_waitqueue_head(&fs_info->async_submit_wait); 1431 init_waitqueue_head(&fs_info->async_submit_wait);
@@ -1627,6 +1632,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1627 ret = btrfs_recover_log_trees(log_tree_root); 1632 ret = btrfs_recover_log_trees(log_tree_root);
1628 BUG_ON(ret); 1633 BUG_ON(ret);
1629 } 1634 }
1635
1636 ret = btrfs_cleanup_reloc_trees(tree_root);
1637 BUG_ON(ret);
1638
1630 fs_info->last_trans_committed = btrfs_super_generation(disk_super); 1639 fs_info->last_trans_committed = btrfs_super_generation(disk_super);
1631 return tree_root; 1640 return tree_root;
1632 1641
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9ab099bc01a4..8043b9d584a9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1834,6 +1834,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
1834 u64 header_owner = btrfs_header_owner(buf); 1834 u64 header_owner = btrfs_header_owner(buf);
1835 u64 header_transid = btrfs_header_generation(buf); 1835 u64 header_transid = btrfs_header_generation(buf);
1836 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 1836 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1837 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
1837 header_transid == trans->transid && 1838 header_transid == trans->transid &&
1838 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 1839 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1839 clean_tree_block(NULL, root, buf); 1840 clean_tree_block(NULL, root, buf);
@@ -2487,6 +2488,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2487 return -ENOSPC; 2488 return -ENOSPC;
2488 } 2489 }
2489 btrfs_add_free_space(cache, start, len); 2490 btrfs_add_free_space(cache, start, len);
2491 update_reserved_extents(root, start, len, 0);
2490 maybe_unlock_mutex(root); 2492 maybe_unlock_mutex(root);
2491 return 0; 2493 return 0;
2492} 2494}
@@ -2947,6 +2949,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2947 */ 2949 */
2948 if (*level == 1) { 2950 if (*level == 1) {
2949 ref = btrfs_lookup_leaf_ref(root, bytenr); 2951 ref = btrfs_lookup_leaf_ref(root, bytenr);
2952 if (ref && ref->generation != ptr_gen) {
2953 btrfs_free_leaf_ref(root, ref);
2954 ref = NULL;
2955 }
2950 if (ref) { 2956 if (ref) {
2951 ret = cache_drop_leaf_ref(trans, root, ref); 2957 ret = cache_drop_leaf_ref(trans, root, ref);
2952 BUG_ON(ret); 2958 BUG_ON(ret);
@@ -3153,34 +3159,6 @@ out:
3153 return ret; 3159 return ret;
3154} 3160}
3155 3161
3156int btrfs_free_block_groups(struct btrfs_fs_info *info)
3157{
3158 struct btrfs_block_group_cache *block_group;
3159 struct rb_node *n;
3160
3161 mutex_lock(&info->alloc_mutex);
3162 spin_lock(&info->block_group_cache_lock);
3163 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
3164 block_group = rb_entry(n, struct btrfs_block_group_cache,
3165 cache_node);
3166
3167 spin_unlock(&info->block_group_cache_lock);
3168 btrfs_remove_free_space_cache(block_group);
3169 spin_lock(&info->block_group_cache_lock);
3170
3171 rb_erase(&block_group->cache_node,
3172 &info->block_group_cache_tree);
3173
3174 spin_lock(&block_group->space_info->lock);
3175 list_del(&block_group->list);
3176 spin_unlock(&block_group->space_info->lock);
3177 kfree(block_group);
3178 }
3179 spin_unlock(&info->block_group_cache_lock);
3180 mutex_unlock(&info->alloc_mutex);
3181 return 0;
3182}
3183
3184static unsigned long calc_ra(unsigned long start, unsigned long last, 3162static unsigned long calc_ra(unsigned long start, unsigned long last,
3185 unsigned long nr) 3163 unsigned long nr)
3186{ 3164{
@@ -3192,37 +3170,43 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3192{ 3170{
3193 u64 page_start; 3171 u64 page_start;
3194 u64 page_end; 3172 u64 page_end;
3173 unsigned long first_index;
3195 unsigned long last_index; 3174 unsigned long last_index;
3196 unsigned long i; 3175 unsigned long i;
3197 struct page *page; 3176 struct page *page;
3198 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3177 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3199 struct file_ra_state *ra; 3178 struct file_ra_state *ra;
3200 unsigned long total_read = 0;
3201 unsigned long ra_pages;
3202 struct btrfs_ordered_extent *ordered; 3179 struct btrfs_ordered_extent *ordered;
3203 struct btrfs_trans_handle *trans; 3180 unsigned int total_read = 0;
3181 unsigned int total_dirty = 0;
3182 int ret = 0;
3204 3183
3205 ra = kzalloc(sizeof(*ra), GFP_NOFS); 3184 ra = kzalloc(sizeof(*ra), GFP_NOFS);
3206 3185
3207 mutex_lock(&inode->i_mutex); 3186 mutex_lock(&inode->i_mutex);
3208 i = start >> PAGE_CACHE_SHIFT; 3187 first_index = start >> PAGE_CACHE_SHIFT;
3209 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 3188 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3210 3189
3211 ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; 3190 /* make sure the dirty trick played by the caller work */
3191 ret = invalidate_inode_pages2_range(inode->i_mapping,
3192 first_index, last_index);
3193 if (ret)
3194 goto out_unlock;
3212 3195
3213 file_ra_state_init(ra, inode->i_mapping); 3196 file_ra_state_init(ra, inode->i_mapping);
3214 3197
3215 for (; i <= last_index; i++) { 3198 for (i = first_index ; i <= last_index; i++) {
3216 if (total_read % ra_pages == 0) { 3199 if (total_read % ra->ra_pages == 0) {
3217 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 3200 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3218 calc_ra(i, last_index, ra_pages)); 3201 calc_ra(i, last_index, ra->ra_pages));
3219 } 3202 }
3220 total_read++; 3203 total_read++;
3221again: 3204again:
3222 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 3205 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3223 goto truncate_racing; 3206 BUG_ON(1);
3224 page = grab_cache_page(inode->i_mapping, i); 3207 page = grab_cache_page(inode->i_mapping, i);
3225 if (!page) { 3208 if (!page) {
3209 ret = -ENOMEM;
3226 goto out_unlock; 3210 goto out_unlock;
3227 } 3211 }
3228 if (!PageUptodate(page)) { 3212 if (!PageUptodate(page)) {
@@ -3231,6 +3215,7 @@ again:
3231 if (!PageUptodate(page)) { 3215 if (!PageUptodate(page)) {
3232 unlock_page(page); 3216 unlock_page(page);
3233 page_cache_release(page); 3217 page_cache_release(page);
3218 ret = -EIO;
3234 goto out_unlock; 3219 goto out_unlock;
3235 } 3220 }
3236 } 3221 }
@@ -3251,14 +3236,13 @@ again:
3251 } 3236 }
3252 set_page_extent_mapped(page); 3237 set_page_extent_mapped(page);
3253 3238
3254 /*
3255 * make sure page_mkwrite is called for this page if userland
3256 * wants to change it from mmap
3257 */
3258 clear_page_dirty_for_io(page);
3259
3260 btrfs_set_extent_delalloc(inode, page_start, page_end); 3239 btrfs_set_extent_delalloc(inode, page_start, page_end);
3240 if (i == first_index)
3241 set_extent_bits(io_tree, page_start, page_end,
3242 EXTENT_BOUNDARY, GFP_NOFS);
3243
3261 set_page_dirty(page); 3244 set_page_dirty(page);
3245 total_dirty++;
3262 3246
3263 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3247 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3264 unlock_page(page); 3248 unlock_page(page);
@@ -3266,347 +3250,1457 @@ again:
3266 } 3250 }
3267 3251
3268out_unlock: 3252out_unlock:
3269 /* we have to start the IO in order to get the ordered extents
3270 * instantiated. This allows the relocation to code to wait
3271 * for all the ordered extents to hit the disk.
3272 *
3273 * Otherwise, it would constantly loop over the same extents
3274 * because the old ones don't get deleted until the IO is
3275 * started
3276 */
3277 btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
3278 WB_SYNC_NONE);
3279 kfree(ra); 3253 kfree(ra);
3280 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
3281 if (trans) {
3282 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
3283 mark_inode_dirty(inode);
3284 }
3285 mutex_unlock(&inode->i_mutex); 3254 mutex_unlock(&inode->i_mutex);
3286 return 0; 3255 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
3256 return ret;
3257}
3258
3259static int noinline relocate_data_extent(struct inode *reloc_inode,
3260 struct btrfs_key *extent_key,
3261 u64 offset)
3262{
3263 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3264 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
3265 struct extent_map *em;
3266
3267 em = alloc_extent_map(GFP_NOFS);
3268 BUG_ON(!em || IS_ERR(em));
3269
3270 em->start = extent_key->objectid - offset;
3271 em->len = extent_key->offset;
3272 em->block_start = extent_key->objectid;
3273 em->bdev = root->fs_info->fs_devices->latest_bdev;
3274 set_bit(EXTENT_FLAG_PINNED, &em->flags);
3275
3276 /* setup extent map to cheat btrfs_readpage */
3277 mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex);
3278 while (1) {
3279 int ret;
3280 spin_lock(&em_tree->lock);
3281 ret = add_extent_mapping(em_tree, em);
3282 spin_unlock(&em_tree->lock);
3283 if (ret != -EEXIST) {
3284 free_extent_map(em);
3285 break;
3286 }
3287 btrfs_drop_extent_cache(reloc_inode, em->start,
3288 em->start + em->len - 1, 0);
3289 }
3290 mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex);
3287 3291
3288truncate_racing: 3292 return relocate_inode_pages(reloc_inode, extent_key->objectid - offset,
3289 vmtruncate(inode, inode->i_size); 3293 extent_key->offset);
3290 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
3291 total_read);
3292 goto out_unlock;
3293} 3294}
3294 3295
3295/* 3296struct btrfs_ref_path {
3296 * The back references tell us which tree holds a ref on a block, 3297 u64 extent_start;
3297 * but it is possible for the tree root field in the reference to 3298 u64 nodes[BTRFS_MAX_LEVEL];
3298 * reflect the original root before a snapshot was made. In this 3299 u64 root_objectid;
3299 * case we should search through all the children of a given root 3300 u64 root_generation;
3300 * to find potential holders of references on a block. 3301 u64 owner_objectid;
3301 * 3302 u64 owner_offset;
3302 * Instead, we do something a little less fancy and just search 3303 u32 num_refs;
3303 * all the roots for a given key/block combination. 3304 int lowest_level;
3304 */ 3305 int current_level;
3305static int find_root_for_ref(struct btrfs_root *root, 3306};
3306 struct btrfs_path *path,
3307 struct btrfs_key *key0,
3308 int level,
3309 int file_key,
3310 struct btrfs_root **found_root,
3311 u64 bytenr)
3312{
3313 struct btrfs_key root_location;
3314 struct btrfs_root *cur_root = *found_root;
3315 struct btrfs_file_extent_item *file_extent;
3316 u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
3317 u64 found_bytenr;
3318 int ret;
3319 3307
3320 root_location.offset = (u64)-1; 3308struct disk_extent {
3321 root_location.type = BTRFS_ROOT_ITEM_KEY; 3309 u64 disk_bytenr;
3322 path->lowest_level = level; 3310 u64 disk_num_bytes;
3323 path->reada = 0; 3311 u64 offset;
3324 while(1) { 3312 u64 num_bytes;
3325 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); 3313};
3326 found_bytenr = 0; 3314
3327 if (ret == 0 && file_key) { 3315static int is_cowonly_root(u64 root_objectid)
3328 struct extent_buffer *leaf = path->nodes[0]; 3316{
3329 file_extent = btrfs_item_ptr(leaf, path->slots[0], 3317 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
3330 struct btrfs_file_extent_item); 3318 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
3331 if (btrfs_file_extent_type(leaf, file_extent) == 3319 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
3332 BTRFS_FILE_EXTENT_REG) { 3320 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
3333 found_bytenr = 3321 root_objectid == BTRFS_TREE_LOG_OBJECTID)
3334 btrfs_file_extent_disk_bytenr(leaf, 3322 return 1;
3335 file_extent); 3323 return 0;
3336 } 3324}
3337 } else if (!file_key) { 3325
3338 if (path->nodes[level]) 3326static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
3339 found_bytenr = path->nodes[level]->start; 3327 struct btrfs_root *extent_root,
3340 } 3328 struct btrfs_ref_path *ref_path,
3341 3329 int first_time)
3342 btrfs_release_path(cur_root, path); 3330{
3343 3331 struct extent_buffer *leaf;
3344 if (found_bytenr == bytenr) { 3332 struct btrfs_path *path;
3345 *found_root = cur_root; 3333 struct btrfs_extent_ref *ref;
3334 struct btrfs_key key;
3335 struct btrfs_key found_key;
3336 u64 bytenr;
3337 u32 nritems;
3338 int level;
3339 int ret = 1;
3340
3341 path = btrfs_alloc_path();
3342 if (!path)
3343 return -ENOMEM;
3344
3345 mutex_lock(&extent_root->fs_info->alloc_mutex);
3346
3347 if (first_time) {
3348 ref_path->lowest_level = -1;
3349 ref_path->current_level = -1;
3350 goto walk_up;
3351 }
3352walk_down:
3353 level = ref_path->current_level - 1;
3354 while (level >= -1) {
3355 u64 parent;
3356 if (level < ref_path->lowest_level)
3357 break;
3358
3359 if (level >= 0) {
3360 bytenr = ref_path->nodes[level];
3361 } else {
3362 bytenr = ref_path->extent_start;
3363 }
3364 BUG_ON(bytenr == 0);
3365
3366 parent = ref_path->nodes[level + 1];
3367 ref_path->nodes[level + 1] = 0;
3368 ref_path->current_level = level;
3369 BUG_ON(parent == 0);
3370
3371 key.objectid = bytenr;
3372 key.offset = parent + 1;
3373 key.type = BTRFS_EXTENT_REF_KEY;
3374
3375 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3376 if (ret < 0)
3377 goto out;
3378 BUG_ON(ret == 0);
3379
3380 leaf = path->nodes[0];
3381 nritems = btrfs_header_nritems(leaf);
3382 if (path->slots[0] >= nritems) {
3383 ret = btrfs_next_leaf(extent_root, path);
3384 if (ret < 0)
3385 goto out;
3386 if (ret > 0)
3387 goto next;
3388 leaf = path->nodes[0];
3389 }
3390
3391 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3392 if (found_key.objectid == bytenr &&
3393 found_key.type == BTRFS_EXTENT_REF_KEY)
3394 goto found;
3395next:
3396 level--;
3397 btrfs_release_path(extent_root, path);
3398 if (need_resched()) {
3399 mutex_unlock(&extent_root->fs_info->alloc_mutex);
3400 cond_resched();
3401 mutex_lock(&extent_root->fs_info->alloc_mutex);
3402 }
3403 }
3404 /* reached lowest level */
3405 ret = 1;
3406 goto out;
3407walk_up:
3408 level = ref_path->current_level;
3409 while (level < BTRFS_MAX_LEVEL - 1) {
3410 u64 ref_objectid;
3411 if (level >= 0) {
3412 bytenr = ref_path->nodes[level];
3413 } else {
3414 bytenr = ref_path->extent_start;
3415 }
3416 BUG_ON(bytenr == 0);
3417
3418 key.objectid = bytenr;
3419 key.offset = 0;
3420 key.type = BTRFS_EXTENT_REF_KEY;
3421
3422 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3423 if (ret < 0)
3424 goto out;
3425
3426 leaf = path->nodes[0];
3427 nritems = btrfs_header_nritems(leaf);
3428 if (path->slots[0] >= nritems) {
3429 ret = btrfs_next_leaf(extent_root, path);
3430 if (ret < 0)
3431 goto out;
3432 if (ret > 0) {
3433 /* the extent was freed by someone */
3434 if (ref_path->lowest_level == level)
3435 goto out;
3436 btrfs_release_path(extent_root, path);
3437 goto walk_down;
3438 }
3439 leaf = path->nodes[0];
3440 }
3441
3442 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3443 if (found_key.objectid != bytenr ||
3444 found_key.type != BTRFS_EXTENT_REF_KEY) {
3445 /* the extent was freed by someone */
3446 if (ref_path->lowest_level == level) {
3447 ret = 1;
3448 goto out;
3449 }
3450 btrfs_release_path(extent_root, path);
3451 goto walk_down;
3452 }
3453found:
3454 ref = btrfs_item_ptr(leaf, path->slots[0],
3455 struct btrfs_extent_ref);
3456 ref_objectid = btrfs_ref_objectid(leaf, ref);
3457 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3458 if (first_time) {
3459 level = (int)ref_objectid;
3460 BUG_ON(level >= BTRFS_MAX_LEVEL);
3461 ref_path->lowest_level = level;
3462 ref_path->current_level = level;
3463 ref_path->nodes[level] = bytenr;
3464 } else {
3465 WARN_ON(ref_objectid != level);
3466 }
3467 } else {
3468 WARN_ON(level != -1);
3469 }
3470 first_time = 0;
3471
3472 if (ref_path->lowest_level == level) {
3473 ref_path->owner_objectid = ref_objectid;
3474 ref_path->owner_offset = btrfs_ref_offset(leaf, ref);
3475 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
3476 }
3477
3478 /*
3479 * the block is tree root or the block isn't in reference
3480 * counted tree.
3481 */
3482 if (found_key.objectid == found_key.offset ||
3483 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
3484 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
3485 ref_path->root_generation =
3486 btrfs_ref_generation(leaf, ref);
3487 if (level < 0) {
3488 /* special reference from the tree log */
3489 ref_path->nodes[0] = found_key.offset;
3490 ref_path->current_level = 0;
3491 }
3346 ret = 0; 3492 ret = 0;
3347 goto out; 3493 goto out;
3348 } 3494 }
3349 ret = btrfs_search_root(root->fs_info->tree_root,
3350 root_search_start, &root_search_start);
3351 if (ret)
3352 break;
3353 3495
3354 root_location.objectid = root_search_start; 3496 level++;
3355 cur_root = btrfs_read_fs_root_no_name(root->fs_info, 3497 BUG_ON(ref_path->nodes[level] != 0);
3356 &root_location); 3498 ref_path->nodes[level] = found_key.offset;
3357 if (!cur_root) { 3499 ref_path->current_level = level;
3358 ret = 1; 3500
3359 break; 3501 /*
3502 * the reference was created in the running transaction,
3503 * no need to continue walking up.
3504 */
3505 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
3506 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
3507 ref_path->root_generation =
3508 btrfs_ref_generation(leaf, ref);
3509 ret = 0;
3510 goto out;
3511 }
3512
3513 btrfs_release_path(extent_root, path);
3514 if (need_resched()) {
3515 mutex_unlock(&extent_root->fs_info->alloc_mutex);
3516 cond_resched();
3517 mutex_lock(&extent_root->fs_info->alloc_mutex);
3360 } 3518 }
3361 } 3519 }
3520 /* reached max tree level, but no tree root found. */
3521 BUG();
3362out: 3522out:
3363 path->lowest_level = 0; 3523 mutex_unlock(&extent_root->fs_info->alloc_mutex);
3524 btrfs_free_path(path);
3364 return ret; 3525 return ret;
3365} 3526}
3366 3527
3367/* 3528static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
3368 * note, this releases the path 3529 struct btrfs_root *extent_root,
3369 */ 3530 struct btrfs_ref_path *ref_path,
3370static int noinline relocate_one_reference(struct btrfs_root *extent_root, 3531 u64 extent_start)
3371 struct btrfs_path *path, 3532{
3372 struct btrfs_key *extent_key, 3533 memset(ref_path, 0, sizeof(*ref_path));
3373 u64 *last_file_objectid, 3534 ref_path->extent_start = extent_start;
3374 u64 *last_file_offset, 3535
3375 u64 *last_file_root, 3536 return __next_ref_path(trans, extent_root, ref_path, 1);
3376 u64 last_extent) 3537}
3377{ 3538
3378 struct inode *inode; 3539static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
3379 struct btrfs_root *found_root; 3540 struct btrfs_root *extent_root,
3380 struct btrfs_key root_location; 3541 struct btrfs_ref_path *ref_path)
3542{
3543 return __next_ref_path(trans, extent_root, ref_path, 0);
3544}
3545
3546static int noinline get_new_locations(struct inode *reloc_inode,
3547 struct btrfs_key *extent_key,
3548 u64 offset, int no_fragment,
3549 struct disk_extent **extents,
3550 int *nr_extents)
3551{
3552 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3553 struct btrfs_path *path;
3554 struct btrfs_file_extent_item *fi;
3555 struct extent_buffer *leaf;
3556 struct disk_extent *exts = *extents;
3381 struct btrfs_key found_key; 3557 struct btrfs_key found_key;
3382 struct btrfs_extent_ref *ref; 3558 u64 cur_pos;
3383 u64 ref_root; 3559 u64 last_byte;
3384 u64 ref_gen; 3560 u32 nritems;
3385 u64 ref_objectid; 3561 int nr = 0;
3386 u64 ref_offset; 3562 int max = *nr_extents;
3387 int ret; 3563 int ret;
3388 int level;
3389 3564
3390 WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); 3565 WARN_ON(!no_fragment && *extents);
3566 if (!exts) {
3567 max = 1;
3568 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
3569 if (!exts)
3570 return -ENOMEM;
3571 }
3391 3572
3392 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 3573 path = btrfs_alloc_path();
3393 struct btrfs_extent_ref); 3574 BUG_ON(!path);
3394 ref_root = btrfs_ref_root(path->nodes[0], ref);
3395 ref_gen = btrfs_ref_generation(path->nodes[0], ref);
3396 ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
3397 ref_offset = btrfs_ref_offset(path->nodes[0], ref);
3398 btrfs_release_path(extent_root, path);
3399 3575
3400 root_location.objectid = ref_root; 3576 cur_pos = extent_key->objectid - offset;
3401 if (ref_gen == 0) 3577 last_byte = extent_key->objectid + extent_key->offset;
3402 root_location.offset = 0; 3578 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
3403 else 3579 cur_pos, 0);
3404 root_location.offset = (u64)-1; 3580 if (ret < 0)
3405 root_location.type = BTRFS_ROOT_ITEM_KEY; 3581 goto out;
3582 if (ret > 0) {
3583 ret = -ENOENT;
3584 goto out;
3585 }
3406 3586
3407 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, 3587 while (1) {
3408 &root_location); 3588 leaf = path->nodes[0];
3409 BUG_ON(!found_root); 3589 nritems = btrfs_header_nritems(leaf);
3410 mutex_unlock(&extent_root->fs_info->alloc_mutex); 3590 if (path->slots[0] >= nritems) {
3591 ret = btrfs_next_leaf(root, path);
3592 if (ret < 0)
3593 goto out;
3594 if (ret > 0)
3595 break;
3596 leaf = path->nodes[0];
3597 }
3411 3598
3412 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 3599 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3413 found_key.objectid = ref_objectid; 3600 if (found_key.offset != cur_pos ||
3414 found_key.type = BTRFS_EXTENT_DATA_KEY; 3601 found_key.type != BTRFS_EXTENT_DATA_KEY ||
3415 found_key.offset = ref_offset; 3602 found_key.objectid != reloc_inode->i_ino)
3416 level = 0; 3603 break;
3417 3604
3418 if (last_extent == extent_key->objectid && 3605 fi = btrfs_item_ptr(leaf, path->slots[0],
3419 *last_file_objectid == ref_objectid && 3606 struct btrfs_file_extent_item);
3420 *last_file_offset == ref_offset && 3607 if (btrfs_file_extent_type(leaf, fi) !=
3421 *last_file_root == ref_root) 3608 BTRFS_FILE_EXTENT_REG ||
3422 goto out; 3609 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
3610 break;
3423 3611
3424 ret = find_root_for_ref(extent_root, path, &found_key, 3612 if (nr == max) {
3425 level, 1, &found_root, 3613 struct disk_extent *old = exts;
3426 extent_key->objectid); 3614 max *= 2;
3615 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
3616 memcpy(exts, old, sizeof(*exts) * nr);
3617 if (old != *extents)
3618 kfree(old);
3619 }
3427 3620
3428 if (ret) 3621 exts[nr].disk_bytenr =
3622 btrfs_file_extent_disk_bytenr(leaf, fi);
3623 exts[nr].disk_num_bytes =
3624 btrfs_file_extent_disk_num_bytes(leaf, fi);
3625 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
3626 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3627 WARN_ON(exts[nr].offset > 0);
3628 WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
3629
3630 cur_pos += exts[nr].num_bytes;
3631 nr++;
3632
3633 if (cur_pos + offset >= last_byte)
3634 break;
3635
3636 if (no_fragment) {
3637 ret = 1;
3429 goto out; 3638 goto out;
3639 }
3640 path->slots[0]++;
3641 }
3430 3642
3431 if (last_extent == extent_key->objectid && 3643 WARN_ON(cur_pos + offset > last_byte);
3432 *last_file_objectid == ref_objectid && 3644 if (cur_pos + offset < last_byte) {
3433 *last_file_offset == ref_offset && 3645 ret = -ENOENT;
3434 *last_file_root == ref_root) 3646 goto out;
3647 }
3648 ret = 0;
3649out:
3650 btrfs_free_path(path);
3651 if (ret) {
3652 if (exts != *extents)
3653 kfree(exts);
3654 } else {
3655 *extents = exts;
3656 *nr_extents = nr;
3657 }
3658 return ret;
3659}
3660
3661static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
3662 struct btrfs_root *root,
3663 struct btrfs_path *path,
3664 struct btrfs_key *extent_key,
3665 struct btrfs_key *leaf_key,
3666 struct btrfs_ref_path *ref_path,
3667 struct disk_extent *new_extents,
3668 int nr_extents)
3669{
3670 struct extent_buffer *leaf;
3671 struct btrfs_file_extent_item *fi;
3672 struct inode *inode = NULL;
3673 struct btrfs_key key;
3674 u64 lock_start = 0;
3675 u64 lock_end = 0;
3676 u64 num_bytes;
3677 u64 ext_offset;
3678 u64 first_pos;
3679 u32 nritems;
3680 int extent_locked = 0;
3681 int ret;
3682
3683 first_pos = ref_path->owner_offset;
3684 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3685 key.objectid = ref_path->owner_objectid;
3686 key.offset = ref_path->owner_offset;
3687 key.type = BTRFS_EXTENT_DATA_KEY;
3688 } else {
3689 memcpy(&key, leaf_key, sizeof(key));
3690 }
3691
3692 while (1) {
3693 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3694 if (ret < 0)
3435 goto out; 3695 goto out;
3436 3696
3437 inode = btrfs_iget_locked(extent_root->fs_info->sb, 3697 leaf = path->nodes[0];
3438 ref_objectid, found_root); 3698 nritems = btrfs_header_nritems(leaf);
3439 if (inode->i_state & I_NEW) { 3699next:
3440 /* the inode and parent dir are two different roots */ 3700 if (extent_locked && ret > 0) {
3441 BTRFS_I(inode)->root = found_root; 3701 /*
3442 BTRFS_I(inode)->location.objectid = ref_objectid; 3702 * the file extent item was modified by someone
3443 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 3703 * before the extent got locked.
3444 BTRFS_I(inode)->location.offset = 0; 3704 */
3445 btrfs_read_locked_inode(inode); 3705 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3446 unlock_new_inode(inode); 3706 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3707 lock_end, GFP_NOFS);
3708 extent_locked = 0;
3709 }
3710
3711 if (path->slots[0] >= nritems) {
3712 if (ref_path->owner_objectid ==
3713 BTRFS_MULTIPLE_OBJECTIDS)
3714 break;
3715
3716 BUG_ON(extent_locked);
3717 ret = btrfs_next_leaf(root, path);
3718 if (ret < 0)
3719 goto out;
3720 if (ret > 0)
3721 break;
3722 leaf = path->nodes[0];
3723 nritems = btrfs_header_nritems(leaf);
3724 }
3725
3726 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3447 3727
3728 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
3729 if ((key.objectid > ref_path->owner_objectid) ||
3730 (key.objectid == ref_path->owner_objectid &&
3731 key.type > BTRFS_EXTENT_DATA_KEY) ||
3732 (key.offset >= first_pos + extent_key->offset))
3733 break;
3448 } 3734 }
3449 /* this can happen if the reference is not against
3450 * the latest version of the tree root
3451 */
3452 if (is_bad_inode(inode))
3453 goto out;
3454 3735
3455 *last_file_objectid = inode->i_ino; 3736 if (inode && key.objectid != inode->i_ino) {
3456 *last_file_root = found_root->root_key.objectid; 3737 BUG_ON(extent_locked);
3457 *last_file_offset = ref_offset; 3738 btrfs_release_path(root, path);
3739 mutex_unlock(&inode->i_mutex);
3740 iput(inode);
3741 inode = NULL;
3742 continue;
3743 }
3458 3744
3459 relocate_inode_pages(inode, ref_offset, extent_key->offset); 3745 if (key.type != BTRFS_EXTENT_DATA_KEY) {
3460 iput(inode); 3746 path->slots[0]++;
3461 } else { 3747 ret = 1;
3462 struct btrfs_trans_handle *trans; 3748 goto next;
3463 struct extent_buffer *eb; 3749 }
3464 int needs_lock = 0; 3750 fi = btrfs_item_ptr(leaf, path->slots[0],
3751 struct btrfs_file_extent_item);
3752 if ((btrfs_file_extent_type(leaf, fi) !=
3753 BTRFS_FILE_EXTENT_REG) ||
3754 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3755 extent_key->objectid)) {
3756 path->slots[0]++;
3757 ret = 1;
3758 goto next;
3759 }
3465 3760
3466 eb = read_tree_block(found_root, extent_key->objectid, 3761 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
3467 extent_key->offset, 0); 3762 ext_offset = btrfs_file_extent_offset(leaf, fi);
3468 btrfs_tree_lock(eb);
3469 level = btrfs_header_level(eb);
3470 3763
3471 if (level == 0) 3764 if (first_pos > key.offset - ext_offset)
3472 btrfs_item_key_to_cpu(eb, &found_key, 0); 3765 first_pos = key.offset - ext_offset;
3473 else
3474 btrfs_node_key_to_cpu(eb, &found_key, 0);
3475 3766
3476 btrfs_tree_unlock(eb); 3767 if (!extent_locked) {
3477 free_extent_buffer(eb); 3768 lock_start = key.offset;
3769 lock_end = lock_start + num_bytes - 1;
3770 } else {
3771 BUG_ON(lock_start != key.offset);
3772 BUG_ON(lock_end - lock_start + 1 < num_bytes);
3773 }
3774
3775 if (!inode) {
3776 btrfs_release_path(root, path);
3777
3778 inode = btrfs_iget_locked(root->fs_info->sb,
3779 key.objectid, root);
3780 if (inode->i_state & I_NEW) {
3781 BTRFS_I(inode)->root = root;
3782 BTRFS_I(inode)->location.objectid =
3783 key.objectid;
3784 BTRFS_I(inode)->location.type =
3785 BTRFS_INODE_ITEM_KEY;
3786 BTRFS_I(inode)->location.offset = 0;
3787 btrfs_read_locked_inode(inode);
3788 unlock_new_inode(inode);
3789 }
3790 /*
3791 * some code call btrfs_commit_transaction while
3792 * holding the i_mutex, so we can't use mutex_lock
3793 * here.
3794 */
3795 if (is_bad_inode(inode) ||
3796 !mutex_trylock(&inode->i_mutex)) {
3797 iput(inode);
3798 inode = NULL;
3799 key.offset = (u64)-1;
3800 goto skip;
3801 }
3802 }
3803
3804 if (!extent_locked) {
3805 struct btrfs_ordered_extent *ordered;
3806
3807 btrfs_release_path(root, path);
3478 3808
3479 ret = find_root_for_ref(extent_root, path, &found_key, 3809 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3480 level, 0, &found_root, 3810 lock_end, GFP_NOFS);
3481 extent_key->objectid); 3811 ordered = btrfs_lookup_first_ordered_extent(inode,
3812 lock_end);
3813 if (ordered &&
3814 ordered->file_offset <= lock_end &&
3815 ordered->file_offset + ordered->len > lock_start) {
3816 unlock_extent(&BTRFS_I(inode)->io_tree,
3817 lock_start, lock_end, GFP_NOFS);
3818 btrfs_start_ordered_extent(inode, ordered, 1);
3819 btrfs_put_ordered_extent(ordered);
3820 key.offset += num_bytes;
3821 goto skip;
3822 }
3823 if (ordered)
3824 btrfs_put_ordered_extent(ordered);
3825
3826 mutex_lock(&BTRFS_I(inode)->extent_mutex);
3827 extent_locked = 1;
3828 continue;
3829 }
3482 3830
3831 if (nr_extents == 1) {
3832 /* update extent pointer in place */
3833 btrfs_set_file_extent_generation(leaf, fi,
3834 trans->transid);
3835 btrfs_set_file_extent_disk_bytenr(leaf, fi,
3836 new_extents[0].disk_bytenr);
3837 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3838 new_extents[0].disk_num_bytes);
3839 ext_offset += new_extents[0].offset;
3840 btrfs_set_file_extent_offset(leaf, fi, ext_offset);
3841 btrfs_mark_buffer_dirty(leaf);
3842
3843 btrfs_drop_extent_cache(inode, key.offset,
3844 key.offset + num_bytes - 1, 0);
3845
3846 ret = btrfs_inc_extent_ref(trans, root,
3847 new_extents[0].disk_bytenr,
3848 new_extents[0].disk_num_bytes,
3849 leaf->start,
3850 root->root_key.objectid,
3851 trans->transid,
3852 key.objectid, key.offset);
3853 BUG_ON(ret);
3854
3855 ret = btrfs_free_extent(trans, root,
3856 extent_key->objectid,
3857 extent_key->offset,
3858 leaf->start,
3859 btrfs_header_owner(leaf),
3860 btrfs_header_generation(leaf),
3861 key.objectid, key.offset, 0);
3862 BUG_ON(ret);
3863
3864 btrfs_release_path(root, path);
3865 key.offset += num_bytes;
3866 } else {
3867 u64 alloc_hint;
3868 u64 extent_len;
3869 int i;
3870 /*
3871 * drop old extent pointer at first, then insert the
3872 * new pointers one bye one
3873 */
3874 btrfs_release_path(root, path);
3875 ret = btrfs_drop_extents(trans, root, inode, key.offset,
3876 key.offset + num_bytes,
3877 key.offset, &alloc_hint);
3878 BUG_ON(ret);
3879
3880 for (i = 0; i < nr_extents; i++) {
3881 if (ext_offset >= new_extents[i].num_bytes) {
3882 ext_offset -= new_extents[i].num_bytes;
3883 continue;
3884 }
3885 extent_len = min(new_extents[i].num_bytes -
3886 ext_offset, num_bytes);
3887
3888 ret = btrfs_insert_empty_item(trans, root,
3889 path, &key,
3890 sizeof(*fi));
3891 BUG_ON(ret);
3892
3893 leaf = path->nodes[0];
3894 fi = btrfs_item_ptr(leaf, path->slots[0],
3895 struct btrfs_file_extent_item);
3896 btrfs_set_file_extent_generation(leaf, fi,
3897 trans->transid);
3898 btrfs_set_file_extent_type(leaf, fi,
3899 BTRFS_FILE_EXTENT_REG);
3900 btrfs_set_file_extent_disk_bytenr(leaf, fi,
3901 new_extents[i].disk_bytenr);
3902 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
3903 new_extents[i].disk_num_bytes);
3904 btrfs_set_file_extent_num_bytes(leaf, fi,
3905 extent_len);
3906 ext_offset += new_extents[i].offset;
3907 btrfs_set_file_extent_offset(leaf, fi,
3908 ext_offset);
3909 btrfs_mark_buffer_dirty(leaf);
3910
3911 btrfs_drop_extent_cache(inode, key.offset,
3912 key.offset + extent_len - 1, 0);
3913
3914 ret = btrfs_inc_extent_ref(trans, root,
3915 new_extents[i].disk_bytenr,
3916 new_extents[i].disk_num_bytes,
3917 leaf->start,
3918 root->root_key.objectid,
3919 trans->transid,
3920 key.objectid, key.offset);
3921 BUG_ON(ret);
3922 btrfs_release_path(root, path);
3923
3924 inode->i_blocks += extent_len >> 9;
3925
3926 ext_offset = 0;
3927 num_bytes -= extent_len;
3928 key.offset += extent_len;
3929
3930 if (num_bytes == 0)
3931 break;
3932 }
3933 BUG_ON(i >= nr_extents);
3934 }
3935
3936 if (extent_locked) {
3937 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3938 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3939 lock_end, GFP_NOFS);
3940 extent_locked = 0;
3941 }
3942skip:
3943 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
3944 key.offset >= first_pos + extent_key->offset)
3945 break;
3946
3947 cond_resched();
3948 }
3949 ret = 0;
3950out:
3951 btrfs_release_path(root, path);
3952 if (inode) {
3953 mutex_unlock(&inode->i_mutex);
3954 if (extent_locked) {
3955 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
3956 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
3957 lock_end, GFP_NOFS);
3958 }
3959 iput(inode);
3960 }
3961 return ret;
3962}
3963
3964int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
3965 u64 num_bytes, u64 new_bytenr)
3966{
3967 set_extent_bits(&root->fs_info->reloc_mapping_tree,
3968 orig_bytenr, orig_bytenr + num_bytes - 1,
3969 EXTENT_LOCKED, GFP_NOFS);
3970 set_state_private(&root->fs_info->reloc_mapping_tree,
3971 orig_bytenr, new_bytenr);
3972 return 0;
3973}
3974
3975int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
3976 u64 num_bytes, u64 *new_bytenr)
3977{
3978 u64 bytenr;
3979 u64 cur_bytenr = orig_bytenr;
3980 u64 prev_bytenr = orig_bytenr;
3981 int ret;
3982
3983 while (1) {
3984 ret = get_state_private(&root->fs_info->reloc_mapping_tree,
3985 cur_bytenr, &bytenr);
3483 if (ret) 3986 if (ret)
3484 goto out; 3987 break;
3988 prev_bytenr = cur_bytenr;
3989 cur_bytenr = bytenr;
3990 }
3485 3991
3486 /* 3992 if (orig_bytenr == cur_bytenr)
3487 * right here almost anything could happen to our key, 3993 return -ENOENT;
3488 * but that's ok. The cow below will either relocate it
3489 * or someone else will have relocated it. Either way,
3490 * it is in a different spot than it was before and
3491 * we're happy.
3492 */
3493 3994
3494 trans = btrfs_start_transaction(found_root, 1); 3995 if (prev_bytenr != orig_bytenr) {
3996 set_state_private(&root->fs_info->reloc_mapping_tree,
3997 orig_bytenr, cur_bytenr);
3998 }
3999 *new_bytenr = cur_bytenr;
4000 return 0;
4001}
3495 4002
3496 if (found_root == extent_root->fs_info->extent_root || 4003void btrfs_free_reloc_mappings(struct btrfs_root *root)
3497 found_root == extent_root->fs_info->chunk_root || 4004{
3498 found_root == extent_root->fs_info->dev_root) { 4005 clear_extent_bits(&root->fs_info->reloc_mapping_tree,
3499 needs_lock = 1; 4006 0, (u64)-1, -1, GFP_NOFS);
3500 mutex_lock(&extent_root->fs_info->alloc_mutex); 4007}
4008
4009int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4010 struct btrfs_root *root,
4011 struct extent_buffer *buf, u64 orig_start)
4012{
4013 int level;
4014 int ret;
4015
4016 BUG_ON(btrfs_header_generation(buf) != trans->transid);
4017 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4018
4019 level = btrfs_header_level(buf);
4020 if (level == 0) {
4021 struct btrfs_leaf_ref *ref;
4022 struct btrfs_leaf_ref *orig_ref;
4023
4024 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4025 if (!orig_ref)
4026 return -ENOENT;
4027
4028 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4029 if (!ref) {
4030 btrfs_free_leaf_ref(root, orig_ref);
4031 return -ENOMEM;
3501 } 4032 }
3502 4033
3503 path->lowest_level = level; 4034 ref->nritems = orig_ref->nritems;
3504 path->reada = 2; 4035 memcpy(ref->extents, orig_ref->extents,
3505 ret = btrfs_search_slot(trans, found_root, &found_key, path, 4036 sizeof(ref->extents[0]) * ref->nritems);
3506 0, 1); 4037
4038 btrfs_free_leaf_ref(root, orig_ref);
4039
4040 ref->root_gen = trans->transid;
4041 ref->bytenr = buf->start;
4042 ref->owner = btrfs_header_owner(buf);
4043 ref->generation = btrfs_header_generation(buf);
4044 ret = btrfs_add_leaf_ref(root, ref, 0);
4045 WARN_ON(ret);
4046 btrfs_free_leaf_ref(root, ref);
4047 }
4048 return 0;
4049}
4050
4051static int noinline invalidate_extent_cache(struct btrfs_root *root,
4052 struct extent_buffer *leaf,
4053 struct btrfs_block_group_cache *group,
4054 struct btrfs_root *target_root)
4055{
4056 struct btrfs_key key;
4057 struct inode *inode = NULL;
4058 struct btrfs_file_extent_item *fi;
4059 u64 num_bytes;
4060 u64 skip_objectid = 0;
4061 u32 nritems;
4062 u32 i;
4063
4064 nritems = btrfs_header_nritems(leaf);
4065 for (i = 0; i < nritems; i++) {
4066 btrfs_item_key_to_cpu(leaf, &key, i);
4067 if (key.objectid == skip_objectid ||
4068 key.type != BTRFS_EXTENT_DATA_KEY)
4069 continue;
4070 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4071 if (btrfs_file_extent_type(leaf, fi) ==
4072 BTRFS_FILE_EXTENT_INLINE)
4073 continue;
4074 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4075 continue;
4076 if (!inode || inode->i_ino != key.objectid) {
4077 iput(inode);
4078 inode = btrfs_ilookup(target_root->fs_info->sb,
4079 key.objectid, target_root, 1);
4080 }
4081 if (!inode) {
4082 skip_objectid = key.objectid;
4083 continue;
4084 }
4085 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4086
4087 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4088 key.offset + num_bytes - 1, GFP_NOFS);
4089 mutex_lock(&BTRFS_I(inode)->extent_mutex);
4090 btrfs_drop_extent_cache(inode, key.offset,
4091 key.offset + num_bytes - 1, 1);
4092 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
4093 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4094 key.offset + num_bytes - 1, GFP_NOFS);
4095 cond_resched();
4096 }
4097 iput(inode);
4098 return 0;
4099}
4100
4101static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4102 struct btrfs_root *root,
4103 struct extent_buffer *leaf,
4104 struct btrfs_block_group_cache *group,
4105 struct inode *reloc_inode)
4106{
4107 struct btrfs_key key;
4108 struct btrfs_key extent_key;
4109 struct btrfs_file_extent_item *fi;
4110 struct btrfs_leaf_ref *ref;
4111 struct disk_extent *new_extent;
4112 u64 bytenr;
4113 u64 num_bytes;
4114 u32 nritems;
4115 u32 i;
4116 int ext_index;
4117 int nr_extent;
4118 int ret;
4119
4120 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
4121 BUG_ON(!new_extent);
4122
4123 ref = btrfs_lookup_leaf_ref(root, leaf->start);
4124 BUG_ON(!ref);
4125
4126 ext_index = -1;
4127 nritems = btrfs_header_nritems(leaf);
4128 for (i = 0; i < nritems; i++) {
4129 btrfs_item_key_to_cpu(leaf, &key, i);
4130 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4131 continue;
4132 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4133 if (btrfs_file_extent_type(leaf, fi) ==
4134 BTRFS_FILE_EXTENT_INLINE)
4135 continue;
4136 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4137 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4138 if (bytenr == 0)
4139 continue;
4140
4141 ext_index++;
4142 if (bytenr >= group->key.objectid + group->key.offset ||
4143 bytenr + num_bytes <= group->key.objectid)
4144 continue;
4145
4146 extent_key.objectid = bytenr;
4147 extent_key.offset = num_bytes;
4148 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4149 nr_extent = 1;
4150 ret = get_new_locations(reloc_inode, &extent_key,
4151 group->key.objectid, 1,
4152 &new_extent, &nr_extent);
4153 if (ret > 0)
4154 continue;
4155 BUG_ON(ret < 0);
4156
4157 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
4158 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
4159 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
4160 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
4161
4162 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
4163 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4164 new_extent->disk_bytenr);
4165 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4166 new_extent->disk_num_bytes);
4167 new_extent->offset += btrfs_file_extent_offset(leaf, fi);
4168 btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
4169 btrfs_mark_buffer_dirty(leaf);
4170
4171 ret = btrfs_inc_extent_ref(trans, root,
4172 new_extent->disk_bytenr,
4173 new_extent->disk_num_bytes,
4174 leaf->start,
4175 root->root_key.objectid,
4176 trans->transid,
4177 key.objectid, key.offset);
4178 BUG_ON(ret);
4179 ret = btrfs_free_extent(trans, root,
4180 bytenr, num_bytes, leaf->start,
4181 btrfs_header_owner(leaf),
4182 btrfs_header_generation(leaf),
4183 key.objectid, key.offset, 0);
4184 BUG_ON(ret);
4185 cond_resched();
4186 }
4187 kfree(new_extent);
4188 BUG_ON(ext_index + 1 != ref->nritems);
4189 btrfs_free_leaf_ref(root, ref);
4190 return 0;
4191}
4192
4193int btrfs_free_reloc_root(struct btrfs_root *root)
4194{
4195 struct btrfs_root *reloc_root;
4196
4197 if (root->reloc_root) {
4198 reloc_root = root->reloc_root;
4199 root->reloc_root = NULL;
4200 list_add(&reloc_root->dead_list,
4201 &root->fs_info->dead_reloc_roots);
4202 }
4203 return 0;
4204}
4205
4206int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
4207{
4208 struct btrfs_trans_handle *trans;
4209 struct btrfs_root *reloc_root;
4210 struct btrfs_root *prev_root = NULL;
4211 struct list_head dead_roots;
4212 int ret;
4213 unsigned long nr;
4214
4215 INIT_LIST_HEAD(&dead_roots);
4216 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
4217
4218 while (!list_empty(&dead_roots)) {
4219 reloc_root = list_entry(dead_roots.prev,
4220 struct btrfs_root, dead_list);
4221 list_del_init(&reloc_root->dead_list);
4222
4223 BUG_ON(reloc_root->commit_root != NULL);
4224 while (1) {
4225 trans = btrfs_join_transaction(root, 1);
4226 BUG_ON(!trans);
4227
4228 mutex_lock(&root->fs_info->drop_mutex);
4229 ret = btrfs_drop_snapshot(trans, reloc_root);
4230 if (ret != -EAGAIN)
4231 break;
4232 mutex_unlock(&root->fs_info->drop_mutex);
4233
4234 nr = trans->blocks_used;
4235 ret = btrfs_end_transaction(trans, root);
4236 BUG_ON(ret);
4237 btrfs_btree_balance_dirty(root, nr);
4238 }
4239
4240 free_extent_buffer(reloc_root->node);
4241
4242 ret = btrfs_del_root(trans, root->fs_info->tree_root,
4243 &reloc_root->root_key);
4244 BUG_ON(ret);
4245 mutex_unlock(&root->fs_info->drop_mutex);
4246
4247 nr = trans->blocks_used;
4248 ret = btrfs_end_transaction(trans, root);
4249 BUG_ON(ret);
4250 btrfs_btree_balance_dirty(root, nr);
4251
4252 kfree(prev_root);
4253 prev_root = reloc_root;
4254 }
4255 if (prev_root) {
4256 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
4257 kfree(prev_root);
4258 }
4259 return 0;
4260}
4261
4262int btrfs_add_dead_reloc_root(struct btrfs_root *root)
4263{
4264 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
4265 return 0;
4266}
4267
4268int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
4269{
4270 struct btrfs_root *reloc_root;
4271 struct btrfs_trans_handle *trans;
4272 struct btrfs_key location;
4273 int found;
4274 int ret;
4275
4276 mutex_lock(&root->fs_info->tree_reloc_mutex);
4277 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
4278 BUG_ON(ret);
4279 found = !list_empty(&root->fs_info->dead_reloc_roots);
4280 mutex_unlock(&root->fs_info->tree_reloc_mutex);
4281
4282 if (found) {
4283 trans = btrfs_start_transaction(root, 1);
4284 BUG_ON(!trans);
4285 ret = btrfs_commit_transaction(trans, root);
4286 BUG_ON(ret);
4287 }
4288
4289 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4290 location.offset = (u64)-1;
4291 location.type = BTRFS_ROOT_ITEM_KEY;
4292
4293 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
4294 BUG_ON(!reloc_root);
4295 btrfs_orphan_cleanup(reloc_root);
4296 return 0;
4297}
4298
4299static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
4300 struct btrfs_root *root)
4301{
4302 struct btrfs_root *reloc_root;
4303 struct extent_buffer *eb;
4304 struct btrfs_root_item *root_item;
4305 struct btrfs_key root_key;
4306 int ret;
4307
4308 BUG_ON(!root->ref_cows);
4309 if (root->reloc_root)
4310 return 0;
4311
4312 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
4313 BUG_ON(!root_item);
4314
4315 ret = btrfs_copy_root(trans, root, root->commit_root,
4316 &eb, BTRFS_TREE_RELOC_OBJECTID);
4317 BUG_ON(ret);
4318
4319 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4320 root_key.offset = root->root_key.objectid;
4321 root_key.type = BTRFS_ROOT_ITEM_KEY;
4322
4323 memcpy(root_item, &root->root_item, sizeof(root_item));
4324 btrfs_set_root_refs(root_item, 0);
4325 btrfs_set_root_bytenr(root_item, eb->start);
4326 btrfs_set_root_level(root_item, btrfs_header_level(eb));
4327 memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress));
4328 root_item->drop_level = 0;
4329
4330 btrfs_tree_unlock(eb);
4331 free_extent_buffer(eb);
4332
4333 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
4334 &root_key, root_item);
4335 BUG_ON(ret);
4336 kfree(root_item);
4337
4338 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
4339 &root_key);
4340 BUG_ON(!reloc_root);
4341 reloc_root->last_trans = trans->transid;
4342 reloc_root->commit_root = NULL;
4343 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
4344
4345 root->reloc_root = reloc_root;
4346 return 0;
4347}
4348
4349/*
4350 * Core function of space balance.
4351 *
4352 * The idea is using reloc trees to relocate tree blocks in reference
4353 * counted roots. There is one reloc tree for each subvol, all reloc
4354 * trees share same key objectid. Reloc trees are snapshots of the
4355 * latest committed roots (subvol root->commit_root). To relocate a tree
4356 * block referenced by a subvol, the code COW the block through the reloc
4357 * tree, then update pointer in the subvol to point to the new block.
4358 * Since all reloc trees share same key objectid, we can easily do special
4359 * handing to share tree blocks between reloc trees. Once a tree block has
4360 * been COWed in one reloc tree, we can use the result when the same block
4361 * is COWed again through other reloc trees.
4362 */
4363static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
4364 struct btrfs_root *root,
4365 struct btrfs_path *path,
4366 struct btrfs_key *first_key,
4367 struct btrfs_ref_path *ref_path,
4368 struct btrfs_block_group_cache *group,
4369 struct inode *reloc_inode)
4370{
4371 struct btrfs_root *reloc_root;
4372 struct extent_buffer *eb = NULL;
4373 struct btrfs_key *keys;
4374 u64 *nodes;
4375 int level;
4376 int lowest_merge;
4377 int lowest_level = 0;
4378 int update_refs;
4379 int ret;
4380
4381 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
4382 lowest_level = ref_path->owner_objectid;
4383
4384 if (is_cowonly_root(ref_path->root_objectid)) {
4385 path->lowest_level = lowest_level;
4386 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
4387 BUG_ON(ret < 0);
3507 path->lowest_level = 0; 4388 path->lowest_level = 0;
3508 btrfs_release_path(found_root, path); 4389 btrfs_release_path(root, path);
4390 return 0;
4391 }
3509 4392
3510 if (found_root == found_root->fs_info->extent_root) 4393 keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS);
3511 btrfs_extent_post_op(trans, found_root); 4394 BUG_ON(!keys);
3512 if (needs_lock) 4395 nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS);
3513 mutex_unlock(&extent_root->fs_info->alloc_mutex); 4396 BUG_ON(!nodes);
4397
4398 mutex_lock(&root->fs_info->tree_reloc_mutex);
4399 ret = init_reloc_tree(trans, root);
4400 BUG_ON(ret);
4401 reloc_root = root->reloc_root;
4402
4403 path->lowest_level = lowest_level;
4404 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0);
4405 BUG_ON(ret);
4406 /*
4407 * get relocation mapping for tree blocks in the path
4408 */
4409 lowest_merge = BTRFS_MAX_LEVEL;
4410 for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
4411 u64 new_bytenr;
4412 eb = path->nodes[level];
4413 if (!eb || eb == reloc_root->node)
4414 continue;
4415 ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len,
4416 &new_bytenr);
4417 if (ret)
4418 continue;
4419 if (level == 0)
4420 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4421 else
4422 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4423 nodes[level] = new_bytenr;
4424 lowest_merge = level;
4425 }
3514 4426
3515 btrfs_end_transaction(trans, found_root); 4427 update_refs = 0;
4428 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4429 eb = path->nodes[0];
4430 if (btrfs_header_generation(eb) < trans->transid)
4431 update_refs = 1;
4432 }
3516 4433
4434 btrfs_release_path(reloc_root, path);
4435 /*
4436 * merge tree blocks that already relocated in other reloc trees
4437 */
4438 if (lowest_merge != BTRFS_MAX_LEVEL) {
4439 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
4440 lowest_merge);
4441 BUG_ON(ret < 0);
3517 } 4442 }
3518out: 4443 /*
3519 mutex_lock(&extent_root->fs_info->alloc_mutex); 4444 * cow any tree blocks that still haven't been relocated
4445 */
4446 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1);
4447 BUG_ON(ret);
4448 /*
4449 * if we are relocating data block group, update extent pointers
4450 * in the newly created tree leaf.
4451 */
4452 eb = path->nodes[0];
4453 if (update_refs && nodes[0] != eb->start) {
4454 ret = replace_extents_in_leaf(trans, reloc_root, eb, group,
4455 reloc_inode);
4456 BUG_ON(ret);
4457 }
4458
4459 memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL);
4460 memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL);
4461 for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
4462 eb = path->nodes[level];
4463 if (!eb || eb == reloc_root->node)
4464 continue;
4465 BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID);
4466 nodes[level] = eb->start;
4467 if (level == 0)
4468 btrfs_item_key_to_cpu(eb, &keys[level], 0);
4469 else
4470 btrfs_node_key_to_cpu(eb, &keys[level], 0);
4471 }
4472
4473 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4474 eb = path->nodes[0];
4475 extent_buffer_get(eb);
4476 }
4477 btrfs_release_path(reloc_root, path);
4478 /*
4479 * replace tree blocks in the fs tree with tree blocks in
4480 * the reloc tree.
4481 */
4482 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
4483 BUG_ON(ret < 0);
4484
4485 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4486 ret = invalidate_extent_cache(reloc_root, eb, group, root);
4487 BUG_ON(ret);
4488 free_extent_buffer(eb);
4489 }
4490 mutex_unlock(&root->fs_info->tree_reloc_mutex);
4491
4492 path->lowest_level = 0;
4493 kfree(nodes);
4494 kfree(keys);
4495 return 0;
4496}
4497
4498static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
4499 struct btrfs_root *root,
4500 struct btrfs_path *path,
4501 struct btrfs_key *first_key,
4502 struct btrfs_ref_path *ref_path)
4503{
4504 int ret;
4505 int needs_lock = 0;
4506
4507 if (root == root->fs_info->extent_root ||
4508 root == root->fs_info->chunk_root ||
4509 root == root->fs_info->dev_root) {
4510 needs_lock = 1;
4511 mutex_lock(&root->fs_info->alloc_mutex);
4512 }
4513
4514 ret = relocate_one_path(trans, root, path, first_key,
4515 ref_path, NULL, NULL);
4516 BUG_ON(ret);
4517
4518 if (root == root->fs_info->extent_root)
4519 btrfs_extent_post_op(trans, root);
4520 if (needs_lock)
4521 mutex_unlock(&root->fs_info->alloc_mutex);
4522
3520 return 0; 4523 return 0;
3521} 4524}
3522 4525
3523static int noinline del_extent_zero(struct btrfs_root *extent_root, 4526static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
4527 struct btrfs_root *extent_root,
3524 struct btrfs_path *path, 4528 struct btrfs_path *path,
3525 struct btrfs_key *extent_key) 4529 struct btrfs_key *extent_key)
3526{ 4530{
3527 int ret; 4531 int ret;
3528 struct btrfs_trans_handle *trans;
3529 4532
3530 trans = btrfs_start_transaction(extent_root, 1); 4533 mutex_lock(&extent_root->fs_info->alloc_mutex);
3531 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 4534 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3532 if (ret > 0) { 4535 if (ret)
3533 ret = -EIO;
3534 goto out;
3535 }
3536 if (ret < 0)
3537 goto out; 4536 goto out;
3538 ret = btrfs_del_item(trans, extent_root, path); 4537 ret = btrfs_del_item(trans, extent_root, path);
3539out: 4538out:
3540 btrfs_end_transaction(trans, extent_root); 4539 btrfs_release_path(extent_root, path);
4540 mutex_unlock(&extent_root->fs_info->alloc_mutex);
3541 return ret; 4541 return ret;
3542} 4542}
3543 4543
4544static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
4545 struct btrfs_ref_path *ref_path)
4546{
4547 struct btrfs_key root_key;
4548
4549 root_key.objectid = ref_path->root_objectid;
4550 root_key.type = BTRFS_ROOT_ITEM_KEY;
4551 if (is_cowonly_root(ref_path->root_objectid))
4552 root_key.offset = 0;
4553 else
4554 root_key.offset = (u64)-1;
4555
4556 return btrfs_read_fs_root_no_name(fs_info, &root_key);
4557}
4558
3544static int noinline relocate_one_extent(struct btrfs_root *extent_root, 4559static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3545 struct btrfs_path *path, 4560 struct btrfs_path *path,
3546 struct btrfs_key *extent_key) 4561 struct btrfs_key *extent_key,
4562 struct btrfs_block_group_cache *group,
4563 struct inode *reloc_inode, int pass)
3547{ 4564{
3548 struct btrfs_key key; 4565 struct btrfs_trans_handle *trans;
3549 struct btrfs_key found_key; 4566 struct btrfs_root *found_root;
3550 struct extent_buffer *leaf; 4567 struct btrfs_ref_path *ref_path = NULL;
3551 u64 last_file_objectid = 0; 4568 struct disk_extent *new_extents = NULL;
3552 u64 last_file_root = 0; 4569 int nr_extents = 0;
3553 u64 last_file_offset = (u64)-1; 4570 int loops;
3554 u64 last_extent = 0; 4571 int ret;
3555 u32 nritems; 4572 int level;
3556 u32 item_size; 4573 struct btrfs_key first_key;
3557 int ret = 0; 4574 u64 prev_block = 0;
4575
4576 mutex_unlock(&extent_root->fs_info->alloc_mutex);
4577
4578 trans = btrfs_start_transaction(extent_root, 1);
4579 BUG_ON(!trans);
3558 4580
3559 if (extent_key->objectid == 0) { 4581 if (extent_key->objectid == 0) {
3560 ret = del_extent_zero(extent_root, path, extent_key); 4582 ret = del_extent_zero(trans, extent_root, path, extent_key);
3561 goto out; 4583 goto out;
3562 } 4584 }
3563 key.objectid = extent_key->objectid;
3564 key.type = BTRFS_EXTENT_REF_KEY;
3565 key.offset = 0;
3566 4585
3567 while(1) { 4586 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
3568 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 4587 if (!ref_path) {
4588 ret = -ENOMEM;
4589 goto out;
4590 }
3569 4591
4592 for (loops = 0; ; loops++) {
4593 if (loops == 0) {
4594 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
4595 extent_key->objectid);
4596 } else {
4597 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
4598 }
3570 if (ret < 0) 4599 if (ret < 0)
3571 goto out; 4600 goto out;
4601 if (ret > 0)
4602 break;
3572 4603
3573 ret = 0; 4604 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
3574 leaf = path->nodes[0]; 4605 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
3575 nritems = btrfs_header_nritems(leaf); 4606 continue;
3576 if (path->slots[0] == nritems) { 4607
3577 ret = btrfs_next_leaf(extent_root, path); 4608 found_root = read_ref_root(extent_root->fs_info, ref_path);
3578 if (ret > 0) { 4609 BUG_ON(!found_root);
3579 ret = 0; 4610 /*
3580 goto out; 4611 * for reference counted tree, only process reference paths
4612 * rooted at the latest committed root.
4613 */
4614 if (found_root->ref_cows &&
4615 ref_path->root_generation != found_root->root_key.offset)
4616 continue;
4617
4618 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
4619 if (pass == 0) {
4620 /*
4621 * copy data extents to new locations
4622 */
4623 u64 group_start = group->key.objectid;
4624 ret = relocate_data_extent(reloc_inode,
4625 extent_key,
4626 group_start);
4627 if (ret < 0)
4628 goto out;
4629 break;
3581 } 4630 }
3582 if (ret < 0) 4631 level = 0;
3583 goto out; 4632 } else {
3584 leaf = path->nodes[0]; 4633 level = ref_path->owner_objectid;
3585 } 4634 }
3586 4635
3587 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4636 if (prev_block != ref_path->nodes[level]) {
3588 if (found_key.objectid != extent_key->objectid) { 4637 struct extent_buffer *eb;
3589 break; 4638 u64 block_start = ref_path->nodes[level];
3590 } 4639 u64 block_size = btrfs_level_size(found_root, level);
3591 4640
3592 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 4641 eb = read_tree_block(found_root, block_start,
3593 break; 4642 block_size, 0);
4643 btrfs_tree_lock(eb);
4644 BUG_ON(level != btrfs_header_level(eb));
4645
4646 if (level == 0)
4647 btrfs_item_key_to_cpu(eb, &first_key, 0);
4648 else
4649 btrfs_node_key_to_cpu(eb, &first_key, 0);
4650
4651 btrfs_tree_unlock(eb);
4652 free_extent_buffer(eb);
4653 prev_block = block_start;
3594 } 4654 }
3595 4655
3596 key.offset = found_key.offset + 1; 4656 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3597 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 4657 pass >= 2) {
4658 /*
4659 * use fallback method to process the remaining
4660 * references.
4661 */
4662 if (!new_extents) {
4663 u64 group_start = group->key.objectid;
4664 ret = get_new_locations(reloc_inode,
4665 extent_key,
4666 group_start, 0,
4667 &new_extents,
4668 &nr_extents);
4669 if (ret < 0)
4670 goto out;
4671 }
4672 btrfs_record_root_in_trans(found_root);
4673 ret = replace_one_extent(trans, found_root,
4674 path, extent_key,
4675 &first_key, ref_path,
4676 new_extents, nr_extents);
4677 if (ret < 0)
4678 goto out;
4679 continue;
4680 }
3598 4681
3599 ret = relocate_one_reference(extent_root, path, extent_key, 4682 btrfs_record_root_in_trans(found_root);
3600 &last_file_objectid, 4683 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3601 &last_file_offset, 4684 ret = relocate_tree_block(trans, found_root, path,
3602 &last_file_root, last_extent); 4685 &first_key, ref_path);
3603 if (ret) 4686 } else {
4687 /*
4688 * try to update data extent references while
4689 * keeping metadata shared between snapshots.
4690 */
4691 ret = relocate_one_path(trans, found_root, path,
4692 &first_key, ref_path,
4693 group, reloc_inode);
4694 }
4695 if (ret < 0)
3604 goto out; 4696 goto out;
3605 last_extent = extent_key->objectid;
3606 } 4697 }
3607 ret = 0; 4698 ret = 0;
3608out: 4699out:
3609 btrfs_release_path(extent_root, path); 4700 btrfs_end_transaction(trans, extent_root);
4701 kfree(new_extents);
4702 kfree(ref_path);
4703 mutex_lock(&extent_root->fs_info->alloc_mutex);
3610 return ret; 4704 return ret;
3611} 4705}
3612 4706
@@ -3686,84 +4780,155 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root,
3686 return 0; 4780 return 0;
3687} 4781}
3688 4782
3689int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) 4783static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
4784 struct btrfs_root *root,
4785 u64 objectid, u64 size)
4786{
4787 struct btrfs_path *path;
4788 struct btrfs_inode_item *item;
4789 struct extent_buffer *leaf;
4790 int ret;
4791
4792 path = btrfs_alloc_path();
4793 if (!path)
4794 return -ENOMEM;
4795
4796 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
4797 if (ret)
4798 goto out;
4799
4800 leaf = path->nodes[0];
4801 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
4802 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
4803 btrfs_set_inode_generation(leaf, item, 1);
4804 btrfs_set_inode_size(leaf, item, size);
4805 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
4806 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
4807 btrfs_mark_buffer_dirty(leaf);
4808 btrfs_release_path(root, path);
4809out:
4810 btrfs_free_path(path);
4811 return ret;
4812}
4813
4814static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
4815 struct btrfs_block_group_cache *group)
4816{
4817 struct inode *inode = NULL;
4818 struct btrfs_trans_handle *trans;
4819 struct btrfs_root *root;
4820 struct btrfs_key root_key;
4821 u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
4822 int err = 0;
4823
4824 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4825 root_key.type = BTRFS_ROOT_ITEM_KEY;
4826 root_key.offset = (u64)-1;
4827 root = btrfs_read_fs_root_no_name(fs_info, &root_key);
4828 if (IS_ERR(root))
4829 return ERR_CAST(root);
4830
4831 trans = btrfs_start_transaction(root, 1);
4832 BUG_ON(!trans);
4833
4834 err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
4835 if (err)
4836 goto out;
4837
4838 err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
4839 BUG_ON(err);
4840
4841 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
4842 group->key.offset, 0);
4843 BUG_ON(err);
4844
4845 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
4846 if (inode->i_state & I_NEW) {
4847 BTRFS_I(inode)->root = root;
4848 BTRFS_I(inode)->location.objectid = objectid;
4849 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
4850 BTRFS_I(inode)->location.offset = 0;
4851 btrfs_read_locked_inode(inode);
4852 unlock_new_inode(inode);
4853 BUG_ON(is_bad_inode(inode));
4854 } else {
4855 BUG_ON(1);
4856 }
4857
4858 err = btrfs_orphan_add(trans, inode);
4859out:
4860 btrfs_end_transaction(trans, root);
4861 if (err) {
4862 if (inode)
4863 iput(inode);
4864 inode = ERR_PTR(err);
4865 }
4866 return inode;
4867}
4868
4869int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
3690{ 4870{
3691 struct btrfs_trans_handle *trans; 4871 struct btrfs_trans_handle *trans;
3692 struct btrfs_root *tree_root = root->fs_info->tree_root;
3693 struct btrfs_path *path; 4872 struct btrfs_path *path;
4873 struct btrfs_fs_info *info = root->fs_info;
4874 struct extent_buffer *leaf;
4875 struct inode *reloc_inode;
4876 struct btrfs_block_group_cache *block_group;
4877 struct btrfs_key key;
3694 u64 cur_byte; 4878 u64 cur_byte;
3695 u64 total_found; 4879 u64 total_found;
3696 u64 shrink_last_byte;
3697 struct btrfs_block_group_cache *shrink_block_group;
3698 struct btrfs_key key;
3699 struct btrfs_key found_key;
3700 struct extent_buffer *leaf;
3701 u32 nritems; 4880 u32 nritems;
3702 int ret; 4881 int ret;
3703 int progress; 4882 int progress;
4883 int pass = 0;
3704 4884
3705 mutex_lock(&root->fs_info->alloc_mutex); 4885 root = root->fs_info->extent_root;
3706 shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3707 shrink_start);
3708 BUG_ON(!shrink_block_group);
3709 4886
3710 shrink_last_byte = shrink_block_group->key.objectid + 4887 block_group = btrfs_lookup_block_group(info, group_start);
3711 shrink_block_group->key.offset; 4888 BUG_ON(!block_group);
4889
4890 printk("btrfs relocating block group %llu flags %llu\n",
4891 (unsigned long long)block_group->key.objectid,
4892 (unsigned long long)block_group->flags);
3712 4893
3713 shrink_block_group->space_info->total_bytes -=
3714 shrink_block_group->key.offset;
3715 path = btrfs_alloc_path(); 4894 path = btrfs_alloc_path();
3716 root = root->fs_info->extent_root; 4895 BUG_ON(!path);
3717 path->reada = 2;
3718 4896
3719 printk("btrfs relocating block group %llu flags %llu\n", 4897 reloc_inode = create_reloc_inode(info, block_group);
3720 (unsigned long long)shrink_start, 4898 BUG_ON(IS_ERR(reloc_inode));
3721 (unsigned long long)shrink_block_group->flags);
3722 4899
3723 __alloc_chunk_for_shrink(root, shrink_block_group, 1); 4900 mutex_lock(&root->fs_info->alloc_mutex);
3724 4901
3725again: 4902 __alloc_chunk_for_shrink(root, block_group, 1);
4903 block_group->ro = 1;
4904 block_group->space_info->total_bytes -= block_group->key.offset;
3726 4905
3727 shrink_block_group->ro = 1; 4906 mutex_unlock(&root->fs_info->alloc_mutex);
3728 4907
4908 btrfs_start_delalloc_inodes(info->tree_root);
4909 btrfs_wait_ordered_extents(info->tree_root, 0);
4910again:
3729 total_found = 0; 4911 total_found = 0;
3730 progress = 0; 4912 progress = 0;
3731 key.objectid = shrink_start; 4913 key.objectid = block_group->key.objectid;
3732 key.offset = 0; 4914 key.offset = 0;
3733 key.type = 0; 4915 key.type = 0;
3734 cur_byte = key.objectid; 4916 cur_byte = key.objectid;
3735 4917
3736 mutex_unlock(&root->fs_info->alloc_mutex); 4918 trans = btrfs_start_transaction(info->tree_root, 1);
4919 btrfs_commit_transaction(trans, info->tree_root);
3737 4920
3738 btrfs_start_delalloc_inodes(root); 4921 mutex_lock(&root->fs_info->cleaner_mutex);
3739 btrfs_wait_ordered_extents(tree_root, 0); 4922 btrfs_clean_old_snapshots(info->tree_root);
4923 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
4924 mutex_unlock(&root->fs_info->cleaner_mutex);
3740 4925
3741 mutex_lock(&root->fs_info->alloc_mutex); 4926 mutex_lock(&root->fs_info->alloc_mutex);
3742 4927
3743 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3744 if (ret < 0)
3745 goto out;
3746
3747 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3748 if (ret < 0)
3749 goto out;
3750
3751 if (ret == 0) {
3752 leaf = path->nodes[0];
3753 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3754 if (found_key.objectid + found_key.offset > shrink_start &&
3755 found_key.objectid < shrink_last_byte) {
3756 cur_byte = found_key.objectid;
3757 key.objectid = cur_byte;
3758 }
3759 }
3760 btrfs_release_path(root, path);
3761
3762 while(1) { 4928 while(1) {
3763 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4929 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3764 if (ret < 0) 4930 if (ret < 0)
3765 goto out; 4931 goto out;
3766
3767next: 4932next:
3768 leaf = path->nodes[0]; 4933 leaf = path->nodes[0];
3769 nritems = btrfs_header_nritems(leaf); 4934 nritems = btrfs_header_nritems(leaf);
@@ -3779,109 +4944,76 @@ next:
3779 nritems = btrfs_header_nritems(leaf); 4944 nritems = btrfs_header_nritems(leaf);
3780 } 4945 }
3781 4946
3782 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4947 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3783 4948
3784 if (found_key.objectid >= shrink_last_byte) 4949 if (key.objectid >= block_group->key.objectid +
4950 block_group->key.offset)
3785 break; 4951 break;
3786 4952
3787 if (progress && need_resched()) { 4953 if (progress && need_resched()) {
3788 memcpy(&key, &found_key, sizeof(key));
3789 cond_resched();
3790 btrfs_release_path(root, path); 4954 btrfs_release_path(root, path);
3791 btrfs_search_slot(NULL, root, &key, path, 0, 0); 4955 mutex_unlock(&root->fs_info->alloc_mutex);
4956 cond_resched();
4957 mutex_lock(&root->fs_info->alloc_mutex);
3792 progress = 0; 4958 progress = 0;
3793 goto next; 4959 continue;
3794 } 4960 }
3795 progress = 1; 4961 progress = 1;
3796 4962
3797 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || 4963 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
3798 found_key.objectid + found_key.offset <= cur_byte) { 4964 key.objectid + key.offset <= cur_byte) {
3799 memcpy(&key, &found_key, sizeof(key));
3800 key.offset++;
3801 path->slots[0]++; 4965 path->slots[0]++;
3802 goto next; 4966 goto next;
3803 } 4967 }
3804 4968
3805 total_found++; 4969 total_found++;
3806 cur_byte = found_key.objectid + found_key.offset; 4970 cur_byte = key.objectid + key.offset;
3807 key.objectid = cur_byte;
3808 btrfs_release_path(root, path); 4971 btrfs_release_path(root, path);
3809 ret = relocate_one_extent(root, path, &found_key);
3810 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3811 }
3812
3813 btrfs_release_path(root, path);
3814
3815 if (total_found > 0) {
3816 printk("btrfs relocate found %llu last extent was %llu\n",
3817 (unsigned long long)total_found,
3818 (unsigned long long)found_key.objectid);
3819 mutex_unlock(&root->fs_info->alloc_mutex);
3820 trans = btrfs_start_transaction(tree_root, 1);
3821 btrfs_commit_transaction(trans, tree_root);
3822
3823 btrfs_clean_old_snapshots(tree_root);
3824 4972
3825 btrfs_start_delalloc_inodes(root); 4973 __alloc_chunk_for_shrink(root, block_group, 0);
3826 btrfs_wait_ordered_extents(tree_root, 0); 4974 ret = relocate_one_extent(root, path, &key, block_group,
4975 reloc_inode, pass);
4976 BUG_ON(ret < 0);
3827 4977
3828 trans = btrfs_start_transaction(tree_root, 1); 4978 key.objectid = cur_byte;
3829 btrfs_commit_transaction(trans, tree_root); 4979 key.type = 0;
3830 mutex_lock(&root->fs_info->alloc_mutex); 4980 key.offset = 0;
3831 goto again;
3832 } 4981 }
3833 4982
3834 /* 4983 btrfs_release_path(root, path);
3835 * we've freed all the extents, now remove the block
3836 * group item from the tree
3837 */
3838 mutex_unlock(&root->fs_info->alloc_mutex); 4984 mutex_unlock(&root->fs_info->alloc_mutex);
3839 4985
3840 trans = btrfs_start_transaction(root, 1); 4986 if (pass == 0) {
3841 4987 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
3842 mutex_lock(&root->fs_info->alloc_mutex); 4988 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
3843 memcpy(&key, &shrink_block_group->key, sizeof(key)); 4989 WARN_ON(reloc_inode->i_mapping->nrpages);
3844
3845 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3846 if (ret > 0)
3847 ret = -EIO;
3848 if (ret < 0) {
3849 btrfs_end_transaction(trans, root);
3850 goto out;
3851 } 4990 }
3852 4991
3853 spin_lock(&root->fs_info->block_group_cache_lock); 4992 if (total_found > 0) {
3854 rb_erase(&shrink_block_group->cache_node, 4993 printk("btrfs found %llu extents in pass %d\n",
3855 &root->fs_info->block_group_cache_tree); 4994 (unsigned long long)total_found, pass);
3856 spin_unlock(&root->fs_info->block_group_cache_lock); 4995 pass++;
3857 4996 goto again;
3858 ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3859 key.offset);
3860 if (ret) {
3861 btrfs_end_transaction(trans, root);
3862 goto out;
3863 } 4997 }
3864 /*
3865 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3866 kfree(shrink_block_group);
3867 */
3868 4998
3869 btrfs_del_item(trans, root, path); 4999 /* delete reloc_inode */
3870 btrfs_release_path(root, path); 5000 iput(reloc_inode);
3871 mutex_unlock(&root->fs_info->alloc_mutex); 5001
3872 btrfs_commit_transaction(trans, root); 5002 /* unpin extents in this range */
5003 trans = btrfs_start_transaction(info->tree_root, 1);
5004 btrfs_commit_transaction(trans, info->tree_root);
3873 5005
3874 mutex_lock(&root->fs_info->alloc_mutex); 5006 mutex_lock(&root->fs_info->alloc_mutex);
3875 5007
3876 /* the code to unpin extents might set a few bits in the free 5008 spin_lock(&block_group->lock);
3877 * space cache for this range again 5009 WARN_ON(block_group->pinned > 0);
3878 */ 5010 WARN_ON(block_group->reserved > 0);
3879 /* XXX? */ 5011 WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
3880 ret = btrfs_remove_free_space(shrink_block_group, key.objectid, 5012 spin_unlock(&block_group->lock);
3881 key.offset); 5013 ret = 0;
3882out: 5014out:
3883 btrfs_free_path(path);
3884 mutex_unlock(&root->fs_info->alloc_mutex); 5015 mutex_unlock(&root->fs_info->alloc_mutex);
5016 btrfs_free_path(path);
3885 return ret; 5017 return ret;
3886} 5018}
3887 5019
@@ -3922,6 +5054,33 @@ out:
3922 return ret; 5054 return ret;
3923} 5055}
3924 5056
5057int btrfs_free_block_groups(struct btrfs_fs_info *info)
5058{
5059 struct btrfs_block_group_cache *block_group;
5060 struct rb_node *n;
5061
5062 mutex_lock(&info->alloc_mutex);
5063 spin_lock(&info->block_group_cache_lock);
5064 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5065 block_group = rb_entry(n, struct btrfs_block_group_cache,
5066 cache_node);
5067
5068 spin_unlock(&info->block_group_cache_lock);
5069 btrfs_remove_free_space_cache(block_group);
5070 spin_lock(&info->block_group_cache_lock);
5071
5072 rb_erase(&block_group->cache_node,
5073 &info->block_group_cache_tree);
5074 spin_lock(&block_group->space_info->lock);
5075 list_del(&block_group->list);
5076 spin_unlock(&block_group->space_info->lock);
5077 kfree(block_group);
5078 }
5079 spin_unlock(&info->block_group_cache_lock);
5080 mutex_unlock(&info->alloc_mutex);
5081 return 0;
5082}
5083
3925int btrfs_read_block_groups(struct btrfs_root *root) 5084int btrfs_read_block_groups(struct btrfs_root *root)
3926{ 5085{
3927 struct btrfs_path *path; 5086 struct btrfs_path *path;
@@ -4039,3 +5198,46 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
4039 5198
4040 return 0; 5199 return 0;
4041} 5200}
5201
5202int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5203 struct btrfs_root *root, u64 group_start)
5204{
5205 struct btrfs_path *path;
5206 struct btrfs_block_group_cache *block_group;
5207 struct btrfs_key key;
5208 int ret;
5209
5210 BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
5211 root = root->fs_info->extent_root;
5212
5213 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
5214 BUG_ON(!block_group);
5215
5216 memcpy(&key, &block_group->key, sizeof(key));
5217
5218 path = btrfs_alloc_path();
5219 BUG_ON(!path);
5220
5221 btrfs_remove_free_space_cache(block_group);
5222 rb_erase(&block_group->cache_node,
5223 &root->fs_info->block_group_cache_tree);
5224 spin_lock(&block_group->space_info->lock);
5225 list_del(&block_group->list);
5226 spin_unlock(&block_group->space_info->lock);
5227
5228 /*
5229 memset(shrink_block_group, 0, sizeof(*shrink_block_group));
5230 kfree(shrink_block_group);
5231 */
5232
5233 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5234 if (ret > 0)
5235 ret = -EIO;
5236 if (ret < 0)
5237 goto out;
5238
5239 ret = btrfs_del_item(trans, root, path);
5240out:
5241 btrfs_free_path(path);
5242 return ret;
5243}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index e3984f902e71..0091c01abb06 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -210,7 +210,10 @@ again:
210 goto err; 210 goto err;
211 } 211 }
212 212
213 ret = btrfs_add_dead_root(dead_root, latest); 213 if (objectid == BTRFS_TREE_RELOC_OBJECTID)
214 ret = btrfs_add_dead_reloc_root(dead_root);
215 else
216 ret = btrfs_add_dead_root(dead_root, latest);
214 if (ret) 217 if (ret)
215 goto err; 218 goto err;
216 goto again; 219 goto again;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8c83cf464c83..444abe0796ae 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -477,6 +477,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
477 dirty = root->dirty_root; 477 dirty = root->dirty_root;
478 478
479 btrfs_free_log(trans, root); 479 btrfs_free_log(trans, root);
480 btrfs_free_reloc_root(root);
480 481
481 if (root->commit_root == root->node) { 482 if (root->commit_root == root->node) {
482 WARN_ON(root->node->start != 483 WARN_ON(root->node->start !=
@@ -855,6 +856,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
855 * with the tree-log code. 856 * with the tree-log code.
856 */ 857 */
857 mutex_lock(&root->fs_info->tree_log_mutex); 858 mutex_lock(&root->fs_info->tree_log_mutex);
859 /*
860 * keep tree reloc code from adding new reloc trees
861 */
862 mutex_lock(&root->fs_info->tree_reloc_mutex);
863
858 864
859 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 865 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
860 &dirty_fs_roots); 866 &dirty_fs_roots);
@@ -865,6 +871,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
865 */ 871 */
866 btrfs_free_log_root_tree(trans, root->fs_info); 872 btrfs_free_log_root_tree(trans, root->fs_info);
867 873
874 btrfs_free_reloc_mappings(root);
875
868 ret = btrfs_commit_tree_roots(trans, root); 876 ret = btrfs_commit_tree_roots(trans, root);
869 BUG_ON(ret); 877 BUG_ON(ret);
870 878
@@ -910,10 +918,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
910 mutex_unlock(&root->fs_info->tree_log_mutex); 918 mutex_unlock(&root->fs_info->tree_log_mutex);
911 919
912 btrfs_finish_extent_commit(trans, root, pinned_copy); 920 btrfs_finish_extent_commit(trans, root, pinned_copy);
913 mutex_lock(&root->fs_info->trans_mutex);
914
915 kfree(pinned_copy); 921 kfree(pinned_copy);
916 922
923 btrfs_drop_dead_reloc_roots(root);
924 mutex_unlock(&root->fs_info->tree_reloc_mutex);
925
926 mutex_lock(&root->fs_info->trans_mutex);
927
917 cur_trans->commit_done = 1; 928 cur_trans->commit_done = 1;
918 root->fs_info->last_trans_committed = cur_trans->transid; 929 root->fs_info->last_trans_committed = cur_trans->transid;
919 wake_up(&cur_trans->commit_wait); 930 wake_up(&cur_trans->commit_wait);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ddf89626498a..51f113119b25 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1268,7 +1268,7 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
1268 em_tree = &root->fs_info->mapping_tree.map_tree; 1268 em_tree = &root->fs_info->mapping_tree.map_tree;
1269 1269
1270 /* step one, relocate all the extents inside this chunk */ 1270 /* step one, relocate all the extents inside this chunk */
1271 ret = btrfs_shrink_extent_tree(extent_root, chunk_offset); 1271 ret = btrfs_relocate_block_group(extent_root, chunk_offset);
1272 BUG_ON(ret); 1272 BUG_ON(ret);
1273 1273
1274 trans = btrfs_start_transaction(root, 1); 1274 trans = btrfs_start_transaction(root, 1);
@@ -1308,15 +1308,18 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
1308 BUG_ON(ret); 1308 BUG_ON(ret);
1309 } 1309 }
1310 1310
1311 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
1312 BUG_ON(ret);
1313
1311 spin_lock(&em_tree->lock); 1314 spin_lock(&em_tree->lock);
1312 remove_extent_mapping(em_tree, em); 1315 remove_extent_mapping(em_tree, em);
1316 spin_unlock(&em_tree->lock);
1317
1313 kfree(map); 1318 kfree(map);
1314 em->bdev = NULL; 1319 em->bdev = NULL;
1315 1320
1316 /* once for the tree */ 1321 /* once for the tree */
1317 free_extent_map(em); 1322 free_extent_map(em);
1318 spin_unlock(&em_tree->lock);
1319
1320 /* once for us */ 1323 /* once for us */
1321 free_extent_map(em); 1324 free_extent_map(em);
1322 1325