aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorZheng Yan <zheng.yan@oracle.com>2008-09-26 10:09:34 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-26 10:09:34 -0400
commit1a40e23b95da45051ee4d74374c58ae87a14051c (patch)
tree77faffd3f9d3a26c22e6cf03b83762c95d687596 /fs/btrfs/ctree.c
parent5b21f2ed3f2947b5195b65c9fdbdd9e52904cc03 (diff)
Btrfs: update space balancing code
This patch updates the space balancing code to utilize the new backref format. Before, btrfs-vol -b would break any COW links on data blocks or metadata. This was slow and caused the amount of space used to explode if a large number of snapshots were present. The new code can keeps the sharing of all data extents and most of the tree blocks. To maintain the sharing of data extents, the space balance code uses a seperate inode hold data extent pointers, then updates the references to point to the new location. To maintain the sharing of tree blocks, the space balance code uses reloc trees to relocate tree blocks in reference counted roots. There is one reloc tree for each subvol, and all reloc trees share same root key objectid. Reloc trees are snapshots of the latest committed roots of subvols (root->commit_root). To relocate a tree block referenced by a subvol, there are two steps. COW the block through subvol's reloc tree, then update block pointer in the subvol to point to the new block. Since all reloc trees share same root key objectid, doing special handing for tree blocks owned by them is easy. Once a tree block has been COWed in one reloc tree, we can use the resulting new block directly when the same block is required to COW again through other reloc trees. In this way, relocated tree blocks are shared between reloc trees, so they are also shared between subvols. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c155
1 files changed, 153 insertions, 2 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f9cd40967d04..50e81f43e6d4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -179,7 +179,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
179 struct extent_buffer *cow; 179 struct extent_buffer *cow;
180 u32 nritems; 180 u32 nritems;
181 int ret = 0; 181 int ret = 0;
182 int different_trans = 0;
183 int level; 182 int level;
184 int unlock_orig = 0; 183 int unlock_orig = 0;
185 184
@@ -233,13 +232,33 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
233 WARN_ON(btrfs_header_generation(buf) > trans->transid); 232 WARN_ON(btrfs_header_generation(buf) > trans->transid);
234 if (btrfs_header_generation(buf) != trans->transid) { 233 if (btrfs_header_generation(buf) != trans->transid) {
235 u32 nr_extents; 234 u32 nr_extents;
236 different_trans = 1;
237 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); 235 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
238 if (ret) 236 if (ret)
239 return ret; 237 return ret;
240 238
241 ret = btrfs_cache_ref(trans, root, buf, nr_extents); 239 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
242 WARN_ON(ret); 240 WARN_ON(ret);
241 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
242 /*
243 * There are only two places that can drop reference to
244 * tree blocks owned by living reloc trees, one is here,
245 * the other place is btrfs_merge_path. In both places,
246 * we check reference count while tree block is locked.
247 * Furthermore, if reference count is one, it won't get
248 * increased by someone else.
249 */
250 u32 refs;
251 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
252 buf->len, &refs);
253 BUG_ON(ret);
254 if (refs == 1) {
255 ret = btrfs_update_ref(trans, root, buf, cow,
256 0, nritems);
257 clean_tree_block(trans, root, buf);
258 } else {
259 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
260 }
261 BUG_ON(ret);
243 } else { 262 } else {
244 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); 263 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
245 if (ret) 264 if (ret)
@@ -247,6 +266,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
247 clean_tree_block(trans, root, buf); 266 clean_tree_block(trans, root, buf);
248 } 267 }
249 268
269 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
270 ret = btrfs_add_reloc_mapping(root, buf->start,
271 buf->len, cow->start);
272 BUG_ON(ret);
273 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
274 WARN_ON(ret);
275 }
276
250 if (buf == root->node) { 277 if (buf == root->node) {
251 WARN_ON(parent && parent != buf); 278 WARN_ON(parent && parent != buf);
252 279
@@ -1466,6 +1493,130 @@ done:
1466 return ret; 1493 return ret;
1467} 1494}
1468 1495
1496int btrfs_merge_path(struct btrfs_trans_handle *trans,
1497 struct btrfs_root *root,
1498 struct btrfs_key *node_keys,
1499 u64 *nodes, int lowest_level)
1500{
1501 struct extent_buffer *eb;
1502 struct extent_buffer *parent;
1503 struct btrfs_key key;
1504 u64 bytenr;
1505 u64 generation;
1506 u32 blocksize;
1507 int level;
1508 int slot;
1509 int key_match;
1510 int ret;
1511
1512 eb = btrfs_lock_root_node(root);
1513 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1514 BUG_ON(ret);
1515
1516 parent = eb;
1517 while (1) {
1518 level = btrfs_header_level(parent);
1519 if (level == 0 || level <= lowest_level)
1520 break;
1521
1522 ret = bin_search(parent, &node_keys[lowest_level], level,
1523 &slot);
1524 if (ret && slot > 0)
1525 slot--;
1526
1527 bytenr = btrfs_node_blockptr(parent, slot);
1528 if (nodes[level - 1] == bytenr)
1529 break;
1530
1531 blocksize = btrfs_level_size(root, level - 1);
1532 generation = btrfs_node_ptr_generation(parent, slot);
1533 btrfs_node_key_to_cpu(eb, &key, slot);
1534 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1535
1536 /*
1537 * if node keys match and node pointer hasn't been modified
1538 * in the running transaction, we can merge the path. for
1539 * reloc trees, the node pointer check is skipped, this is
1540 * because the reloc trees are fully controlled by the space
1541 * balance code, no one else can modify them.
1542 */
1543 if (!nodes[level - 1] || !key_match ||
1544 (generation == trans->transid &&
1545 root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) {
1546next_level:
1547 if (level == 1 || level == lowest_level + 1)
1548 break;
1549
1550 eb = read_tree_block(root, bytenr, blocksize,
1551 generation);
1552 btrfs_tree_lock(eb);
1553
1554 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1555 &eb, 0);
1556 BUG_ON(ret);
1557
1558 btrfs_tree_unlock(parent);
1559 free_extent_buffer(parent);
1560 parent = eb;
1561 continue;
1562 }
1563
1564 if (generation == trans->transid) {
1565 u32 refs;
1566 BUG_ON(btrfs_header_owner(eb) !=
1567 BTRFS_TREE_RELOC_OBJECTID);
1568 /*
1569 * lock the block to keep __btrfs_cow_block from
1570 * changing the reference count.
1571 */
1572 eb = read_tree_block(root, bytenr, blocksize,
1573 generation);
1574 btrfs_tree_lock(eb);
1575
1576 ret = btrfs_lookup_extent_ref(trans, root, bytenr,
1577 blocksize, &refs);
1578 BUG_ON(ret);
1579 /*
1580 * if replace block whose reference count is one,
1581 * we have to "drop the subtree". so skip it for
1582 * simplicity
1583 */
1584 if (refs == 1) {
1585 btrfs_tree_unlock(eb);
1586 free_extent_buffer(eb);
1587 goto next_level;
1588 }
1589 }
1590
1591 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1592 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1593 btrfs_mark_buffer_dirty(parent);
1594
1595 ret = btrfs_inc_extent_ref(trans, root,
1596 nodes[level - 1],
1597 blocksize, parent->start,
1598 btrfs_header_owner(parent),
1599 btrfs_header_generation(parent),
1600 level - 1, 0);
1601 BUG_ON(ret);
1602 ret = btrfs_free_extent(trans, root, bytenr,
1603 blocksize, parent->start,
1604 btrfs_header_owner(parent),
1605 btrfs_header_generation(parent),
1606 level - 1, 0, 1);
1607 BUG_ON(ret);
1608
1609 if (generation == trans->transid) {
1610 btrfs_tree_unlock(eb);
1611 free_extent_buffer(eb);
1612 }
1613 break;
1614 }
1615 btrfs_tree_unlock(parent);
1616 free_extent_buffer(parent);
1617 return 0;
1618}
1619
1469/* 1620/*
1470 * adjust the pointers going up the tree, starting at level 1621 * adjust the pointers going up the tree, starting at level
1471 * making sure the right key of each node is points to 'key'. 1622 * making sure the right key of each node is points to 'key'.