aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorYan Zheng <zheng.yan@oracle.com>2008-10-29 14:49:05 -0400
committerChris Mason <chris.mason@oracle.com>2008-10-29 14:49:05 -0400
commitf82d02d9d8222183b7945e893111a6d1bf67ae4a (patch)
tree70be1bb231f4cc2e673920774e759359f3dcf1a5 /fs/btrfs/ctree.c
parentc8b978188c9a0fd3d535c13debd19d522b726f1f (diff)
Btrfs: Improve space balancing code
This patch improves the space balancing code to keep more sharing of tree blocks. The only case that breaks sharing of tree blocks is data extents get fragmented during balancing. The main changes in this patch are: Add a 'drop sub-tree' function. This solves the problem in old code that BTRFS_HEADER_FLAG_WRITTEN check breaks sharing of tree block. Remove relocation mapping tree. Relocation mappings are stored in struct btrfs_ref_path and updated dynamically during walking up/down the reference path. This reduces CPU usage and simplifies code. This patch also fixes a bug. Root items for reloc trees should be updated in btrfs_free_reloc_root. Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c93
1 files changed, 47 insertions, 46 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9caeb377de63..73899d0f9d8f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -287,7 +287,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
287 /* 287 /*
288 * There are only two places that can drop reference to 288 * There are only two places that can drop reference to
289 * tree blocks owned by living reloc trees, one is here, 289 * tree blocks owned by living reloc trees, one is here,
290 * the other place is btrfs_merge_path. In both places, 290 * the other place is btrfs_drop_subtree. In both places,
291 * we check reference count while tree block is locked. 291 * we check reference count while tree block is locked.
292 * Furthermore, if reference count is one, it won't get 292 * Furthermore, if reference count is one, it won't get
293 * increased by someone else. 293 * increased by someone else.
@@ -312,9 +312,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
312 } 312 }
313 313
314 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 314 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
315 ret = btrfs_add_reloc_mapping(root, buf->start,
316 buf->len, cow->start);
317 BUG_ON(ret);
318 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); 315 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
319 WARN_ON(ret); 316 WARN_ON(ret);
320 } 317 }
@@ -1627,61 +1624,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
1627 btrfs_node_key_to_cpu(eb, &key, slot); 1624 btrfs_node_key_to_cpu(eb, &key, slot);
1628 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); 1625 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1629 1626
1627 if (generation == trans->transid) {
1628 eb = read_tree_block(root, bytenr, blocksize,
1629 generation);
1630 btrfs_tree_lock(eb);
1631 }
1632
1630 /* 1633 /*
1631 * if node keys match and node pointer hasn't been modified 1634 * if node keys match and node pointer hasn't been modified
1632 * in the running transaction, we can merge the path. for 1635 * in the running transaction, we can merge the path. for
1633 * reloc trees, the node pointer check is skipped, this is 1636 * blocks owened by reloc trees, the node pointer check is
1634 * because the reloc trees are fully controlled by the space 1637 * skipped, this is because these blocks are fully controlled
1635 * balance code, no one else can modify them. 1638 * by the space balance code, no one else can modify them.
1636 */ 1639 */
1637 if (!nodes[level - 1] || !key_match || 1640 if (!nodes[level - 1] || !key_match ||
1638 (generation == trans->transid && 1641 (generation == trans->transid &&
1639 root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { 1642 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1640next_level: 1643 if (level == 1 || level == lowest_level + 1) {
1641 if (level == 1 || level == lowest_level + 1) 1644 if (generation == trans->transid) {
1645 btrfs_tree_unlock(eb);
1646 free_extent_buffer(eb);
1647 }
1642 break; 1648 break;
1649 }
1643 1650
1644 eb = read_tree_block(root, bytenr, blocksize, 1651 if (generation != trans->transid) {
1645 generation); 1652 eb = read_tree_block(root, bytenr, blocksize,
1646 btrfs_tree_lock(eb); 1653 generation);
1654 btrfs_tree_lock(eb);
1655 }
1647 1656
1648 ret = btrfs_cow_block(trans, root, eb, parent, slot, 1657 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1649 &eb, 0); 1658 &eb, 0);
1650 BUG_ON(ret); 1659 BUG_ON(ret);
1651 1660
1661 if (root->root_key.objectid ==
1662 BTRFS_TREE_RELOC_OBJECTID) {
1663 if (!nodes[level - 1]) {
1664 nodes[level - 1] = eb->start;
1665 memcpy(&node_keys[level - 1], &key,
1666 sizeof(node_keys[0]));
1667 } else {
1668 WARN_ON(1);
1669 }
1670 }
1671
1652 btrfs_tree_unlock(parent); 1672 btrfs_tree_unlock(parent);
1653 free_extent_buffer(parent); 1673 free_extent_buffer(parent);
1654 parent = eb; 1674 parent = eb;
1655 continue; 1675 continue;
1656 } 1676 }
1657 1677
1658 if (generation == trans->transid) {
1659 u32 refs;
1660 BUG_ON(btrfs_header_owner(eb) !=
1661 BTRFS_TREE_RELOC_OBJECTID);
1662 /*
1663 * lock the block to keep __btrfs_cow_block from
1664 * changing the reference count.
1665 */
1666 eb = read_tree_block(root, bytenr, blocksize,
1667 generation);
1668 btrfs_tree_lock(eb);
1669
1670 ret = btrfs_lookup_extent_ref(trans, root, bytenr,
1671 blocksize, &refs);
1672 BUG_ON(ret);
1673 /*
1674 * if replace block whose reference count is one,
1675 * we have to "drop the subtree". so skip it for
1676 * simplicity
1677 */
1678 if (refs == 1) {
1679 btrfs_tree_unlock(eb);
1680 free_extent_buffer(eb);
1681 goto next_level;
1682 }
1683 }
1684
1685 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); 1678 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1686 btrfs_set_node_ptr_generation(parent, slot, trans->transid); 1679 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1687 btrfs_mark_buffer_dirty(parent); 1680 btrfs_mark_buffer_dirty(parent);
@@ -1693,16 +1686,24 @@ next_level:
1693 btrfs_header_generation(parent), 1686 btrfs_header_generation(parent),
1694 level - 1); 1687 level - 1);
1695 BUG_ON(ret); 1688 BUG_ON(ret);
1696 ret = btrfs_free_extent(trans, root, bytenr,
1697 blocksize, parent->start,
1698 btrfs_header_owner(parent),
1699 btrfs_header_generation(parent),
1700 level - 1, 1);
1701 BUG_ON(ret);
1702 1689
1690 /*
1691 * If the block was created in the running transaction,
1692 * it's possible this is the last reference to it, so we
1693 * should drop the subtree.
1694 */
1703 if (generation == trans->transid) { 1695 if (generation == trans->transid) {
1696 ret = btrfs_drop_subtree(trans, root, eb, parent);
1697 BUG_ON(ret);
1704 btrfs_tree_unlock(eb); 1698 btrfs_tree_unlock(eb);
1705 free_extent_buffer(eb); 1699 free_extent_buffer(eb);
1700 } else {
1701 ret = btrfs_free_extent(trans, root, bytenr,
1702 blocksize, parent->start,
1703 btrfs_header_owner(parent),
1704 btrfs_header_generation(parent),
1705 level - 1, 1);
1706 BUG_ON(ret);
1706 } 1707 }
1707 break; 1708 break;
1708 } 1709 }