diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2008-10-29 14:49:05 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-10-29 14:49:05 -0400 |
commit | f82d02d9d8222183b7945e893111a6d1bf67ae4a (patch) | |
tree | 70be1bb231f4cc2e673920774e759359f3dcf1a5 /fs/btrfs/ctree.c | |
parent | c8b978188c9a0fd3d535c13debd19d522b726f1f (diff) |
Btrfs: Improve space balancing code
This patch improves the space balancing code to keep more sharing
of tree blocks. The only case that breaks sharing of tree blocks is
data extents get fragmented during balancing. The main changes in
this patch are:
Add a 'drop sub-tree' function. This solves the problem in old code
that BTRFS_HEADER_FLAG_WRITTEN check breaks sharing of tree block.
Remove relocation mapping tree. Relocation mappings are stored in
struct btrfs_ref_path and updated dynamically during walking up/down
the reference path. This reduces CPU usage and simplifies code.
This patch also fixes a bug. Root items for reloc trees should be
updated in btrfs_free_reloc_root.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 93 |
1 files changed, 47 insertions, 46 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9caeb377de63..73899d0f9d8f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -287,7 +287,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
287 | /* | 287 | /* |
288 | * There are only two places that can drop reference to | 288 | * There are only two places that can drop reference to |
289 | * tree blocks owned by living reloc trees, one is here, | 289 | * tree blocks owned by living reloc trees, one is here, |
290 | * the other place is btrfs_merge_path. In both places, | 290 | * the other place is btrfs_drop_subtree. In both places, |
291 | * we check reference count while tree block is locked. | 291 | * we check reference count while tree block is locked. |
292 | * Furthermore, if reference count is one, it won't get | 292 | * Furthermore, if reference count is one, it won't get |
293 | * increased by someone else. | 293 | * increased by someone else. |
@@ -312,9 +312,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
312 | } | 312 | } |
313 | 313 | ||
314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 314 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
315 | ret = btrfs_add_reloc_mapping(root, buf->start, | ||
316 | buf->len, cow->start); | ||
317 | BUG_ON(ret); | ||
318 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); | 315 | ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); |
319 | WARN_ON(ret); | 316 | WARN_ON(ret); |
320 | } | 317 | } |
@@ -1627,61 +1624,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1627 | btrfs_node_key_to_cpu(eb, &key, slot); | 1624 | btrfs_node_key_to_cpu(eb, &key, slot); |
1628 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); | 1625 | key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); |
1629 | 1626 | ||
1627 | if (generation == trans->transid) { | ||
1628 | eb = read_tree_block(root, bytenr, blocksize, | ||
1629 | generation); | ||
1630 | btrfs_tree_lock(eb); | ||
1631 | } | ||
1632 | |||
1630 | /* | 1633 | /* |
1631 | * if node keys match and node pointer hasn't been modified | 1634 | * if node keys match and node pointer hasn't been modified |
1632 | * in the running transaction, we can merge the path. for | 1635 | * in the running transaction, we can merge the path. for |
1633 | * reloc trees, the node pointer check is skipped, this is | 1636 | * blocks owened by reloc trees, the node pointer check is |
1634 | * because the reloc trees are fully controlled by the space | 1637 | * skipped, this is because these blocks are fully controlled |
1635 | * balance code, no one else can modify them. | 1638 | * by the space balance code, no one else can modify them. |
1636 | */ | 1639 | */ |
1637 | if (!nodes[level - 1] || !key_match || | 1640 | if (!nodes[level - 1] || !key_match || |
1638 | (generation == trans->transid && | 1641 | (generation == trans->transid && |
1639 | root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { | 1642 | btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { |
1640 | next_level: | 1643 | if (level == 1 || level == lowest_level + 1) { |
1641 | if (level == 1 || level == lowest_level + 1) | 1644 | if (generation == trans->transid) { |
1645 | btrfs_tree_unlock(eb); | ||
1646 | free_extent_buffer(eb); | ||
1647 | } | ||
1642 | break; | 1648 | break; |
1649 | } | ||
1643 | 1650 | ||
1644 | eb = read_tree_block(root, bytenr, blocksize, | 1651 | if (generation != trans->transid) { |
1645 | generation); | 1652 | eb = read_tree_block(root, bytenr, blocksize, |
1646 | btrfs_tree_lock(eb); | 1653 | generation); |
1654 | btrfs_tree_lock(eb); | ||
1655 | } | ||
1647 | 1656 | ||
1648 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | 1657 | ret = btrfs_cow_block(trans, root, eb, parent, slot, |
1649 | &eb, 0); | 1658 | &eb, 0); |
1650 | BUG_ON(ret); | 1659 | BUG_ON(ret); |
1651 | 1660 | ||
1661 | if (root->root_key.objectid == | ||
1662 | BTRFS_TREE_RELOC_OBJECTID) { | ||
1663 | if (!nodes[level - 1]) { | ||
1664 | nodes[level - 1] = eb->start; | ||
1665 | memcpy(&node_keys[level - 1], &key, | ||
1666 | sizeof(node_keys[0])); | ||
1667 | } else { | ||
1668 | WARN_ON(1); | ||
1669 | } | ||
1670 | } | ||
1671 | |||
1652 | btrfs_tree_unlock(parent); | 1672 | btrfs_tree_unlock(parent); |
1653 | free_extent_buffer(parent); | 1673 | free_extent_buffer(parent); |
1654 | parent = eb; | 1674 | parent = eb; |
1655 | continue; | 1675 | continue; |
1656 | } | 1676 | } |
1657 | 1677 | ||
1658 | if (generation == trans->transid) { | ||
1659 | u32 refs; | ||
1660 | BUG_ON(btrfs_header_owner(eb) != | ||
1661 | BTRFS_TREE_RELOC_OBJECTID); | ||
1662 | /* | ||
1663 | * lock the block to keep __btrfs_cow_block from | ||
1664 | * changing the reference count. | ||
1665 | */ | ||
1666 | eb = read_tree_block(root, bytenr, blocksize, | ||
1667 | generation); | ||
1668 | btrfs_tree_lock(eb); | ||
1669 | |||
1670 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, | ||
1671 | blocksize, &refs); | ||
1672 | BUG_ON(ret); | ||
1673 | /* | ||
1674 | * if replace block whose reference count is one, | ||
1675 | * we have to "drop the subtree". so skip it for | ||
1676 | * simplicity | ||
1677 | */ | ||
1678 | if (refs == 1) { | ||
1679 | btrfs_tree_unlock(eb); | ||
1680 | free_extent_buffer(eb); | ||
1681 | goto next_level; | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1685 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); | 1678 | btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); |
1686 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); | 1679 | btrfs_set_node_ptr_generation(parent, slot, trans->transid); |
1687 | btrfs_mark_buffer_dirty(parent); | 1680 | btrfs_mark_buffer_dirty(parent); |
@@ -1693,16 +1686,24 @@ next_level: | |||
1693 | btrfs_header_generation(parent), | 1686 | btrfs_header_generation(parent), |
1694 | level - 1); | 1687 | level - 1); |
1695 | BUG_ON(ret); | 1688 | BUG_ON(ret); |
1696 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1697 | blocksize, parent->start, | ||
1698 | btrfs_header_owner(parent), | ||
1699 | btrfs_header_generation(parent), | ||
1700 | level - 1, 1); | ||
1701 | BUG_ON(ret); | ||
1702 | 1689 | ||
1690 | /* | ||
1691 | * If the block was created in the running transaction, | ||
1692 | * it's possible this is the last reference to it, so we | ||
1693 | * should drop the subtree. | ||
1694 | */ | ||
1703 | if (generation == trans->transid) { | 1695 | if (generation == trans->transid) { |
1696 | ret = btrfs_drop_subtree(trans, root, eb, parent); | ||
1697 | BUG_ON(ret); | ||
1704 | btrfs_tree_unlock(eb); | 1698 | btrfs_tree_unlock(eb); |
1705 | free_extent_buffer(eb); | 1699 | free_extent_buffer(eb); |
1700 | } else { | ||
1701 | ret = btrfs_free_extent(trans, root, bytenr, | ||
1702 | blocksize, parent->start, | ||
1703 | btrfs_header_owner(parent), | ||
1704 | btrfs_header_generation(parent), | ||
1705 | level - 1, 1); | ||
1706 | BUG_ON(ret); | ||
1706 | } | 1707 | } |
1707 | break; | 1708 | break; |
1708 | } | 1709 | } |