diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-03-13 10:10:06 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-03-24 16:14:25 -0400 |
commit | 56bec294dea971335d4466b30f2d959f28f6e36d (patch) | |
tree | fc0b5bbf4bb6ab35582a4c7f58f5ac88f71c38bf /fs/btrfs/ctree.h | |
parent | 9fa8cfe706f9c20067c042a064999d5825a35330 (diff) |
Btrfs: do extent allocation and reference count updates in the background
The extent allocation tree maintains a reference count and full
back reference information for every extent allocated in the
filesystem. For subvolume and snapshot trees, every time
a block goes through COW, the new copy of the block adds a reference
on every block it points to.
If a btree node points to 150 leaves, then the COW code needs to go
and add backrefs on 150 different extents, which might be spread all
over the extent allocation tree.
These updates currently happen during btrfs_cow_block, and most COWs
happen during btrfs_search_slot. btrfs_search_slot has locks held
on both the parent and the node we are COWing, and so we really want
to avoid IO during the COW if we can.
This commit adds an rbtree of pending reference count updates and extent
allocations. The tree is ordered by byte number of the extent and byte number
of the parent for the back reference. The tree allows us to:
1) Modify back references in something close to disk order, reducing seeks
2) Significantly reduce the number of modifications made as block pointers
are balanced around
3) Do all of the extent insertion and back reference modifications outside
of the performance critical btrfs_search_slot code.
#3 has the added benefit of greatly reducing the btrfs stack footprint.
The extent allocation tree modifications are done without the deep
(and somewhat recursive) call chains used in the past.
These delayed back reference updates must be done before the transaction
commits, and so the rbtree is tied to the transaction. Throttling is
implemented to help keep the queue of backrefs at a reasonable size.
Since there was a similar mechanism in place for the extent tree
extents, it is removed and replaced by the delayed reference tree.
Yan Zheng <yan.zheng@oracle.com> helped review and fix up this code.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- | fs/btrfs/ctree.h | 12 |
1 files changed, 3 insertions, 9 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3a37ba7a8d65..ced5fd85dc36 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -688,8 +688,6 @@ struct btrfs_fs_info { | |||
688 | struct rb_root block_group_cache_tree; | 688 | struct rb_root block_group_cache_tree; |
689 | 689 | ||
690 | struct extent_io_tree pinned_extents; | 690 | struct extent_io_tree pinned_extents; |
691 | struct extent_io_tree pending_del; | ||
692 | struct extent_io_tree extent_ins; | ||
693 | 691 | ||
694 | /* logical->physical extent mapping */ | 692 | /* logical->physical extent mapping */ |
695 | struct btrfs_mapping_tree mapping_tree; | 693 | struct btrfs_mapping_tree mapping_tree; |
@@ -717,7 +715,6 @@ struct btrfs_fs_info { | |||
717 | struct mutex tree_log_mutex; | 715 | struct mutex tree_log_mutex; |
718 | struct mutex transaction_kthread_mutex; | 716 | struct mutex transaction_kthread_mutex; |
719 | struct mutex cleaner_mutex; | 717 | struct mutex cleaner_mutex; |
720 | struct mutex extent_ins_mutex; | ||
721 | struct mutex pinned_mutex; | 718 | struct mutex pinned_mutex; |
722 | struct mutex chunk_mutex; | 719 | struct mutex chunk_mutex; |
723 | struct mutex drop_mutex; | 720 | struct mutex drop_mutex; |
@@ -1704,18 +1701,15 @@ static inline struct dentry *fdentry(struct file *file) | |||
1704 | } | 1701 | } |
1705 | 1702 | ||
1706 | /* extent-tree.c */ | 1703 | /* extent-tree.c */ |
1704 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1705 | struct btrfs_root *root, unsigned long count); | ||
1707 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1706 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1708 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
1709 | struct btrfs_root *root, u64 bytenr, | ||
1710 | u64 num_bytes, u32 *refs); | ||
1711 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1707 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
1712 | u64 bytenr, u64 num, int pin); | 1708 | u64 bytenr, u64 num, int pin); |
1713 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1709 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1714 | struct btrfs_root *root, struct extent_buffer *leaf); | 1710 | struct btrfs_root *root, struct extent_buffer *leaf); |
1715 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1711 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
1716 | struct btrfs_root *root, u64 objectid, u64 bytenr); | 1712 | struct btrfs_root *root, u64 objectid, u64 bytenr); |
1717 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | ||
1718 | struct btrfs_root *root); | ||
1719 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | 1713 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); |
1720 | struct btrfs_block_group_cache *btrfs_lookup_block_group( | 1714 | struct btrfs_block_group_cache *btrfs_lookup_block_group( |
1721 | struct btrfs_fs_info *info, | 1715 | struct btrfs_fs_info *info, |
@@ -1777,7 +1771,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1777 | u64 root_objectid, u64 ref_generation, | 1771 | u64 root_objectid, u64 ref_generation, |
1778 | u64 owner_objectid); | 1772 | u64 owner_objectid); |
1779 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 1773 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1780 | struct btrfs_root *root, u64 bytenr, | 1774 | struct btrfs_root *root, u64 bytenr, u64 num_bytes, |
1781 | u64 orig_parent, u64 parent, | 1775 | u64 orig_parent, u64 parent, |
1782 | u64 root_objectid, u64 ref_generation, | 1776 | u64 root_objectid, u64 ref_generation, |
1783 | u64 owner_objectid); | 1777 | u64 owner_objectid); |