diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-04-03 09:47:43 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-04-03 09:47:43 -0400 |
commit | fa9c0d795f7b57c76560b7fac703f5d341210e28 (patch) | |
tree | 74d9d9846e21ce5b99738f3cc13b855fb63d1eba /fs/btrfs/ctree.h | |
parent | 8e73f275011b3264a87339fd9f1690e944e381c9 (diff) |
Btrfs: rework allocation clustering
Because btrfs is copy-on-write, we end up picking new locations for
blocks very often. This makes it fairly difficult to maintain perfect
read patterns over time, but we can at least do some optimizations
for writes.
This is done today by remembering the last place we allocated and
trying to find a free space hole big enough to hold more than just one
allocation. The end result is that we tend to write sequentially to
the drive.
This happens all the time for metadata and it happens for data
when mounted -o ssd. But the way we record it is fairly racy
and it tends to fragment the free space over time because we are trying
to allocate fairly large areas at once.
This commit gets rid of the races by adding a free space cluster object
with dedicated locking to make sure that only one process at a time
is replacing the cluster.
The free space fragmentation is somewhat solved by allowing a cluster
to be composed of smaller free space extents. This part definitely
adds some CPU time to the cluster allocations, but it allows the allocator
to consume the small holes left behind by COW.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- | fs/btrfs/ctree.h | 54 |
1 files changed, 35 insertions, 19 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aaa049b8e134..b82931f97ef3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -633,11 +633,29 @@ struct btrfs_space_info { | |||
633 | struct rw_semaphore groups_sem; | 633 | struct rw_semaphore groups_sem; |
634 | }; | 634 | }; |
635 | 635 | ||
636 | struct btrfs_free_space { | 636 | /* |
637 | struct rb_node bytes_index; | 637 | * free clusters are used to claim free space in relatively large chunks, |
638 | struct rb_node offset_index; | 638 | * allowing us to do less seeky writes. They are used for all metadata |
639 | u64 offset; | 639 | * allocations and data allocations in ssd mode. |
640 | u64 bytes; | 640 | */ |
641 | struct btrfs_free_cluster { | ||
642 | spinlock_t lock; | ||
643 | spinlock_t refill_lock; | ||
644 | struct rb_root root; | ||
645 | |||
646 | /* largest extent in this cluster */ | ||
647 | u64 max_size; | ||
648 | |||
649 | /* first extent starting offset */ | ||
650 | u64 window_start; | ||
651 | |||
652 | struct btrfs_block_group_cache *block_group; | ||
653 | /* | ||
654 | * when a cluster is allocated from a block group, we put the | ||
655 | * cluster onto a list in the block group so that it can | ||
656 | * be freed before the block group is freed. | ||
657 | */ | ||
658 | struct list_head block_group_list; | ||
641 | }; | 659 | }; |
642 | 660 | ||
643 | struct btrfs_block_group_cache { | 661 | struct btrfs_block_group_cache { |
@@ -667,6 +685,11 @@ struct btrfs_block_group_cache { | |||
667 | 685 | ||
668 | /* usage count */ | 686 | /* usage count */ |
669 | atomic_t count; | 687 | atomic_t count; |
688 | |||
689 | /* List of struct btrfs_free_clusters for this block group. | ||
690 | * Today it will only have one thing on it, but that may change | ||
691 | */ | ||
692 | struct list_head cluster_list; | ||
670 | }; | 693 | }; |
671 | 694 | ||
672 | struct btrfs_leaf_ref_tree { | 695 | struct btrfs_leaf_ref_tree { |
@@ -838,8 +861,12 @@ struct btrfs_fs_info { | |||
838 | spinlock_t delalloc_lock; | 861 | spinlock_t delalloc_lock; |
839 | spinlock_t new_trans_lock; | 862 | spinlock_t new_trans_lock; |
840 | u64 delalloc_bytes; | 863 | u64 delalloc_bytes; |
841 | u64 last_alloc; | 864 | |
842 | u64 last_data_alloc; | 865 | /* data_alloc_cluster is only used in ssd mode */ |
866 | struct btrfs_free_cluster data_alloc_cluster; | ||
867 | |||
868 | /* all metadata allocations go through this cluster */ | ||
869 | struct btrfs_free_cluster meta_alloc_cluster; | ||
843 | 870 | ||
844 | spinlock_t ref_cache_lock; | 871 | spinlock_t ref_cache_lock; |
845 | u64 total_ref_cache_size; | 872 | u64 total_ref_cache_size; |
@@ -1747,6 +1774,7 @@ static inline struct dentry *fdentry(struct file *file) | |||
1747 | } | 1774 | } |
1748 | 1775 | ||
1749 | /* extent-tree.c */ | 1776 | /* extent-tree.c */ |
1777 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | ||
1750 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1778 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1751 | struct btrfs_root *root, unsigned long count); | 1779 | struct btrfs_root *root, unsigned long count); |
1752 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1780 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
@@ -2173,16 +2201,4 @@ int btrfs_check_acl(struct inode *inode, int mask); | |||
2173 | int btrfs_init_acl(struct inode *inode, struct inode *dir); | 2201 | int btrfs_init_acl(struct inode *inode, struct inode *dir); |
2174 | int btrfs_acl_chmod(struct inode *inode); | 2202 | int btrfs_acl_chmod(struct inode *inode); |
2175 | 2203 | ||
2176 | /* free-space-cache.c */ | ||
2177 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
2178 | u64 bytenr, u64 size); | ||
2179 | int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | ||
2180 | u64 bytenr, u64 size); | ||
2181 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache | ||
2182 | *block_group); | ||
2183 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | ||
2184 | u64 offset, u64 bytes, u64 empty_size); | ||
2185 | void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | ||
2186 | u64 bytes); | ||
2187 | u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); | ||
2188 | #endif | 2204 | #endif |