aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.h
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2009-04-03 09:47:43 -0400
committer Chris Mason <chris.mason@oracle.com> 2009-04-03 09:47:43 -0400
commit fa9c0d795f7b57c76560b7fac703f5d341210e28 (patch)
tree 74d9d9846e21ce5b99738f3cc13b855fb63d1eba /fs/btrfs/ctree.h
parent 8e73f275011b3264a87339fd9f1690e944e381c9 (diff)
Btrfs: rework allocation clustering
Because btrfs is copy-on-write, we end up picking new locations for blocks very often. This makes it fairly difficult to maintain perfect read patterns over time, but we can at least do some optimizations for writes. This is done today by remembering the last place we allocated and trying to find a free space hole big enough to hold more than just one allocation. The end result is that we tend to write sequentially to the drive. This happens all the time for metadata and it happens for data when mounted -o ssd. But the way we record it is fairly racy and it tends to fragment the free space over time because we are trying to allocate fairly large areas at once. This commit gets rid of the races by adding a free space cluster object with dedicated locking to make sure that only one process at a time is out replacing the cluster. The free space fragmentation is somewhat solved by allowing a cluster to be composed of smaller free space extents. This part definitely adds some CPU time to the cluster allocations, but it allows the allocator to consume the small holes left behind by cow. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- fs/btrfs/ctree.h 54
1 files changed, 35 insertions, 19 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index aaa049b8e134..b82931f97ef3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -633,11 +633,29 @@ struct btrfs_space_info {
633 struct rw_semaphore groups_sem; 633 struct rw_semaphore groups_sem;
634}; 634};
635 635
636struct btrfs_free_space { 636/*
637 struct rb_node bytes_index; 637 * free clusters are used to claim free space in relatively large chunks,
638 struct rb_node offset_index; 638 * allowing us to do less seeky writes. They are used for all metadata
639 u64 offset; 639 * allocations and data allocations in ssd mode.
640 u64 bytes; 640 */
641struct btrfs_free_cluster {
642 spinlock_t lock;
643 spinlock_t refill_lock;
644 struct rb_root root;
645
646 /* largest extent in this cluster */
647 u64 max_size;
648
649 /* first extent starting offset */
650 u64 window_start;
651
652 struct btrfs_block_group_cache *block_group;
653 /*
654 * when a cluster is allocated from a block group, we put the
655 * cluster onto a list in the block group so that it can
656 * be freed before the block group is freed.
657 */
658 struct list_head block_group_list;
641}; 659};
642 660
643struct btrfs_block_group_cache { 661struct btrfs_block_group_cache {
@@ -667,6 +685,11 @@ struct btrfs_block_group_cache {
667 685
668 /* usage count */ 686 /* usage count */
669 atomic_t count; 687 atomic_t count;
688
689 /* List of struct btrfs_free_clusters for this block group.
690 * Today it will only have one thing on it, but that may change
691 */
692 struct list_head cluster_list;
670}; 693};
671 694
672struct btrfs_leaf_ref_tree { 695struct btrfs_leaf_ref_tree {
@@ -838,8 +861,12 @@ struct btrfs_fs_info {
838 spinlock_t delalloc_lock; 861 spinlock_t delalloc_lock;
839 spinlock_t new_trans_lock; 862 spinlock_t new_trans_lock;
840 u64 delalloc_bytes; 863 u64 delalloc_bytes;
841 u64 last_alloc; 864
842 u64 last_data_alloc; 865 /* data_alloc_cluster is only used in ssd mode */
866 struct btrfs_free_cluster data_alloc_cluster;
867
868 /* all metadata allocations go through this cluster */
869 struct btrfs_free_cluster meta_alloc_cluster;
843 870
844 spinlock_t ref_cache_lock; 871 spinlock_t ref_cache_lock;
845 u64 total_ref_cache_size; 872 u64 total_ref_cache_size;
@@ -1747,6 +1774,7 @@ static inline struct dentry *fdentry(struct file *file)
1747} 1774}
1748 1775
1749/* extent-tree.c */ 1776/* extent-tree.c */
1777void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1750int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1778int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1751 struct btrfs_root *root, unsigned long count); 1779 struct btrfs_root *root, unsigned long count);
1752int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 1780int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
@@ -2173,16 +2201,4 @@ int btrfs_check_acl(struct inode *inode, int mask);
2173int btrfs_init_acl(struct inode *inode, struct inode *dir); 2201int btrfs_init_acl(struct inode *inode, struct inode *dir);
2174int btrfs_acl_chmod(struct inode *inode); 2202int btrfs_acl_chmod(struct inode *inode);
2175 2203
2176/* free-space-cache.c */
2177int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
2178 u64 bytenr, u64 size);
2179int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
2180 u64 bytenr, u64 size);
2181void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
2182 *block_group);
2183u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2184 u64 offset, u64 bytes, u64 empty_size);
2185void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
2186 u64 bytes);
2187u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
2188#endif 2204#endif