aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2009-07-13 21:29:25 -0400
committerChris Mason <chris.mason@oracle.com>2009-07-24 09:23:30 -0400
commit963030817060e4f109be1993b9ae8f81dbf5e11a (patch)
tree7d81121b7e68d3d5b3317afba53d36bc1bf8221a /fs/btrfs/extent-tree.c
parent83121942b28daffc9526b14b7843d8cdbd3db641 (diff)
Btrfs: use hybrid extents+bitmap rb tree for free space
Currently btrfs has a problem where it can use a ridiculous amount of RAM simply tracking free space. As free space gets fragmented, we end up with thousands of entries on an rb-tree per block group, which usually spans 1 gig of area. Since we currently don't ever flush free space cache back to disk this gets to be a bit unwieldy on large fs's with lots of fragmentation. This patch solves this problem by using PAGE_SIZE bitmaps for parts of the free space cache. Initially we calculate a threshold of extent entries we can handle, which is however many extent entries we can cram into 16k of ram. The maximum amount of RAM that should ever be used to track 1 gigabyte of diskspace will be 32k of RAM, which scales much better than we did before. Once we pass the extent threshold, we start adding bitmaps and using those instead for tracking the free space. This patch also makes it so that any free space that's less than 4 * sectorsize we go ahead and put into a bitmap. This is nice since we try and allocate out of the front of a block group, so if the front of a block group is heavily fragmented and then has a huge chunk of free space at the end, we go ahead and add the fragmented areas to bitmaps and use a normal extent entry to track the big chunk at the back of the block group. I've also taken the opportunity to revamp how we search for free space. Previously we indexed free space via an offset indexed rb tree and a bytes indexed rb tree. I've dropped the bytes indexed rb tree and use only the offset indexed rb tree. This cuts the number of tree operations we were doing previously down by half, and gives us a little bit of a better allocation pattern since we will always start from a specific offset and search forward from there, instead of searching for the size we need and try and get it as close as possible to the offset we want. I've given this a healthy amount of testing pre-new format stuff, as well as post-new format stuff. 
I've booted up my fedora box which is installed on btrfs with this patch and ran with it for a few days without issues. I've not seen any performance regressions in any of my tests. Since the last patch Yan Zheng fixed a problem where we could have overlapping entries, so updating their offset inline would cause problems. Thanks, Signed-off-by: Josef Bacik <jbacik@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c25
1 files changed, 24 insertions, 1 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 62a332d34fdb..98697be6bdde 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3649,7 +3649,6 @@ refill_cluster:
3649 goto loop; 3649 goto loop;
3650checks: 3650checks:
3651 search_start = stripe_align(root, offset); 3651 search_start = stripe_align(root, offset);
3652
3653 /* move on to the next group */ 3652 /* move on to the next group */
3654 if (search_start + num_bytes >= search_end) { 3653 if (search_start + num_bytes >= search_end) {
3655 btrfs_add_free_space(block_group, offset, num_bytes); 3654 btrfs_add_free_space(block_group, offset, num_bytes);
@@ -7040,6 +7039,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7040 mutex_init(&cache->cache_mutex); 7039 mutex_init(&cache->cache_mutex);
7041 INIT_LIST_HEAD(&cache->list); 7040 INIT_LIST_HEAD(&cache->list);
7042 INIT_LIST_HEAD(&cache->cluster_list); 7041 INIT_LIST_HEAD(&cache->cluster_list);
7042 cache->sectorsize = root->sectorsize;
7043
7044 /*
7045 * we only want to have 32k of ram per block group for keeping
7046 * track of free space, and if we pass 1/2 of that we want to
7047 * start converting things over to using bitmaps
7048 */
7049 cache->extents_thresh = ((1024 * 32) / 2) /
7050 sizeof(struct btrfs_free_space);
7051
7043 read_extent_buffer(leaf, &cache->item, 7052 read_extent_buffer(leaf, &cache->item,
7044 btrfs_item_ptr_offset(leaf, path->slots[0]), 7053 btrfs_item_ptr_offset(leaf, path->slots[0]),
7045 sizeof(cache->item)); 7054 sizeof(cache->item));
@@ -7091,6 +7100,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7091 cache->key.objectid = chunk_offset; 7100 cache->key.objectid = chunk_offset;
7092 cache->key.offset = size; 7101 cache->key.offset = size;
7093 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7102 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
7103 cache->sectorsize = root->sectorsize;
7104
7105 /*
7106 * we only want to have 32k of ram per block group for keeping track
7107 * of free space, and if we pass 1/2 of that we want to start
7108 * converting things over to using bitmaps
7109 */
7110 cache->extents_thresh = ((1024 * 32) / 2) /
7111 sizeof(struct btrfs_free_space);
7094 atomic_set(&cache->count, 1); 7112 atomic_set(&cache->count, 1);
7095 spin_lock_init(&cache->lock); 7113 spin_lock_init(&cache->lock);
7096 spin_lock_init(&cache->tree_lock); 7114 spin_lock_init(&cache->tree_lock);
@@ -7103,6 +7121,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7103 cache->flags = type; 7121 cache->flags = type;
7104 btrfs_set_block_group_flags(&cache->item, type); 7122 btrfs_set_block_group_flags(&cache->item, type);
7105 7123
7124 cache->cached = 1;
7125 ret = btrfs_add_free_space(cache, chunk_offset, size);
7126 BUG_ON(ret);
7127 remove_sb_from_cache(root, cache);
7128
7106 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7129 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7107 &cache->space_info); 7130 &cache->space_info);
7108 BUG_ON(ret); 7131 BUG_ON(ret);