author    Josef Bacik <josef@redhat.com>    2009-07-13 21:29:25 -0400
committer Chris Mason <chris.mason@oracle.com>    2009-07-24 09:23:30 -0400
commit    963030817060e4f109be1993b9ae8f81dbf5e11a (patch)
tree      7d81121b7e68d3d5b3317afba53d36bc1bf8221a /fs/btrfs/ctree.h
parent    83121942b28daffc9526b14b7843d8cdbd3db641 (diff)
Btrfs: use hybrid extents+bitmap rb tree for free space
Currently btrfs has a problem where it can use a ridiculous amount of RAM simply tracking free space. As free space gets fragmented, we end up with thousands of entries on an rb-tree per block group, which usually spans 1 gig of area. Since we currently don't ever flush the free space cache back to disk, this gets to be a bit unwieldy on large filesystems with lots of fragmentation.

This patch solves this problem by using PAGE_SIZE bitmaps for parts of the free space cache. Initially we calculate a threshold of extent entries we can handle, which is however many extent entries we can cram into 16k of RAM. The maximum amount of RAM that should ever be used to track 1 gigabyte of disk space will be 32k of RAM, which scales much better than before. Once we pass the extent threshold, we start adding bitmaps and using those instead to track the free space.

This patch also makes it so that any free space that is less than 4 * sectorsize goes into a bitmap. This is nice since we try to allocate out of the front of a block group: if the front of a block group is heavily fragmented and then has a huge chunk of free space at the end, we add the fragmented areas to bitmaps and use a normal extent entry to track the big chunk at the back of the block group.

I've also taken the opportunity to revamp how we search for free space. Previously we indexed free space via an offset-indexed rb tree and a bytes-indexed rb tree. I've dropped the bytes-indexed rb tree and use only the offset-indexed rb tree. This cuts the number of tree operations we were doing in half, and gives us a slightly better allocation pattern, since we will always start from a specific offset and search forward from there, instead of searching for the size we need and trying to get it as close as possible to the offset we want.

I've given this a healthy amount of testing with the pre-new-format code as well as the post-new-format code. I've booted up my Fedora box, which is installed on btrfs, with this patch and run with it for a few days without issues. I've not seen any performance regressions in any of my tests.

Since the last patch, Yan Zheng fixed a problem where we could have overlapping entries, so updating their offset inline would cause problems. Thanks,

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
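The extent-versus-bitmap policy described in the message can be sketched in a few lines of C. This is a hypothetical illustration, not code from the patch: the struct and helper names are invented, and only the 4 * sectorsize rule and the extent-count threshold come from the commit message.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for per-block-group free space state; the
 * field names mirror the ones this patch adds to
 * btrfs_block_group_cache (see the diff below). */
struct free_space_ctl {
	uint64_t sectorsize;
	int extents_thresh;	/* max extent entries before using bitmaps */
	int free_extents;	/* extent entries currently in the tree */
};

/* Small fragments (< 4 * sectorsize) always go into a bitmap, and once
 * the extent threshold is exceeded, new free space does too; larger
 * ranges below the threshold keep ordinary extent entries. */
static bool use_bitmap(struct free_space_ctl *ctl, uint64_t bytes)
{
	if (bytes < 4 * ctl->sectorsize)
		return true;
	return ctl->free_extents >= ctl->extents_thresh;
}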
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r--    fs/btrfs/ctree.h    8
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index da0763135bf0..0cbf3491bb7c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -709,6 +709,9 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;
 
+	/* if this cluster simply points at a bitmap in the block group */
+	bool points_to_bitmap;
+
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -726,6 +729,10 @@ struct btrfs_block_group_cache {
 	u64 pinned;
 	u64 reserved;
 	u64 flags;
+	u64 sectorsize;
+	int extents_thresh;
+	int free_extents;
+	int total_bitmaps;
 	int cached;
 	int ro;
 	int dirty;
@@ -734,7 +741,6 @@ struct btrfs_block_group_cache {
 
 	/* free space cache stuff */
 	spinlock_t tree_lock;
-	struct rb_root free_space_bytes;
 	struct rb_root free_space_offset;
 
 	/* block group cache stuff */
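The new extents_thresh field above corresponds to the 16k RAM budget from the commit message. A hedged sketch of how such a threshold could be derived follows; the entry struct here is a hypothetical stand-in (the kernel's actual type is struct btrfs_free_space, whose layout is not shown in this diff).

#include <stdint.h>

/* Hypothetical free-space entry: an extent (offset/bytes), or, when
 * bitmap is non-NULL, a PAGE_SIZE bitmap covering a region. */
struct free_space_entry {
	uint64_t offset;
	uint64_t bytes;
	unsigned long *bitmap;
};

/* Budget extent entries to fit in 16k of RAM, per the commit message;
 * bitmaps account for the rest of the 32k-per-gigabyte worst case. */
static int calc_extents_thresh(void)
{
	return (16 * 1024) / (int)sizeof(struct free_space_entry);
}

With a 24-byte entry on a 64-bit build, this works out to roughly 680 extent entries per block group before bitmaps take over.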