author     Josef Bacik <josef@redhat.com>        2009-11-10 21:23:48 -0500
committer  Chris Mason <chris.mason@oracle.com>  2009-11-11 14:20:19 -0500
commit     ccf0e72537a9f68611ca575121afd08e2b4d0fb0
tree       d2fd54693847b6ed1307ed1eb5d3f87b95e31538 /fs/btrfs/extent-tree.c
parent     4eb3991c5def39bcf553c14ebe2618fcb47b627f
Btrfs: find ideal block group for caching
This patch changes a few things. Hopefully the comments in the code are
helpful, but I'll try to be as verbose as I can here.
Problem:
My Fedora box was taking 1 minute and 21 seconds to boot with btrfs as root.
Part of the problem is that we pick the first block group we can find and start
caching it, even if it may not have enough free space. The other problem is that
we only search for cached block groups the first time around, and we won't find
any cached block groups because this is a newly mounted fs, so we end up caching
several block groups during bootup, which with a lot of fragmentation takes
around 30-45 seconds to complete and bogs down the system.
Solution:
1) Don't cache block groups willy-nilly at first. Instead, try to figure out
which block group has the most free space, and therefore will take the least
amount of time to cache (see the standalone sketch after this list).
2) Don't be so picky about cached block groups. The other problem is that once
we've filled up a cluster, if the block group isn't finished caching, the next
time we try to do the allocation we'll completely ignore the cluster and start
searching from the beginning of the space, which makes us cache more block
groups and slows us down even more. So instead of skipping block groups that
are not finished caching when we have a hint, only skip a block group if it
hasn't started caching yet.
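
To make the "most free" heuristic concrete, here is a small, purely
illustrative userspace sketch of the ranking the patch adds to
find_free_extent(). The struct and the sample numbers are made up; only the
arithmetic mirrors the hunk below: compute how much of each uncached block
group is free and remember the emptiest one, since it has the least data to
read back and should finish caching fastest.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* hypothetical stand-in for the few block group fields used here */
struct bg_sample {
        uint64_t objectid;      /* start offset of the block group */
        uint64_t used;          /* bytes used, from the block group item */
        uint64_t size;          /* total bytes (key.offset in the patch) */
};

int main(void)
{
        struct bg_sample samples[] = {
                { .objectid = 1ULL << 30, .used = 900ULL << 20, .size = 1024ULL << 20 },
                { .objectid = 2ULL << 30, .used = 100ULL << 20, .size = 1024ULL << 20 },
                { .objectid = 3ULL << 30, .used = 512ULL << 20, .size = 1024ULL << 20 },
        };
        uint64_t ideal_cache_offset = 0;
        uint64_t ideal_cache_percent = 0;
        size_t i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                /* same arithmetic as the patch: percent used, then inverted */
                uint64_t free_percent = samples[i].used * 100 / samples[i].size;

                free_percent = 100 - free_percent;
                if (free_percent > ideal_cache_percent) {
                        ideal_cache_offset = samples[i].objectid;
                        ideal_cache_percent = free_percent;
                }
        }
        printf("ideal block group at %llu (%llu%% free)\n",
               (unsigned long long)ideal_cache_offset,
               (unsigned long long)ideal_cache_percent);
        return 0;
}

In the example the mostly empty block group at 2 GiB wins, which is exactly
the candidate we would rather cache first.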
There is one other tweak in here. Before, if we allocated a chunk and still
couldn't find new space, we'd end up switching the space info to force another
chunk allocation. This could leave us with way too many chunks, so keep track
of this particular case (sketched just below).
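
Again as an illustration only (a hypothetical userspace sketch, not the kernel
code), this shows the shape of the done_chunk_alloc guard added at the bottom
of the diff: once this allocation attempt has already allocated a chunk, don't
flip force_alloc again, so a single call can't keep forcing new chunks.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-in for the one btrfs_space_info field used here */
struct space_info_sketch {
        bool force_alloc;
};

static void alloc_chunk_pass(struct space_info_sketch *info,
                             bool allowed_chunk_alloc, int *done_chunk_alloc)
{
        if (allowed_chunk_alloc) {
                printf("allocating a new chunk\n");
                *done_chunk_alloc = 1;
        } else if (!*done_chunk_alloc) {
                /* only force another allocation if we never managed one */
                info->force_alloc = true;
        }
}

int main(void)
{
        struct space_info_sketch info = { .force_alloc = false };
        int done_chunk_alloc = 0;

        alloc_chunk_pass(&info, true, &done_chunk_alloc);   /* allocates once */
        alloc_chunk_pass(&info, false, &done_chunk_alloc);  /* guard stops re-forcing */
        printf("force_alloc = %d\n", info.force_alloc);
        return 0;
}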
With this patch and my previous cluster fixes, my Fedora box now boots in 43
seconds, and according to the bootchart it is not held up by our block group
caching at all.
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--   fs/btrfs/extent-tree.c   109
1 file changed, 86 insertions, 23 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c56f91639dc1..2a4cdceeb575 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4101,7 +4101,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }

 enum btrfs_loop_type {
-        LOOP_CACHED_ONLY = 0,
+        LOOP_FIND_IDEAL = 0,
         LOOP_CACHING_NOWAIT = 1,
         LOOP_CACHING_WAIT = 2,
         LOOP_ALLOC_CHUNK = 3,
@@ -4130,12 +4130,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
         struct btrfs_block_group_cache *block_group = NULL;
         int empty_cluster = 2 * 1024 * 1024;
         int allowed_chunk_alloc = 0;
+        int done_chunk_alloc = 0;
         struct btrfs_space_info *space_info;
         int last_ptr_loop = 0;
         int loop = 0;
         bool found_uncached_bg = false;
         bool failed_cluster_refill = false;
         bool failed_alloc = false;
+        u64 ideal_cache_percent = 0;
+        u64 ideal_cache_offset = 0;

         WARN_ON(num_bytes < root->sectorsize);
         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4171,14 +4174,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                 empty_cluster = 0;

         if (search_start == hint_byte) {
+ideal_cache:
                 block_group = btrfs_lookup_block_group(root->fs_info,
                                                        search_start);
                 /*
                  * we don't want to use the block group if it doesn't match our
                  * allocation bits, or if its not cached.
+                 *
+                 * However if we are re-searching with an ideal block group
+                 * picked out then we don't care that the block group is cached.
                  */
                 if (block_group && block_group_bits(block_group, data) &&
-                    block_group_cache_done(block_group)) {
+                    (block_group->cached != BTRFS_CACHE_NO ||
+                     search_start == ideal_cache_offset)) {
                         down_read(&space_info->groups_sem);
                         if (list_empty(&block_group->list) ||
                             block_group->ro) {
@@ -4190,13 +4198,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                  */
                                 btrfs_put_block_group(block_group);
                                 up_read(&space_info->groups_sem);
-                        } else
+                        } else {
                                 goto have_block_group;
+                        }
                 } else if (block_group) {
                         btrfs_put_block_group(block_group);
                 }
         }
-
 search:
         down_read(&space_info->groups_sem);
         list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4208,28 +4216,45 @@ search:

 have_block_group:
                 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+                        u64 free_percent;
+
+                        free_percent = btrfs_block_group_used(&block_group->item);
+                        free_percent *= 100;
+                        free_percent = div64_u64(free_percent,
+                                                 block_group->key.offset);
+                        free_percent = 100 - free_percent;
+                        if (free_percent > ideal_cache_percent &&
+                            likely(!block_group->ro)) {
+                                ideal_cache_offset = block_group->key.objectid;
+                                ideal_cache_percent = free_percent;
+                        }
+
                         /*
-                         * we want to start caching kthreads, but not too many
-                         * right off the bat so we don't overwhelm the system,
-                         * so only start them if there are less than 2 and we're
-                         * in the initial allocation phase.
+                         * We only want to start kthread caching if we are at
+                         * the point where we will wait for caching to make
+                         * progress, or if our ideal search is over and we've
+                         * found somebody to start caching.
                          */
                         if (loop > LOOP_CACHING_NOWAIT ||
-                            atomic_read(&space_info->caching_threads) < 2) {
+                            (loop > LOOP_FIND_IDEAL &&
+                             atomic_read(&space_info->caching_threads) < 2)) {
                                 ret = cache_block_group(block_group);
                                 BUG_ON(ret);
                         }
-                }
-
-                cached = block_group_cache_done(block_group);
-                if (unlikely(!cached)) {
                         found_uncached_bg = true;

-                        /* if we only want cached bgs, loop */
-                        if (loop == LOOP_CACHED_ONLY)
+                        /*
+                         * If loop is set for cached only, try the next block
+                         * group.
+                         */
+                        if (loop == LOOP_FIND_IDEAL)
                                 goto loop;
                 }

+                cached = block_group_cache_done(block_group);
+                if (unlikely(!cached))
+                        found_uncached_bg = true;
+
                 if (unlikely(block_group->ro))
                         goto loop;

@@ -4409,9 +4434,11 @@ loop:
         }
         up_read(&space_info->groups_sem);

-        /* LOOP_CACHED_ONLY, only search fully cached block groups
-         * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-         *                      dont wait foR them to finish caching
+        /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
+         *                      for them to make caching progress.  Also
+         *                      determine the best possible bg to cache
+         * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+         *                      caching kthreads as we move along
          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4420,12 +4447,47 @@ loop:
         if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
             (found_uncached_bg || empty_size || empty_cluster ||
              allowed_chunk_alloc)) {
-                if (found_uncached_bg) {
+                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                         found_uncached_bg = false;
-                        if (loop < LOOP_CACHING_WAIT) {
-                                loop++;
+                        loop++;
+                        if (!ideal_cache_percent &&
+                            atomic_read(&space_info->caching_threads))
                                 goto search;
-                        }
+
+                        /*
+                         * 1 of the following 2 things have happened so far
+                         *
+                         * 1) We found an ideal block group for caching that
+                         * is mostly full and will cache quickly, so we might
+                         * as well wait for it.
+                         *
+                         * 2) We searched for cached only and we didn't find
+                         * anything, and we didn't start any caching kthreads
+                         * either, so chances are we will loop through and
+                         * start a couple caching kthreads, and then come back
+                         * around and just wait for them.  This will be slower
+                         * because we will have 2 caching kthreads reading at
+                         * the same time when we could have just started one
+                         * and waited for it to get far enough to give us an
+                         * allocation, so go ahead and go to the wait caching
+                         * loop.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        search_start = ideal_cache_offset;
+                        ideal_cache_percent = 0;
+                        goto ideal_cache;
+                } else if (loop == LOOP_FIND_IDEAL) {
+                        /*
+                         * Didn't find a uncached bg, wait on anything we find
+                         * next.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        goto search;
+                }
+
+                if (loop < LOOP_CACHING_WAIT) {
+                        loop++;
+                        goto search;
                 }

                 if (loop == LOOP_ALLOC_CHUNK) {
@@ -4437,7 +4499,8 @@ loop:
                         ret = do_chunk_alloc(trans, root, num_bytes +
                                              2 * 1024 * 1024, data, 1);
                         allowed_chunk_alloc = 0;
-                } else {
+                        done_chunk_alloc = 1;
+                } else if (!done_chunk_alloc) {
                         space_info->force_alloc = 1;
                 }
