author     Josef Bacik <josef@redhat.com>        2009-11-10 21:23:48 -0500
committer  Chris Mason <chris.mason@oracle.com>  2009-11-11 14:20:19 -0500
commit     ccf0e72537a9f68611ca575121afd08e2b4d0fb0
tree       d2fd54693847b6ed1307ed1eb5d3f87b95e31538
parent     4eb3991c5def39bcf553c14ebe2618fcb47b627f
Btrfs: find ideal block group for caching
This patch changes a few things.  Hopefully the comments are helpful, but
I'll try to be as verbose here as I can.

Problem:

My fedora box was taking 1 minute and 21 seconds to boot with btrfs as
root.  Part of this problem was we pick the first block group we can find
and start caching it, even if it may not have enough free space.  The
other problem is we only search for cached block groups the first time
around, and we won't find any cached block groups because this is a newly
mounted fs, so we end up caching several block groups during bootup,
which with a lot of fragmentation takes around 30-45 seconds to complete,
which bogs down the system.

Solution:

1) Don't cache block groups willy-nilly at first.  Instead try to figure
out which block group has the most free space, and therefore will take
the least amount of time to cache.

2) Don't be so picky about cached block groups.  Once we've filled up a
cluster, if the block group isn't finished caching, then the next time we
try to do the allocation we'll completely ignore the cluster and start
searching from the beginning of the space, which makes us cache more
block groups, which slows us down even more.  So instead of skipping
block groups that are not finished caching when we have a hint, only skip
the block group if it hasn't started caching yet.

There is one other tweak in here.  Before, if we allocated a chunk and
still couldn't find new space, we'd end up switching the space info to
force another chunk allocation.  This could make us end up with way too
many chunks, so keep track of this particular case.

With this patch and my previous cluster fixes my fedora box now boots in
43 seconds, and according to the bootchart it is not held up by our block
group caching at all.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
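For illustration, the block group selection heuristic described in point 1
can be boiled down to the standalone sketch below.  This is not the kernel
code itself: the struct and the sample numbers are invented for the
example, while the real change in find_free_extent() (in the diff that
follows) uses btrfs_block_group_used() and div64_u64() on struct
btrfs_block_group_cache.

#include <stdio.h>

/* Hypothetical stand-in for the fields the heuristic looks at. */
struct bg_sample {
        unsigned long long used;    /* bytes used in the block group */
        unsigned long long size;    /* total bytes (key.offset in btrfs) */
        int ro;                     /* read-only groups are never picked */
};

int main(void)
{
        struct bg_sample bgs[] = {
                { 900ULL << 20, 1024ULL << 20, 0 },  /* ~13% free */
                { 100ULL << 20, 1024ULL << 20, 0 },  /* ~91% free */
                {  10ULL << 20, 1024ULL << 20, 1 },  /* emptier, but ro */
        };
        unsigned long long ideal_cache_percent = 0;
        int ideal = -1;
        int i;

        for (i = 0; i < 3; i++) {
                /* free_percent = 100 - used * 100 / size, as in the patch */
                unsigned long long free_percent =
                        100 - bgs[i].used * 100 / bgs[i].size;

                /* remember the emptiest non-read-only block group seen */
                if (free_percent > ideal_cache_percent && !bgs[i].ro) {
                        ideal_cache_percent = free_percent;
                        ideal = i;
                }
        }
        printf("ideal block group: %d (%llu%% free)\n",
               ideal, ideal_cache_percent);
        return 0;
}

The emptiest block group has the least used space for the caching kthread
to scan, so it becomes usable the fastest; the patch records its start
offset in ideal_cache_offset and jumps back to it (the new ideal_cache:
label) once the LOOP_FIND_IDEAL pass is over.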
-rw-r--r--  fs/btrfs/extent-tree.c  109
1 file changed, 86 insertions(+), 23 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c56f91639dc1..2a4cdceeb575 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4101,7 +4101,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }
 
 enum btrfs_loop_type {
-        LOOP_CACHED_ONLY = 0,
+        LOOP_FIND_IDEAL = 0,
         LOOP_CACHING_NOWAIT = 1,
         LOOP_CACHING_WAIT = 2,
         LOOP_ALLOC_CHUNK = 3,
@@ -4130,12 +4130,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
         struct btrfs_block_group_cache *block_group = NULL;
         int empty_cluster = 2 * 1024 * 1024;
         int allowed_chunk_alloc = 0;
+        int done_chunk_alloc = 0;
         struct btrfs_space_info *space_info;
         int last_ptr_loop = 0;
         int loop = 0;
         bool found_uncached_bg = false;
         bool failed_cluster_refill = false;
         bool failed_alloc = false;
+        u64 ideal_cache_percent = 0;
+        u64 ideal_cache_offset = 0;
 
         WARN_ON(num_bytes < root->sectorsize);
         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4171,14 +4174,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                 empty_cluster = 0;
 
         if (search_start == hint_byte) {
+ideal_cache:
                 block_group = btrfs_lookup_block_group(root->fs_info,
                                                        search_start);
                 /*
                  * we don't want to use the block group if it doesn't match our
                  * allocation bits, or if its not cached.
+                 *
+                 * However if we are re-searching with an ideal block group
+                 * picked out then we don't care that the block group is cached.
                  */
                 if (block_group && block_group_bits(block_group, data) &&
-                    block_group_cache_done(block_group)) {
+                    (block_group->cached != BTRFS_CACHE_NO ||
+                     search_start == ideal_cache_offset)) {
                         down_read(&space_info->groups_sem);
                         if (list_empty(&block_group->list) ||
                             block_group->ro) {
@@ -4190,13 +4198,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                  */
                                 btrfs_put_block_group(block_group);
                                 up_read(&space_info->groups_sem);
-                        } else
+                        } else {
                                 goto have_block_group;
+                        }
                 } else if (block_group) {
                         btrfs_put_block_group(block_group);
                 }
         }
-
 search:
         down_read(&space_info->groups_sem);
         list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4208,28 +4216,45 @@ search:
 
 have_block_group:
                 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+                        u64 free_percent;
+
+                        free_percent = btrfs_block_group_used(&block_group->item);
+                        free_percent *= 100;
+                        free_percent = div64_u64(free_percent,
+                                                 block_group->key.offset);
+                        free_percent = 100 - free_percent;
+                        if (free_percent > ideal_cache_percent &&
+                            likely(!block_group->ro)) {
+                                ideal_cache_offset = block_group->key.objectid;
+                                ideal_cache_percent = free_percent;
+                        }
+
                         /*
-                         * we want to start caching kthreads, but not too many
-                         * right off the bat so we don't overwhelm the system,
-                         * so only start them if there are less than 2 and we're
-                         * in the initial allocation phase.
+                         * We only want to start kthread caching if we are at
+                         * the point where we will wait for caching to make
+                         * progress, or if our ideal search is over and we've
+                         * found somebody to start caching.
                          */
                         if (loop > LOOP_CACHING_NOWAIT ||
-                            atomic_read(&space_info->caching_threads) < 2) {
+                            (loop > LOOP_FIND_IDEAL &&
+                             atomic_read(&space_info->caching_threads) < 2)) {
                                 ret = cache_block_group(block_group);
                                 BUG_ON(ret);
                         }
-                }
-
-                cached = block_group_cache_done(block_group);
-                if (unlikely(!cached)) {
                         found_uncached_bg = true;
 
-                        /* if we only want cached bgs, loop */
-                        if (loop == LOOP_CACHED_ONLY)
+                        /*
+                         * If loop is set for cached only, try the next block
+                         * group.
+                         */
+                        if (loop == LOOP_FIND_IDEAL)
                                 goto loop;
                 }
 
+                cached = block_group_cache_done(block_group);
+                if (unlikely(!cached))
+                        found_uncached_bg = true;
+
                 if (unlikely(block_group->ro))
                         goto loop;
 
@@ -4409,9 +4434,11 @@ loop:
         }
         up_read(&space_info->groups_sem);
 
-        /* LOOP_CACHED_ONLY, only search fully cached block groups
-         * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-         *                      dont wait foR them to finish caching
+        /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
+         *                      for them to make caching progress.  Also
+         *                      determine the best possible bg to cache
+         * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+         *                      caching kthreads as we move along
          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4420,12 +4447,47 @@ loop:
         if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
             (found_uncached_bg || empty_size || empty_cluster ||
              allowed_chunk_alloc)) {
-                if (found_uncached_bg) {
+                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                         found_uncached_bg = false;
-                        if (loop < LOOP_CACHING_WAIT) {
-                                loop++;
+                        loop++;
+                        if (!ideal_cache_percent &&
+                            atomic_read(&space_info->caching_threads))
                                 goto search;
-                        }
+
+                        /*
+                         * 1 of the following 2 things have happened so far
+                         *
+                         * 1) We found an ideal block group for caching that
+                         * is mostly full and will cache quickly, so we might
+                         * as well wait for it.
+                         *
+                         * 2) We searched for cached only and we didn't find
+                         * anything, and we didn't start any caching kthreads
+                         * either, so chances are we will loop through and
+                         * start a couple caching kthreads, and then come back
+                         * around and just wait for them.  This will be slower
+                         * because we will have 2 caching kthreads reading at
+                         * the same time when we could have just started one
+                         * and waited for it to get far enough to give us an
+                         * allocation, so go ahead and go to the wait caching
+                         * loop.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        search_start = ideal_cache_offset;
+                        ideal_cache_percent = 0;
+                        goto ideal_cache;
+                } else if (loop == LOOP_FIND_IDEAL) {
+                        /*
+                         * Didn't find a uncached bg, wait on anything we find
+                         * next.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        goto search;
+                }
+
+                if (loop < LOOP_CACHING_WAIT) {
+                        loop++;
+                        goto search;
                 }
 
                 if (loop == LOOP_ALLOC_CHUNK) {
@@ -4437,7 +4499,8 @@ loop:
                                 ret = do_chunk_alloc(trans, root, num_bytes +
                                                      2 * 1024 * 1024, data, 1);
                                 allowed_chunk_alloc = 0;
-                        } else {
+                                done_chunk_alloc = 1;
+                        } else if (!done_chunk_alloc) {
                                 space_info->force_alloc = 1;
                         }
 