author     Josef Bacik <josef@redhat.com>        2009-11-10 21:23:48 -0500
committer  Chris Mason <chris.mason@oracle.com>  2009-11-11 14:20:19 -0500
commit     ccf0e72537a9f68611ca575121afd08e2b4d0fb0
tree       d2fd54693847b6ed1307ed1eb5d3f87b95e31538 /fs/btrfs/extent-tree.c
parent     4eb3991c5def39bcf553c14ebe2618fcb47b627f
Btrfs: find ideal block group for caching
This patch changes a few things. Hopefully the comments in the code are
helpful, but I'll try to be as verbose as I can here.
Problem:
My Fedora box was taking 1 minute and 21 seconds to boot with btrfs as root.
Part of the problem is that we pick the first block group we can find and start
caching it, even if it may not have enough free space. The other problem is that
we only search for cached block groups the first time around, and we won't find
any cached block groups because this is a newly mounted fs, so we end up caching
several block groups during bootup, which with a lot of fragmentation takes
around 30-45 seconds to complete and bogs down the system.
Solution:
1) Don't cache block groups willy-nilly at first. Instead, try to figure out
which block group has the most free space, and therefore will take the least
amount of time to cache (see the standalone sketch after this list).
2) Don't be so picky about cached block groups. The other problem is that once
we've filled up a cluster, if the block group isn't finished caching, the next
time we try to do the allocation we'll completely ignore the cluster and start
searching from the beginning of the space, which makes us cache more block
groups and slows us down even more. So instead of skipping block groups that
are not finished caching when we have a hint, only skip a block group if it
hasn't started caching yet.
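
To make the "most free" heuristic concrete, here is a small, purely
illustrative userspace sketch of the ranking the patch adds to
find_free_extent(). The struct and the sample numbers are made up; only the
arithmetic mirrors the hunk below: compute how much of each uncached block
group is free and remember the emptiest one, since it has the least data to
read back and should finish caching fastest.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* hypothetical stand-in for the few block group fields used here */
struct bg_sample {
        uint64_t objectid;      /* start offset of the block group */
        uint64_t used;          /* bytes used, from the block group item */
        uint64_t size;          /* total bytes (key.offset in the patch) */
};

int main(void)
{
        struct bg_sample samples[] = {
                { .objectid = 1ULL << 30, .used = 900ULL << 20, .size = 1024ULL << 20 },
                { .objectid = 2ULL << 30, .used = 100ULL << 20, .size = 1024ULL << 20 },
                { .objectid = 3ULL << 30, .used = 512ULL << 20, .size = 1024ULL << 20 },
        };
        uint64_t ideal_cache_offset = 0;
        uint64_t ideal_cache_percent = 0;
        size_t i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                /* same arithmetic as the patch: percent used, then inverted */
                uint64_t free_percent = samples[i].used * 100 / samples[i].size;

                free_percent = 100 - free_percent;
                if (free_percent > ideal_cache_percent) {
                        ideal_cache_offset = samples[i].objectid;
                        ideal_cache_percent = free_percent;
                }
        }
        printf("ideal block group at %llu (%llu%% free)\n",
               (unsigned long long)ideal_cache_offset,
               (unsigned long long)ideal_cache_percent);
        return 0;
}

In the example the mostly empty block group at 2 GiB wins, which is exactly
the candidate we would rather cache first.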
There is one other tweak in here. Before, if we allocated a chunk and still
couldn't find new space, we'd end up switching the space info to force another
chunk allocation. This could leave us with way too many chunks, so keep track
of this particular case (sketched just below).
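
Again as an illustration only (a hypothetical userspace sketch, not the kernel
code), this shows the shape of the done_chunk_alloc guard added at the bottom
of the diff: once this allocation attempt has already allocated a chunk, don't
flip force_alloc again, so a single call can't keep forcing new chunks.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-in for the one btrfs_space_info field used here */
struct space_info_sketch {
        bool force_alloc;
};

static void alloc_chunk_pass(struct space_info_sketch *info,
                             bool allowed_chunk_alloc, int *done_chunk_alloc)
{
        if (allowed_chunk_alloc) {
                printf("allocating a new chunk\n");
                *done_chunk_alloc = 1;
        } else if (!*done_chunk_alloc) {
                /* only force another allocation if we never managed one */
                info->force_alloc = true;
        }
}

int main(void)
{
        struct space_info_sketch info = { .force_alloc = false };
        int done_chunk_alloc = 0;

        alloc_chunk_pass(&info, true, &done_chunk_alloc);   /* allocates once */
        alloc_chunk_pass(&info, false, &done_chunk_alloc);  /* guard stops re-forcing */
        printf("force_alloc = %d\n", info.force_alloc);
        return 0;
}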
With this patch and my previous cluster fixes, my Fedora box now boots in 43
seconds, and according to the bootchart it is not held up by our block group
caching at all.
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--   fs/btrfs/extent-tree.c   109
1 file changed, 86 insertions, 23 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c56f91639dc1..2a4cdceeb575 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4101,7 +4101,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }

 enum btrfs_loop_type {
-        LOOP_CACHED_ONLY = 0,
+        LOOP_FIND_IDEAL = 0,
         LOOP_CACHING_NOWAIT = 1,
         LOOP_CACHING_WAIT = 2,
         LOOP_ALLOC_CHUNK = 3,
@@ -4130,12 +4130,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
         struct btrfs_block_group_cache *block_group = NULL;
         int empty_cluster = 2 * 1024 * 1024;
         int allowed_chunk_alloc = 0;
+        int done_chunk_alloc = 0;
         struct btrfs_space_info *space_info;
         int last_ptr_loop = 0;
         int loop = 0;
         bool found_uncached_bg = false;
         bool failed_cluster_refill = false;
         bool failed_alloc = false;
+        u64 ideal_cache_percent = 0;
+        u64 ideal_cache_offset = 0;

         WARN_ON(num_bytes < root->sectorsize);
         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4171,14 +4174,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                 empty_cluster = 0;

         if (search_start == hint_byte) {
+ideal_cache:
                 block_group = btrfs_lookup_block_group(root->fs_info,
                                                        search_start);
                 /*
                  * we don't want to use the block group if it doesn't match our
                  * allocation bits, or if its not cached.
+                 *
+                 * However if we are re-searching with an ideal block group
+                 * picked out then we don't care that the block group is cached.
                  */
                 if (block_group && block_group_bits(block_group, data) &&
-                    block_group_cache_done(block_group)) {
+                    (block_group->cached != BTRFS_CACHE_NO ||
+                     search_start == ideal_cache_offset)) {
                         down_read(&space_info->groups_sem);
                         if (list_empty(&block_group->list) ||
                             block_group->ro) {
@@ -4190,13 +4198,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                  */
                                 btrfs_put_block_group(block_group);
                                 up_read(&space_info->groups_sem);
-                        } else
+                        } else {
                                 goto have_block_group;
+                        }
                 } else if (block_group) {
                         btrfs_put_block_group(block_group);
                 }
         }
-
 search:
         down_read(&space_info->groups_sem);
         list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4208,28 +4216,45 @@ search:

 have_block_group:
                 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+                        u64 free_percent;
+
+                        free_percent = btrfs_block_group_used(&block_group->item);
+                        free_percent *= 100;
+                        free_percent = div64_u64(free_percent,
+                                                 block_group->key.offset);
+                        free_percent = 100 - free_percent;
+                        if (free_percent > ideal_cache_percent &&
+                            likely(!block_group->ro)) {
+                                ideal_cache_offset = block_group->key.objectid;
+                                ideal_cache_percent = free_percent;
+                        }
+
                         /*
-                         * we want to start caching kthreads, but not too many
-                         * right off the bat so we don't overwhelm the system,
-                         * so only start them if there are less than 2 and we're
-                         * in the initial allocation phase.
+                         * We only want to start kthread caching if we are at
+                         * the point where we will wait for caching to make
+                         * progress, or if our ideal search is over and we've
+                         * found somebody to start caching.
                          */
                         if (loop > LOOP_CACHING_NOWAIT ||
-                            atomic_read(&space_info->caching_threads) < 2) {
+                            (loop > LOOP_FIND_IDEAL &&
+                             atomic_read(&space_info->caching_threads) < 2)) {
                                 ret = cache_block_group(block_group);
                                 BUG_ON(ret);
                         }
-                }
-
-                cached = block_group_cache_done(block_group);
-                if (unlikely(!cached)) {
                         found_uncached_bg = true;

-                        /* if we only want cached bgs, loop */
-                        if (loop == LOOP_CACHED_ONLY)
+                        /*
+                         * If loop is set for cached only, try the next block
+                         * group.
+                         */
+                        if (loop == LOOP_FIND_IDEAL)
                                 goto loop;
                 }

+                cached = block_group_cache_done(block_group);
+                if (unlikely(!cached))
+                        found_uncached_bg = true;
+
                 if (unlikely(block_group->ro))
                         goto loop;

@@ -4409,9 +4434,11 @@ loop:
         }
         up_read(&space_info->groups_sem);

-        /* LOOP_CACHED_ONLY, only search fully cached block groups
-         * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-         *                      dont wait foR them to finish caching
+        /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
+         *                      for them to make caching progress.  Also
+         *                      determine the best possible bg to cache
+         * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+         *                      caching kthreads as we move along
          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4420,12 +4447,47 @@ loop:
         if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
             (found_uncached_bg || empty_size || empty_cluster ||
              allowed_chunk_alloc)) {
-                if (found_uncached_bg) {
+                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                         found_uncached_bg = false;
-                        if (loop < LOOP_CACHING_WAIT) {
-                                loop++;
+                        loop++;
+                        if (!ideal_cache_percent &&
+                            atomic_read(&space_info->caching_threads))
                                 goto search;
-                        }
+
+                        /*
+                         * 1 of the following 2 things have happened so far
+                         *
+                         * 1) We found an ideal block group for caching that
+                         * is mostly full and will cache quickly, so we might
+                         * as well wait for it.
+                         *
+                         * 2) We searched for cached only and we didn't find
+                         * anything, and we didn't start any caching kthreads
+                         * either, so chances are we will loop through and
+                         * start a couple caching kthreads, and then come back
+                         * around and just wait for them.  This will be slower
+                         * because we will have 2 caching kthreads reading at
+                         * the same time when we could have just started one
+                         * and waited for it to get far enough to give us an
+                         * allocation, so go ahead and go to the wait caching
+                         * loop.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        search_start = ideal_cache_offset;
+                        ideal_cache_percent = 0;
+                        goto ideal_cache;
+                } else if (loop == LOOP_FIND_IDEAL) {
+                        /*
+                         * Didn't find a uncached bg, wait on anything we find
+                         * next.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        goto search;
+                }
+
+                if (loop < LOOP_CACHING_WAIT) {
+                        loop++;
+                        goto search;
                 }

                 if (loop == LOOP_ALLOC_CHUNK) {
@@ -4437,7 +4499,8 @@ loop:
                         ret = do_chunk_alloc(trans, root, num_bytes +
                                              2 * 1024 * 1024, data, 1);
                         allowed_chunk_alloc = 0;
-                } else {
+                        done_chunk_alloc = 1;
+                } else if (!done_chunk_alloc) {
                         space_info->force_alloc = 1;
                 }
