diff options
author | Alexandre Oliva <oliva@lsd.ic.unicamp.br> | 2011-12-07 20:08:40 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2011-12-08 08:55:40 -0500 |
commit | 274bd4fb3ed6b72c1d77ef8850511f09fc6b8e4d (patch) | |
tree | c9a637653e547f24ed488d0d5956b314b4257597 | |
parent | 062c05c46bd4358aad7a0e0cb5ffeb98ab935286 (diff) |
Btrfs: try cluster but don't advance in search list
When we find an existing cluster, we switch to its block group as the
current block group, possibly skipping multiple block groups in the process.
Furthermore, under heavy contention, multiple threads may fail to
allocate from a cluster and then release just-created clusters, only to
create new ones in a different block group.
This patch tries to allocate from an existing cluster regardless of its
block group, and doesn't switch to that group, instead proceeding to
try to allocate a cluster from the group it was iterating before the
attempt.
Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/extent-tree.c | 74 |
1 files changed, 31 insertions, 43 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db0b23b14f20..05e1386b8bec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -5106,11 +5106,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5106 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 5106 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
5107 | struct btrfs_free_cluster *last_ptr = NULL; | 5107 | struct btrfs_free_cluster *last_ptr = NULL; |
5108 | struct btrfs_block_group_cache *block_group = NULL; | 5108 | struct btrfs_block_group_cache *block_group = NULL; |
5109 | struct btrfs_block_group_cache *used_block_group; | ||
5109 | int empty_cluster = 2 * 1024 * 1024; | 5110 | int empty_cluster = 2 * 1024 * 1024; |
5110 | int allowed_chunk_alloc = 0; | 5111 | int allowed_chunk_alloc = 0; |
5111 | int done_chunk_alloc = 0; | 5112 | int done_chunk_alloc = 0; |
5112 | struct btrfs_space_info *space_info; | 5113 | struct btrfs_space_info *space_info; |
5113 | int last_ptr_loop = 0; | ||
5114 | int loop = 0; | 5114 | int loop = 0; |
5115 | int index = 0; | 5115 | int index = 0; |
5116 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | 5116 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? |
@@ -5172,6 +5172,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5172 | ideal_cache: | 5172 | ideal_cache: |
5173 | block_group = btrfs_lookup_block_group(root->fs_info, | 5173 | block_group = btrfs_lookup_block_group(root->fs_info, |
5174 | search_start); | 5174 | search_start); |
5175 | used_block_group = block_group; | ||
5175 | /* | 5176 | /* |
5176 | * we don't want to use the block group if it doesn't match our | 5177 | * we don't want to use the block group if it doesn't match our |
5177 | * allocation bits, or if its not cached. | 5178 | * allocation bits, or if its not cached. |
@@ -5209,6 +5210,7 @@ search: | |||
5209 | u64 offset; | 5210 | u64 offset; |
5210 | int cached; | 5211 | int cached; |
5211 | 5212 | ||
5213 | used_block_group = block_group; | ||
5212 | btrfs_get_block_group(block_group); | 5214 | btrfs_get_block_group(block_group); |
5213 | search_start = block_group->key.objectid; | 5215 | search_start = block_group->key.objectid; |
5214 | 5216 | ||
@@ -5294,49 +5296,33 @@ alloc: | |||
5294 | * people trying to start a new cluster | 5296 | * people trying to start a new cluster |
5295 | */ | 5297 | */ |
5296 | spin_lock(&last_ptr->refill_lock); | 5298 | spin_lock(&last_ptr->refill_lock); |
5297 | if (!last_ptr->block_group || | 5299 | used_block_group = last_ptr->block_group; |
5298 | last_ptr->block_group->ro || | 5300 | if (used_block_group != block_group && |
5299 | !block_group_bits(last_ptr->block_group, data)) | 5301 | (!used_block_group || |
5302 | used_block_group->ro || | ||
5303 | !block_group_bits(used_block_group, data))) { | ||
5304 | used_block_group = block_group; | ||
5300 | goto refill_cluster; | 5305 | goto refill_cluster; |
5306 | } | ||
5307 | |||
5308 | if (used_block_group != block_group) | ||
5309 | btrfs_get_block_group(used_block_group); | ||
5301 | 5310 | ||
5302 | offset = btrfs_alloc_from_cluster(block_group, last_ptr, | 5311 | offset = btrfs_alloc_from_cluster(used_block_group, |
5303 | num_bytes, search_start); | 5312 | last_ptr, num_bytes, used_block_group->key.objectid); |
5304 | if (offset) { | 5313 | if (offset) { |
5305 | /* we have a block, we're done */ | 5314 | /* we have a block, we're done */ |
5306 | spin_unlock(&last_ptr->refill_lock); | 5315 | spin_unlock(&last_ptr->refill_lock); |
5307 | goto checks; | 5316 | goto checks; |
5308 | } | 5317 | } |
5309 | 5318 | ||
5310 | spin_lock(&last_ptr->lock); | 5319 | WARN_ON(last_ptr->block_group != used_block_group); |
5311 | /* | 5320 | if (used_block_group != block_group) { |
5312 | * whoops, this cluster doesn't actually point to | 5321 | btrfs_put_block_group(used_block_group); |
5313 | * this block group. Get a ref on the block | 5322 | used_block_group = block_group; |
5314 | * group is does point to and try again | ||
5315 | */ | ||
5316 | if (!last_ptr_loop && last_ptr->block_group && | ||
5317 | last_ptr->block_group != block_group && | ||
5318 | index <= | ||
5319 | get_block_group_index(last_ptr->block_group)) { | ||
5320 | |||
5321 | btrfs_put_block_group(block_group); | ||
5322 | block_group = last_ptr->block_group; | ||
5323 | btrfs_get_block_group(block_group); | ||
5324 | spin_unlock(&last_ptr->lock); | ||
5325 | spin_unlock(&last_ptr->refill_lock); | ||
5326 | |||
5327 | last_ptr_loop = 1; | ||
5328 | search_start = block_group->key.objectid; | ||
5329 | /* | ||
5330 | * we know this block group is properly | ||
5331 | * in the list because | ||
5332 | * btrfs_remove_block_group, drops the | ||
5333 | * cluster before it removes the block | ||
5334 | * group from the list | ||
5335 | */ | ||
5336 | goto have_block_group; | ||
5337 | } | 5323 | } |
5338 | spin_unlock(&last_ptr->lock); | ||
5339 | refill_cluster: | 5324 | refill_cluster: |
5325 | BUG_ON(used_block_group != block_group); | ||
5340 | /* If we are on LOOP_NO_EMPTY_SIZE, we can't | 5326 | /* If we are on LOOP_NO_EMPTY_SIZE, we can't |
5341 | * set up a new clusters, so lets just skip it | 5327 | * set up a new clusters, so lets just skip it |
5342 | * and let the allocator find whatever block | 5328 | * and let the allocator find whatever block |
@@ -5357,8 +5343,6 @@ refill_cluster: | |||
5357 | */ | 5343 | */ |
5358 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | 5344 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
5359 | 5345 | ||
5360 | last_ptr_loop = 0; | ||
5361 | |||
5362 | /* allocate a cluster in this block group */ | 5346 | /* allocate a cluster in this block group */ |
5363 | ret = btrfs_find_space_cluster(trans, root, | 5347 | ret = btrfs_find_space_cluster(trans, root, |
5364 | block_group, last_ptr, | 5348 | block_group, last_ptr, |
@@ -5425,14 +5409,14 @@ checks: | |||
5425 | search_start = stripe_align(root, offset); | 5409 | search_start = stripe_align(root, offset); |
5426 | /* move on to the next group */ | 5410 | /* move on to the next group */ |
5427 | if (search_start + num_bytes >= search_end) { | 5411 | if (search_start + num_bytes >= search_end) { |
5428 | btrfs_add_free_space(block_group, offset, num_bytes); | 5412 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5429 | goto loop; | 5413 | goto loop; |
5430 | } | 5414 | } |
5431 | 5415 | ||
5432 | /* move on to the next group */ | 5416 | /* move on to the next group */ |
5433 | if (search_start + num_bytes > | 5417 | if (search_start + num_bytes > |
5434 | block_group->key.objectid + block_group->key.offset) { | 5418 | used_block_group->key.objectid + used_block_group->key.offset) { |
5435 | btrfs_add_free_space(block_group, offset, num_bytes); | 5419 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5436 | goto loop; | 5420 | goto loop; |
5437 | } | 5421 | } |
5438 | 5422 | ||
@@ -5440,14 +5424,14 @@ checks: | |||
5440 | ins->offset = num_bytes; | 5424 | ins->offset = num_bytes; |
5441 | 5425 | ||
5442 | if (offset < search_start) | 5426 | if (offset < search_start) |
5443 | btrfs_add_free_space(block_group, offset, | 5427 | btrfs_add_free_space(used_block_group, offset, |
5444 | search_start - offset); | 5428 | search_start - offset); |
5445 | BUG_ON(offset > search_start); | 5429 | BUG_ON(offset > search_start); |
5446 | 5430 | ||
5447 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, | 5431 | ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, |
5448 | alloc_type); | 5432 | alloc_type); |
5449 | if (ret == -EAGAIN) { | 5433 | if (ret == -EAGAIN) { |
5450 | btrfs_add_free_space(block_group, offset, num_bytes); | 5434 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5451 | goto loop; | 5435 | goto loop; |
5452 | } | 5436 | } |
5453 | 5437 | ||
@@ -5456,15 +5440,19 @@ checks: | |||
5456 | ins->offset = num_bytes; | 5440 | ins->offset = num_bytes; |
5457 | 5441 | ||
5458 | if (offset < search_start) | 5442 | if (offset < search_start) |
5459 | btrfs_add_free_space(block_group, offset, | 5443 | btrfs_add_free_space(used_block_group, offset, |
5460 | search_start - offset); | 5444 | search_start - offset); |
5461 | BUG_ON(offset > search_start); | 5445 | BUG_ON(offset > search_start); |
5446 | if (used_block_group != block_group) | ||
5447 | btrfs_put_block_group(used_block_group); | ||
5462 | btrfs_put_block_group(block_group); | 5448 | btrfs_put_block_group(block_group); |
5463 | break; | 5449 | break; |
5464 | loop: | 5450 | loop: |
5465 | failed_cluster_refill = false; | 5451 | failed_cluster_refill = false; |
5466 | failed_alloc = false; | 5452 | failed_alloc = false; |
5467 | BUG_ON(index != get_block_group_index(block_group)); | 5453 | BUG_ON(index != get_block_group_index(block_group)); |
5454 | if (used_block_group != block_group) | ||
5455 | btrfs_put_block_group(used_block_group); | ||
5468 | btrfs_put_block_group(block_group); | 5456 | btrfs_put_block_group(block_group); |
5469 | } | 5457 | } |
5470 | up_read(&space_info->groups_sem); | 5458 | up_read(&space_info->groups_sem); |