diff options
author | Josef Bacik <josef@redhat.com> | 2009-09-11 16:11:20 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-09-21 19:23:49 -0400 |
commit | 0a24325e6d8cfb150eba0aa279615ef27b5f6aec (patch) | |
tree | f0ca16ab43d87d526a1f918c7e960711256fd47f | |
parent | ba1bf4818baf68d914ef9e3b06fbea6acb674fe4 (diff) |
Btrfs: don't keep retrying a block group if we fail to allocate a cluster
The box can get locked up in the allocator if we happen upon a block group
under these conditions:
1) During a commit, so caching threads cannot make progress
2) Our block group currently is in the middle of being cached
3) Our block group currently has plenty of free space in it
4) Our block group is so fragmented that it ends up having no free space chunks
larger than min_bytes calculated by btrfs_find_space_cluster.
What happens is we try and do btrfs_find_space_cluster, which fails because it
is unable to find enough free space chunks that are larger than min_bytes and
are close enough together. Since the block group is not cached we do a
wait_block_group_cache_progress, which waits for the number of bytes we need,
except the block group already has _plenty_ of free space, it's just severely
fragmented, so we loop and try again, ad infinitum. This patch keeps us from
waiting on the block group to finish caching if we failed to find a free space
cluster before. It also makes sure that we don't even try to find a free space
cluster if we are on our last loop in the allocator, since we will have tried
everything at this point and it is futile.
Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/extent-tree.c | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4c7c9467f224..0f41da2c2f08 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3726,6 +3726,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3726 | int last_ptr_loop = 0; | 3726 | int last_ptr_loop = 0; |
3727 | int loop = 0; | 3727 | int loop = 0; |
3728 | bool found_uncached_bg = false; | 3728 | bool found_uncached_bg = false; |
3729 | bool failed_cluster_refill = false; | ||
3729 | 3730 | ||
3730 | WARN_ON(num_bytes < root->sectorsize); | 3731 | WARN_ON(num_bytes < root->sectorsize); |
3731 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 3732 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3823,7 +3824,16 @@ have_block_group: | |||
3823 | if (unlikely(block_group->ro)) | 3824 | if (unlikely(block_group->ro)) |
3824 | goto loop; | 3825 | goto loop; |
3825 | 3826 | ||
3826 | if (last_ptr) { | 3827 | /* |
3828 | * Ok we want to try and use the cluster allocator, so lets look | ||
3829 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
3830 | * have tried the cluster allocator plenty of times at this | ||
3831 | * point and not have found anything, so we are likely way too | ||
3832 | * fragmented for the clustering stuff to find anything, so lets | ||
3833 | * just skip it and let the allocator find whatever block it can | ||
3834 | * find | ||
3835 | */ | ||
3836 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
3827 | /* | 3837 | /* |
3828 | * the refill lock keeps out other | 3838 | * the refill lock keeps out other |
3829 | * people trying to start a new cluster | 3839 | * people trying to start a new cluster |
@@ -3898,9 +3908,11 @@ refill_cluster: | |||
3898 | spin_unlock(&last_ptr->refill_lock); | 3908 | spin_unlock(&last_ptr->refill_lock); |
3899 | goto checks; | 3909 | goto checks; |
3900 | } | 3910 | } |
3901 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 3911 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
3912 | && !failed_cluster_refill) { | ||
3902 | spin_unlock(&last_ptr->refill_lock); | 3913 | spin_unlock(&last_ptr->refill_lock); |
3903 | 3914 | ||
3915 | failed_cluster_refill = true; | ||
3904 | wait_block_group_cache_progress(block_group, | 3916 | wait_block_group_cache_progress(block_group, |
3905 | num_bytes + empty_cluster + empty_size); | 3917 | num_bytes + empty_cluster + empty_size); |
3906 | goto have_block_group; | 3918 | goto have_block_group; |
@@ -3912,13 +3924,9 @@ refill_cluster: | |||
3912 | * cluster. Free the cluster we've been trying | 3924 | * cluster. Free the cluster we've been trying |
3913 | * to use, and go to the next block group | 3925 | * to use, and go to the next block group |
3914 | */ | 3926 | */ |
3915 | if (loop < LOOP_NO_EMPTY_SIZE) { | 3927 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
3916 | btrfs_return_cluster_to_free_space(NULL, | ||
3917 | last_ptr); | ||
3918 | spin_unlock(&last_ptr->refill_lock); | ||
3919 | goto loop; | ||
3920 | } | ||
3921 | spin_unlock(&last_ptr->refill_lock); | 3928 | spin_unlock(&last_ptr->refill_lock); |
3929 | goto loop; | ||
3922 | } | 3930 | } |
3923 | 3931 | ||
3924 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 3932 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
@@ -3977,6 +3985,7 @@ checks: | |||
3977 | /* we are all good, lets return */ | 3985 | /* we are all good, lets return */ |
3978 | break; | 3986 | break; |
3979 | loop: | 3987 | loop: |
3988 | failed_cluster_refill = false; | ||
3980 | btrfs_put_block_group(block_group); | 3989 | btrfs_put_block_group(block_group); |
3981 | } | 3990 | } |
3982 | up_read(&space_info->groups_sem); | 3991 | up_read(&space_info->groups_sem); |