about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2011-04-15 16:05:44 -0400
committer: Chris Mason <chris.mason@oracle.com> 2011-04-15 16:05:44 -0400
commit: 0e4f8f888845f9dca540ad175884244e5db5eea2 (patch)
tree: cc43a4a6ff229a84f26376f27d9d938ac4dc1b26 /fs
parent: 329c5056be8774255db04b01242a9ff4f02eb8ea (diff)
Btrfs: don't force chunk allocation in find_free_extent
find_free_extent likes to allocate in contiguous clusters, which makes writeback faster, especially on SSD storage. As the FS fragments, these clusters become harder to find and we have to decide between allocating a new chunk to make more clusters or giving up on the cluster to allocate from the free space we have. Right now it creates too many chunks, and you can end up with a whole FS that is mostly empty metadata chunks. This commit changes the allocation code to be more strict and only allocate new chunks when we've made good use of the chunks we already have. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/extent-tree.c 95
1 files changed, 73 insertions, 22 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..26479484180d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
33#include "locking.h" 33#include "locking.h"
34#include "free-space-cache.h" 34#include "free-space-cache.h"
35 35
36/* control flags for do_chunk_alloc's force field
37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
38 * if we really need one.
39 *
40 * CHUNK_ALLOC_FORCE means it must try to allocate one
41 *
42 * CHUNK_ALLOC_LIMITED means to only try and allocate one
43 * if we have very few chunks already allocated. This is
44 * used as part of the clustering code to help make sure
45 * we have a good pool of storage to cluster in, without
46 * filling the FS with empty chunks
47 *
48 */
49enum {
50 CHUNK_ALLOC_NO_FORCE = 0,
51 CHUNK_ALLOC_FORCE = 1,
52 CHUNK_ALLOC_LIMITED = 2,
53};
54
36static int update_block_group(struct btrfs_trans_handle *trans, 55static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 56 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc); 57 u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3019 found->bytes_readonly = 0; 3038 found->bytes_readonly = 0;
3020 found->bytes_may_use = 0; 3039 found->bytes_may_use = 0;
3021 found->full = 0; 3040 found->full = 0;
3022 found->force_alloc = 0; 3041 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3023 *space_info = found; 3042 *space_info = found;
3024 list_add_rcu(&found->list, &info->space_info); 3043 list_add_rcu(&found->list, &info->space_info);
3025 atomic_set(&found->caching_threads, 0); 3044 atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3169,7 @@ again:
3150 if (!data_sinfo->full && alloc_chunk) { 3169 if (!data_sinfo->full && alloc_chunk) {
3151 u64 alloc_target; 3170 u64 alloc_target;
3152 3171
3153 data_sinfo->force_alloc = 1; 3172 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3154 spin_unlock(&data_sinfo->lock); 3173 spin_unlock(&data_sinfo->lock);
3155alloc: 3174alloc:
3156 alloc_target = btrfs_get_alloc_profile(root, 1); 3175 alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3179,8 @@ alloc:
3160 3179
3161 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3180 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3162 bytes + 2 * 1024 * 1024, 3181 bytes + 2 * 1024 * 1024,
3163 alloc_target, 0); 3182 alloc_target,
3183 CHUNK_ALLOC_NO_FORCE);
3164 btrfs_end_transaction(trans, root); 3184 btrfs_end_transaction(trans, root);
3165 if (ret < 0) { 3185 if (ret < 0) {
3166 if (ret != -ENOSPC) 3186 if (ret != -ENOSPC)
@@ -3239,31 +3259,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
3239 rcu_read_lock(); 3259 rcu_read_lock();
3240 list_for_each_entry_rcu(found, head, list) { 3260 list_for_each_entry_rcu(found, head, list) {
3241 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3261 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3242 found->force_alloc = 1; 3262 found->force_alloc = CHUNK_ALLOC_FORCE;
3243 } 3263 }
3244 rcu_read_unlock(); 3264 rcu_read_unlock();
3245} 3265}
3246 3266
3247static int should_alloc_chunk(struct btrfs_root *root, 3267static int should_alloc_chunk(struct btrfs_root *root,
3248 struct btrfs_space_info *sinfo, u64 alloc_bytes) 3268 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3269 int force)
3249{ 3270{
3250 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3271 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3272 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3251 u64 thresh; 3273 u64 thresh;
3252 3274
3253 if (sinfo->bytes_used + sinfo->bytes_reserved + 3275 if (force == CHUNK_ALLOC_FORCE)
3254 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3276 return 1;
3277
3278 /*
3279 * in limited mode, we want to have some free space up to
3280 * about 1% of the FS size.
3281 */
3282 if (force == CHUNK_ALLOC_LIMITED) {
3283 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3284 thresh = max_t(u64, 64 * 1024 * 1024,
3285 div_factor_fine(thresh, 1));
3286
3287 if (num_bytes - num_allocated < thresh)
3288 return 1;
3289 }
3290
3291 /*
3292 * we have two similar checks here, one based on percentage
3293 * and one based on a hard number of 256MB. The idea
3294 * is that if we have a good amount of free
3295 * room, don't allocate a chunk. A good amount is
3296 * less than 80% utilized of the chunks we have allocated,
3297 * or more than 256MB free
3298 */
3299 if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
3255 return 0; 3300 return 0;
3256 3301
3257 if (sinfo->bytes_used + sinfo->bytes_reserved + 3302 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
3258 alloc_bytes < div_factor(num_bytes, 8))
3259 return 0; 3303 return 0;
3260 3304
3261 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3305 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3306
3307 /* 256MB or 5% of the FS */
3262 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3308 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3263 3309
3264 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3310 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3265 return 0; 3311 return 0;
3266
3267 return 1; 3312 return 1;
3268} 3313}
3269 3314
@@ -3289,17 +3334,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3289 3334
3290 spin_lock(&space_info->lock); 3335 spin_lock(&space_info->lock);
3291 if (space_info->force_alloc) 3336 if (space_info->force_alloc)
3292 force = 1; 3337 force = space_info->force_alloc;
3293 if (space_info->full) { 3338 if (space_info->full) {
3294 spin_unlock(&space_info->lock); 3339 spin_unlock(&space_info->lock);
3295 goto out; 3340 goto out;
3296 } 3341 }
3297 3342
3298 if (!force && !should_alloc_chunk(extent_root, space_info, 3343 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
3299 alloc_bytes)) {
3300 spin_unlock(&space_info->lock); 3344 spin_unlock(&space_info->lock);
3301 goto out; 3345 goto out;
3302 } 3346 }
3347
3303 spin_unlock(&space_info->lock); 3348 spin_unlock(&space_info->lock);
3304 3349
3305 /* 3350 /*
@@ -3327,7 +3372,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3327 space_info->full = 1; 3372 space_info->full = 1;
3328 else 3373 else
3329 ret = 1; 3374 ret = 1;
3330 space_info->force_alloc = 0; 3375 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3331 spin_unlock(&space_info->lock); 3376 spin_unlock(&space_info->lock);
3332out: 3377out:
3333 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3378 mutex_unlock(&extent_root->fs_info->chunk_mutex);
@@ -5303,11 +5348,13 @@ loop:
5303 5348
5304 if (allowed_chunk_alloc) { 5349 if (allowed_chunk_alloc) {
5305 ret = do_chunk_alloc(trans, root, num_bytes + 5350 ret = do_chunk_alloc(trans, root, num_bytes +
5306 2 * 1024 * 1024, data, 1); 5351 2 * 1024 * 1024, data,
5352 CHUNK_ALLOC_LIMITED);
5307 allowed_chunk_alloc = 0; 5353 allowed_chunk_alloc = 0;
5308 done_chunk_alloc = 1; 5354 done_chunk_alloc = 1;
5309 } else if (!done_chunk_alloc) { 5355 } else if (!done_chunk_alloc &&
5310 space_info->force_alloc = 1; 5356 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5357 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5311 } 5358 }
5312 5359
5313 if (loop < LOOP_NO_EMPTY_SIZE) { 5360 if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5440,8 @@ again:
5393 */ 5440 */
5394 if (empty_size || root->ref_cows) 5441 if (empty_size || root->ref_cows)
5395 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5442 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5396 num_bytes + 2 * 1024 * 1024, data, 0); 5443 num_bytes + 2 * 1024 * 1024, data,
5444 CHUNK_ALLOC_NO_FORCE);
5397 5445
5398 WARN_ON(num_bytes < root->sectorsize); 5446 WARN_ON(num_bytes < root->sectorsize);
5399 ret = find_free_extent(trans, root, num_bytes, empty_size, 5447 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5453,7 @@ again:
5405 num_bytes = num_bytes & ~(root->sectorsize - 1); 5453 num_bytes = num_bytes & ~(root->sectorsize - 1);
5406 num_bytes = max(num_bytes, min_alloc_size); 5454 num_bytes = max(num_bytes, min_alloc_size);
5407 do_chunk_alloc(trans, root->fs_info->extent_root, 5455 do_chunk_alloc(trans, root->fs_info->extent_root,
5408 num_bytes, data, 1); 5456 num_bytes, data, CHUNK_ALLOC_FORCE);
5409 goto again; 5457 goto again;
5410 } 5458 }
5411 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5459 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8157,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8109 8157
8110 alloc_flags = update_block_group_flags(root, cache->flags); 8158 alloc_flags = update_block_group_flags(root, cache->flags);
8111 if (alloc_flags != cache->flags) 8159 if (alloc_flags != cache->flags)
8112 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8160 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8161 CHUNK_ALLOC_FORCE);
8113 8162
8114 ret = set_block_group_ro(cache); 8163 ret = set_block_group_ro(cache);
8115 if (!ret) 8164 if (!ret)
8116 goto out; 8165 goto out;
8117 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 8166 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8118 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8167 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8168 CHUNK_ALLOC_FORCE);
8119 if (ret < 0) 8169 if (ret < 0)
8120 goto out; 8170 goto out;
8121 ret = set_block_group_ro(cache); 8171 ret = set_block_group_ro(cache);
@@ -8128,7 +8178,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8128 struct btrfs_root *root, u64 type) 8178 struct btrfs_root *root, u64 type)
8129{ 8179{
8130 u64 alloc_flags = get_alloc_profile(root, type); 8180 u64 alloc_flags = get_alloc_profile(root, type);
8131 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8181 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8182 CHUNK_ALLOC_FORCE);
8132} 8183}
8133 8184
8134/* 8185/*