aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2011-09-26 17:12:22 -0400
committerJosef Bacik <josef@redhat.com>2011-10-19 15:12:50 -0400
commit2bf64758fd6290797a5ce97d4b9c698a4ed1cbad (patch)
tree61c7cedc6d7870d288c11333596da6ec673fae95 /fs/btrfs/extent-tree.c
parent8f6d7f4f45f18a5b669dbbf068c74b3d5be59dbf (diff)
Btrfs: allow us to overcommit our enospc reservations
One of the things that kills us is the fact that our ENOSPC reservations are horribly over the top in most normal cases. There isn't too much that can be done about this because when we are completely full we really need them to work like this so we don't under reserve. However if there are plenty of unallocated chunks on the disk we can use that to gauge how much we can overcommit. So this patch adds chunk free space accounting so we always know how much unallocated space we have. Then if we fail to make a reservation within our allocated space, check to see if we can overcommit. In the normal flushing case (like with delalloc metadata reservations) we'll take the free space and divide it by 2 if our metadata profile is set up for DUP or any of those, and then divide it by 8 to make sure we don't overcommit too much. Then if we're in a non-flushing case (we really need this reservation now!) we only limit ourselves to half of the free space. This makes this fio test

[torrent]
filename=torrent-test
rw=randwrite
size=4g
ioengine=sync
directory=/mnt/btrfs-test

go from taking around 45 minutes to 10 seconds on my freshly formatted 3 TiB file system. This doesn't seem to break my other enospc tests, but could really use some more testing as this is a super scary change. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c61
1 files changed, 47 insertions, 14 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fd65f6bc676c..25b69d0f9135 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3410,6 +3410,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3410 * @block_rsv - the block_rsv we're allocating for 3410 * @block_rsv - the block_rsv we're allocating for
3411 * @orig_bytes - the number of bytes we want 3411 * @orig_bytes - the number of bytes we want
3412 * @flush - whether or not we can flush to make our reservation 3412 * @flush - whether or not we can flush to make our reservation
3413 * @check - whether this is just to check if we have enough space or not
3413 * 3414 *
3414 * This will reserve orig_bytes number of bytes from the space info associated 3415 * This will reserve orig_bytes number of bytes from the space info associated
3415 * with the block_rsv. If there is not enough space it will make an attempt to 3416 * with the block_rsv. If there is not enough space it will make an attempt to
@@ -3420,11 +3421,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3420 */ 3421 */
3421static int reserve_metadata_bytes(struct btrfs_root *root, 3422static int reserve_metadata_bytes(struct btrfs_root *root,
3422 struct btrfs_block_rsv *block_rsv, 3423 struct btrfs_block_rsv *block_rsv,
3423 u64 orig_bytes, int flush) 3424 u64 orig_bytes, int flush, int check)
3424{ 3425{
3425 struct btrfs_space_info *space_info = block_rsv->space_info; 3426 struct btrfs_space_info *space_info = block_rsv->space_info;
3426 struct btrfs_trans_handle *trans; 3427 struct btrfs_trans_handle *trans;
3427 u64 unused; 3428 u64 used;
3428 u64 num_bytes = orig_bytes; 3429 u64 num_bytes = orig_bytes;
3429 int retries = 0; 3430 int retries = 0;
3430 int ret = 0; 3431 int ret = 0;
@@ -3459,9 +3460,9 @@ again:
3459 } 3460 }
3460 3461
3461 ret = -ENOSPC; 3462 ret = -ENOSPC;
3462 unused = space_info->bytes_used + space_info->bytes_reserved + 3463 used = space_info->bytes_used + space_info->bytes_reserved +
3463 space_info->bytes_pinned + space_info->bytes_readonly + 3464 space_info->bytes_pinned + space_info->bytes_readonly +
3464 space_info->bytes_may_use; 3465 space_info->bytes_may_use;
3465 3466
3466 /* 3467 /*
3467 * The idea here is that we've not already over-reserved the block group 3468 * The idea here is that we've not already over-reserved the block group
@@ -3470,9 +3471,8 @@ again:
3470 * lets start flushing stuff first and then come back and try to make 3471 * lets start flushing stuff first and then come back and try to make
3471 * our reservation. 3472 * our reservation.
3472 */ 3473 */
3473 if (unused <= space_info->total_bytes) { 3474 if (used <= space_info->total_bytes) {
3474 unused = space_info->total_bytes - unused; 3475 if (used + orig_bytes <= space_info->total_bytes) {
3475 if (unused >= orig_bytes) {
3476 space_info->bytes_may_use += orig_bytes; 3476 space_info->bytes_may_use += orig_bytes;
3477 ret = 0; 3477 ret = 0;
3478 } else { 3478 } else {
@@ -3489,10 +3489,43 @@ again:
3489 * amount plus the amount of bytes that we need for this 3489 * amount plus the amount of bytes that we need for this
3490 * reservation. 3490 * reservation.
3491 */ 3491 */
3492 num_bytes = unused - space_info->total_bytes + 3492 num_bytes = used - space_info->total_bytes +
3493 (orig_bytes * (retries + 1)); 3493 (orig_bytes * (retries + 1));
3494 } 3494 }
3495 3495
3496 if (ret && !check) {
3497 u64 profile = btrfs_get_alloc_profile(root, 0);
3498 u64 avail;
3499
3500 spin_lock(&root->fs_info->free_chunk_lock);
3501 avail = root->fs_info->free_chunk_space;
3502
3503 /*
3504 * If we have dup, raid1 or raid10 then only half of the free
3505 * space is actually useable.
3506 */
3507 if (profile & (BTRFS_BLOCK_GROUP_DUP |
3508 BTRFS_BLOCK_GROUP_RAID1 |
3509 BTRFS_BLOCK_GROUP_RAID10))
3510 avail >>= 1;
3511
3512 /*
3513 * If we aren't flushing, let us overcommit up to 1/2 of the
3514 * space. If we can flush, don't let us overcommit too much:
3515 * limit it to 1/8th of the space.
3516 */
3517 if (flush)
3518 avail >>= 3;
3519 else
3520 avail >>= 1;
3521 spin_unlock(&root->fs_info->free_chunk_lock);
3522
3523 if (used + orig_bytes < space_info->total_bytes + avail) {
3524 space_info->bytes_may_use += orig_bytes;
3525 ret = 0;
3526 }
3527 }
3528
3496 /* 3529 /*
3497 * Couldn't make our reservation, save our place so while we're trying 3530 * Couldn't make our reservation, save our place so while we're trying
3498 * to reclaim space we can actually use it instead of somebody else 3531 * to reclaim space we can actually use it instead of somebody else
@@ -3703,7 +3736,7 @@ int btrfs_block_rsv_add(struct btrfs_root *root,
3703 if (num_bytes == 0) 3736 if (num_bytes == 0)
3704 return 0; 3737 return 0;
3705 3738
3706 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); 3739 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1, 0);
3707 if (!ret) { 3740 if (!ret) {
3708 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3741 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3709 return 0; 3742 return 0;
@@ -3737,7 +3770,7 @@ int btrfs_block_rsv_check(struct btrfs_root *root,
3737 if (!ret) 3770 if (!ret)
3738 return 0; 3771 return 0;
3739 3772
3740 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); 3773 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush, !flush);
3741 if (!ret) { 3774 if (!ret) {
3742 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3775 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3743 return 0; 3776 return 0;
@@ -4037,7 +4070,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4037 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 4070 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4038 spin_unlock(&BTRFS_I(inode)->lock); 4071 spin_unlock(&BTRFS_I(inode)->lock);
4039 4072
4040 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4073 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush, 0);
4041 if (ret) { 4074 if (ret) {
4042 u64 to_free = 0; 4075 u64 to_free = 0;
4043 unsigned dropped; 4076 unsigned dropped;
@@ -5692,7 +5725,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5692 block_rsv = get_block_rsv(trans, root); 5725 block_rsv = get_block_rsv(trans, root);
5693 5726
5694 if (block_rsv->size == 0) { 5727 if (block_rsv->size == 0) {
5695 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); 5728 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0, 0);
5696 /* 5729 /*
5697 * If we couldn't reserve metadata bytes try and use some from 5730 * If we couldn't reserve metadata bytes try and use some from
5698 * the global reserve. 5731 * the global reserve.
@@ -5713,7 +5746,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5713 return block_rsv; 5746 return block_rsv;
5714 if (ret) { 5747 if (ret) {
5715 WARN_ON(1); 5748 WARN_ON(1);
5716 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); 5749 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0, 0);
5717 if (!ret) { 5750 if (!ret) {
5718 return block_rsv; 5751 return block_rsv;
5719 } else if (ret && block_rsv != global_rsv) { 5752 } else if (ret && block_rsv != global_rsv) {