author	Josef Bacik <josef@redhat.com>	2010-10-15 16:52:49 -0400
committer	Josef Bacik <josef@redhat.com>	2010-10-22 15:55:01 -0400
commit	8bb8ab2e93f9c3c9453e13be0f37d344a32a3a6d (patch)
tree	619600c7458a3af18555f189d53efc4c092b9280 /fs/btrfs/extent-tree.c
parent	14ed0ca6e8236f2d264c4a8faec9e3a2b3d04377 (diff)
Btrfs: rework how we reserve metadata bytes
With multi-threaded writes we were getting ENOSPC early because somebody would come in, start flushing delalloc because they couldn't make their reservation, and in the meantime other threads would come in and use the space that was getting freed up, so when the original thread went to check to see if they had space they didn't and they'd return ENOSPC.

So instead, if we have some free space but not enough for our reservation, take the reservation and then start doing the flushing. The only time we don't take reservations is when we've already overcommitted our space, that way we don't have people who come late to the party way overcommitting ourselves. This also moves all of the retrying and flushing code into reserve_metadata_bytes() so it's all uniform.

This keeps my fs_mark test from returning -ENOSPC as soon as it starts and actually lets me fill up the disk. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
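[Editorial note] In outline, the fix is: claim the bytes first (when not overcommitted), then flush, then re-check. The sketch below is a standalone userspace simplification of that ordering, not kernel code; the struct, try_reserve(), and flush_space() names are invented for illustration, and a pthread mutex stands in for the space_info spinlock:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

/* Toy stand-in for btrfs_space_info; every name here is illustrative. */
struct space_info {
	pthread_mutex_t lock;
	uint64_t total_bytes;
	uint64_t bytes_committed;	/* used + reserved + pinned + ... */
};

/* Pretend reclaimer standing in for shrink_delalloc(): behaves as if
 * completed IO released up to 'need' bytes. */
static void flush_space(struct space_info *s, uint64_t need)
{
	pthread_mutex_lock(&s->lock);
	s->bytes_committed -= (need < s->bytes_committed) ?
			      need : s->bytes_committed;
	pthread_mutex_unlock(&s->lock);
}

/*
 * The commit's ordering in miniature: if we are not already overcommitted,
 * claim the bytes *before* flushing, so space reclaimed while we flush
 * cannot be taken by threads that arrive in the meantime.
 */
static int try_reserve(struct space_info *s, uint64_t bytes)
{
	bool held = false;
	int retries;

	for (retries = 0; retries < 3; retries++) {
		pthread_mutex_lock(&s->lock);
		if (!held && s->bytes_committed <= s->total_bytes) {
			s->bytes_committed += bytes;	/* hold our place */
			held = true;
		}
		if (held && s->bytes_committed <= s->total_bytes) {
			pthread_mutex_unlock(&s->lock);
			return 0;		/* reservation fits now */
		}
		pthread_mutex_unlock(&s->lock);
		flush_space(s, bytes);		/* reclaim, then re-check */
	}

	if (held) {				/* undo the held bytes */
		pthread_mutex_lock(&s->lock);
		s->bytes_committed -= bytes;
		pthread_mutex_unlock(&s->lock);
	}
	return -1;				/* -ENOSPC in the kernel */
}

int main(void)
{
	struct space_info s = { PTHREAD_MUTEX_INITIALIZER, 100, 95 };
	return try_reserve(&s, 10);	/* holds 10, flushes, then succeeds */
}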
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	238
1 file changed, 129 insertions(+), 109 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aca3314ef8b9..180a50146ddf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3078,38 +3078,6 @@ out:
 	return ret;
 }
 
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-	int ret;
-	int end_trans = 0;
-
-	if (sinfo->full)
-		return 0;
-
-	spin_lock(&sinfo->lock);
-	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-	spin_unlock(&sinfo->lock);
-	if (!ret)
-		return 0;
-
-	if (!trans) {
-		trans = btrfs_join_transaction(root, 1);
-		BUG_ON(IS_ERR(trans));
-		end_trans = 1;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     num_bytes + 2 * 1024 * 1024,
-			     get_alloc_profile(root, sinfo->flags), 0);
-
-	if (end_trans)
-		btrfs_end_transaction(trans, root);
-
-	return ret == 1 ? 1 : 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3167,79 +3135,138 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int *retries)
-{
-	struct btrfs_space_info *space_info = block_rsv->space_info;
-	int ret;
-
-	if ((*retries) > 2)
-		return -ENOSPC;
-
-	ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-	if (ret)
-		return 1;
-
-	if (trans && trans->transaction->in_commit)
-		return -ENOSPC;
-
-	ret = shrink_delalloc(trans, root, num_bytes, 0);
-	if (ret)
-		return ret;
-
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < num_bytes)
-		ret = 1;
-	spin_unlock(&space_info->lock);
-	if (ret)
-		return -ENOSPC;
-
-	(*retries)++;
-
-	if (trans)
-		return -EAGAIN;
-
-	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(IS_ERR(trans));
-	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
-
-	return 1;
-}
-
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes)
-{
-	struct btrfs_space_info *space_info = block_rsv->space_info;
-	u64 unused;
-	int ret = -ENOSPC;
-
-	spin_lock(&space_info->lock);
-	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly +
-		 space_info->bytes_may_use;
-
-	if (unused < space_info->total_bytes)
-		unused = space_info->total_bytes - unused;
-	else
-		unused = 0;
-
-	if (unused >= num_bytes) {
-		if (block_rsv->priority >= 10) {
-			space_info->bytes_reserved += num_bytes;
-			ret = 0;
-		} else {
-			if ((unused + block_rsv->reserved) *
-			    block_rsv->priority >=
-			    (num_bytes + block_rsv->reserved) * 10) {
-				space_info->bytes_reserved += num_bytes;
-				ret = 0;
-			}
-		}
-	}
-	spin_unlock(&space_info->lock);
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't
+ * try and add space, we just check to see if the amount of unused space is
+ * >= the total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries
+ * so that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_block_rsv *block_rsv,
+				  u64 orig_bytes, int flush)
+{
+	struct btrfs_space_info *space_info = block_rsv->space_info;
+	u64 unused;
+	u64 num_bytes = orig_bytes;
+	int retries = 0;
+	int ret = 0;
+	bool reserved = false;
+
+again:
+	ret = -ENOSPC;
+	if (reserved)
+		num_bytes = 0;
+
+	spin_lock(&space_info->lock);
+	unused = space_info->bytes_used + space_info->bytes_reserved +
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
+
+	/*
+	 * The idea here is that if we've not already over-reserved the block
+	 * group then we can go ahead and save our reservation first and then
+	 * start flushing if we need to.  Otherwise if we've already
+	 * overcommitted lets start flushing stuff first and then come back
+	 * and try to make our reservation.
+	 */
+	if (unused <= space_info->total_bytes) {
+		unused = space_info->total_bytes - unused;
+		if (unused >= num_bytes) {
+			if (!reserved)
+				space_info->bytes_reserved += orig_bytes;
+			ret = 0;
+		} else {
+			/*
+			 * Ok set num_bytes to orig_bytes since we aren't
+			 * overcommitted, this way we only try and reclaim
+			 * what we need.
+			 */
+			num_bytes = orig_bytes;
+		}
+	} else {
+		/*
+		 * Ok we're overcommitted, set num_bytes to the overcommitted
+		 * amount plus the amount of bytes that we need for this
+		 * reservation.
+		 */
+		num_bytes = unused - space_info->total_bytes +
+			    (orig_bytes * (retries + 1));
+	}
+
+	/*
+	 * Couldn't make our reservation, save our place so while we're trying
+	 * to reclaim space we can actually use it instead of somebody else
+	 * stealing it from us.
+	 */
+	if (ret && !reserved) {
+		space_info->bytes_reserved += orig_bytes;
+		reserved = true;
+	}
+
+	spin_unlock(&space_info->lock);
+
+	if (!ret)
+		return 0;
+
+	if (!flush)
+		goto out;
+
+	/*
+	 * We do synchronous shrinking since we don't actually unreserve
+	 * metadata until after the IO is completed.
+	 */
+	ret = shrink_delalloc(trans, root, num_bytes, 1);
+	if (ret > 0)
+		return 0;
+	else if (ret < 0)
+		goto out;
+
+	/*
+	 * So if we were overcommitted it's possible that somebody else flushed
+	 * out enough space and we simply didn't have enough space to reclaim,
+	 * so go back around and try again.
+	 */
+	if (retries < 2) {
+		retries++;
+		goto again;
+	}
+
+	spin_lock(&space_info->lock);
+	/*
+	 * Not enough space to be reclaimed, don't bother committing the
+	 * transaction.
+	 */
+	if (space_info->bytes_pinned < orig_bytes)
+		ret = -ENOSPC;
+	spin_unlock(&space_info->lock);
+	if (ret)
+		goto out;
+
+	ret = -EAGAIN;
+	if (trans)
+		goto out;
+
+
+	ret = -ENOSPC;
+	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		goto out;
+	ret = btrfs_commit_transaction(trans, root);
+	if (!ret)
+		goto again;
+
+out:
+	if (reserved) {
+		spin_lock(&space_info->lock);
+		space_info->bytes_reserved -= orig_bytes;
+		spin_unlock(&space_info->lock);
+	}
 
 	return ret;
 }
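[Editorial note] One detail worth calling out in the new function: in the overcommitted branch, the variable named unused at that point actually still holds the sum of consumed bytes, and the reclaim target grows with each retry. A tiny standalone arithmetic check of the num_bytes = unused - total_bytes + orig_bytes * (retries + 1) line, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up figures: 1 GiB of metadata space, 1.25 GiB spoken for. */
	uint64_t total_bytes = 1024ULL << 20;
	uint64_t consumed    = 1280ULL << 20;	/* 'unused' in the patch */
	uint64_t orig_bytes  = 4ULL << 20;	/* reservation we want */
	int retries;

	for (retries = 0; retries < 3; retries++) {
		/* Mirrors the overcommitted branch above. */
		uint64_t num_bytes = consumed - total_bytes +
				     orig_bytes * (retries + 1);
		printf("retry %d: reclaim target %llu MiB\n", retries,
		       (unsigned long long)(num_bytes >> 20));
	}
	return 0;	/* prints 260, 264, 268 MiB */
}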
@@ -3383,23 +3410,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries)
+			u64 num_bytes)
 {
 	int ret;
 
 	if (num_bytes == 0)
 		return 0;
-again:
-	ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
 		return 0;
 	}
 
-	ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-	if (ret > 0)
-		goto again;
-
 	return ret;
 }
 
@@ -3434,7 +3457,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (block_rsv->refill_used) {
-		ret = reserve_metadata_bytes(block_rsv, num_bytes);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     num_bytes, 0);
 		if (!ret) {
 			block_rsv_add_bytes(block_rsv, num_bytes, 0);
 			return 0;
@@ -3614,7 +3638,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
-				 int num_items, int *retries)
+				 int num_items)
 {
 	u64 num_bytes;
 	int ret;
@@ -3624,7 +3648,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 
 	num_bytes = calc_trans_metadata_size(root, num_items);
 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes, retries);
+				  num_bytes);
 	if (!ret) {
 		trans->bytes_reserved += num_bytes;
 		trans->block_rsv = &root->fs_info->trans_block_rsv;
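[Editorial note] The knock-on effect for callers is visible in the hunks above: the int *retries plumbing disappears. A minimal compile-only sketch of the new calling convention (the wrapper function is hypothetical; the prototype matches this diff):

struct btrfs_trans_handle;
struct btrfs_root;

/* New signature from this patch: no caller-owned retry counter. */
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, int num_items);

/* Hypothetical caller: one call, no 'again:' label, no retries variable;
 * retrying, flushing, and committing now happen inside
 * reserve_metadata_bytes(). */
static int reserve_for_one_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	return btrfs_trans_reserve_metadata(trans, root, 1);
}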
@@ -3698,14 +3722,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve;
 	int nr_extents;
-	int retries = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
 	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3715,18 +3738,14 @@ again:
 		nr_extents = 0;
 		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(block_rsv, to_reserve);
-	if (ret) {
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-					   &retries);
-		if (ret > 0)
-			goto again;
+	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+	if (ret)
 		return ret;
-	}
 
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -5325,7 +5344,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(block_rsv, blocksize);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     blocksize, 0);
 		if (ret)
 			return ERR_PTR(ret);
 		return block_rsv;