aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c668
1 files changed, 364 insertions, 304 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f5be06a2462f..4eb7d2ba38f8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,6 +52,21 @@ enum {
52 CHUNK_ALLOC_LIMITED = 2, 52 CHUNK_ALLOC_LIMITED = 2,
53}; 53};
54 54
55/*
56 * Control how reservations are dealt with.
57 *
58 * RESERVE_FREE - freeing a reservation.
59 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
60 * ENOSPC accounting
61 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
62 * bytes_may_use as the ENOSPC accounting is done elsewhere
63 */
64enum {
65 RESERVE_FREE = 0,
66 RESERVE_ALLOC = 1,
67 RESERVE_ALLOC_NO_ACCOUNT = 2,
68};
69
55static int update_block_group(struct btrfs_trans_handle *trans, 70static int update_block_group(struct btrfs_trans_handle *trans,
56 struct btrfs_root *root, 71 struct btrfs_root *root,
57 u64 bytenr, u64 num_bytes, int alloc); 72 u64 bytenr, u64 num_bytes, int alloc);
@@ -81,6 +96,8 @@ static int find_next_key(struct btrfs_path *path, int level,
81 struct btrfs_key *key); 96 struct btrfs_key *key);
82static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 97static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
83 int dump_block_groups); 98 int dump_block_groups);
99static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
100 u64 num_bytes, int reserve);
84 101
85static noinline int 102static noinline int
86block_group_cache_done(struct btrfs_block_group_cache *cache) 103block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -104,7 +121,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
104 if (atomic_dec_and_test(&cache->count)) { 121 if (atomic_dec_and_test(&cache->count)) {
105 WARN_ON(cache->pinned > 0); 122 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 123 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl); 124 kfree(cache->free_space_ctl);
109 kfree(cache); 125 kfree(cache);
110 } 126 }
@@ -465,7 +481,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
465 * we likely hold important locks. 481 * we likely hold important locks.
466 */ 482 */
467 if (trans && (!trans->transaction->in_commit) && 483 if (trans && (!trans->transaction->in_commit) &&
468 (root && root != root->fs_info->tree_root)) { 484 (root && root != root->fs_info->tree_root) &&
485 btrfs_test_opt(root, SPACE_CACHE)) {
469 spin_lock(&cache->lock); 486 spin_lock(&cache->lock);
470 if (cache->cached != BTRFS_CACHE_NO) { 487 if (cache->cached != BTRFS_CACHE_NO) {
471 spin_unlock(&cache->lock); 488 spin_unlock(&cache->lock);
@@ -2700,6 +2717,13 @@ again:
2700 goto again; 2717 goto again;
2701 } 2718 }
2702 2719
2720 /* We've already setup this transaction, go ahead and exit */
2721 if (block_group->cache_generation == trans->transid &&
2722 i_size_read(inode)) {
2723 dcs = BTRFS_DC_SETUP;
2724 goto out_put;
2725 }
2726
2703 /* 2727 /*
2704 * We want to set the generation to 0, that way if anything goes wrong 2728 * We want to set the generation to 0, that way if anything goes wrong
2705 * from here on out we know not to trust this cache when we load up next 2729 * from here on out we know not to trust this cache when we load up next
@@ -2749,12 +2773,15 @@ again:
2749 if (!ret) 2773 if (!ret)
2750 dcs = BTRFS_DC_SETUP; 2774 dcs = BTRFS_DC_SETUP;
2751 btrfs_free_reserved_data_space(inode, num_pages); 2775 btrfs_free_reserved_data_space(inode, num_pages);
2776
2752out_put: 2777out_put:
2753 iput(inode); 2778 iput(inode);
2754out_free: 2779out_free:
2755 btrfs_release_path(path); 2780 btrfs_release_path(path);
2756out: 2781out:
2757 spin_lock(&block_group->lock); 2782 spin_lock(&block_group->lock);
2783 if (!ret)
2784 block_group->cache_generation = trans->transid;
2758 block_group->disk_cache_state = dcs; 2785 block_group->disk_cache_state = dcs;
2759 spin_unlock(&block_group->lock); 2786 spin_unlock(&block_group->lock);
2760 2787
@@ -3122,16 +3149,13 @@ commit_trans:
3122 return -ENOSPC; 3149 return -ENOSPC;
3123 } 3150 }
3124 data_sinfo->bytes_may_use += bytes; 3151 data_sinfo->bytes_may_use += bytes;
3125 BTRFS_I(inode)->reserved_bytes += bytes;
3126 spin_unlock(&data_sinfo->lock); 3152 spin_unlock(&data_sinfo->lock);
3127 3153
3128 return 0; 3154 return 0;
3129} 3155}
3130 3156
3131/* 3157/*
3132 * called when we are clearing an delalloc extent from the 3158 * Called if we need to clear a data reservation for this inode.
3133 * inode's io_tree or there was an error for whatever reason
3134 * after calling btrfs_check_data_free_space
3135 */ 3159 */
3136void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3160void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3137{ 3161{
@@ -3144,7 +3168,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3144 data_sinfo = BTRFS_I(inode)->space_info; 3168 data_sinfo = BTRFS_I(inode)->space_info;
3145 spin_lock(&data_sinfo->lock); 3169 spin_lock(&data_sinfo->lock);
3146 data_sinfo->bytes_may_use -= bytes; 3170 data_sinfo->bytes_may_use -= bytes;
3147 BTRFS_I(inode)->reserved_bytes -= bytes;
3148 spin_unlock(&data_sinfo->lock); 3171 spin_unlock(&data_sinfo->lock);
3149} 3172}
3150 3173
@@ -3165,6 +3188,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
3165 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3188 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3166 int force) 3189 int force)
3167{ 3190{
3191 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3168 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3192 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3169 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3193 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3170 u64 thresh; 3194 u64 thresh;
@@ -3173,6 +3197,13 @@ static int should_alloc_chunk(struct btrfs_root *root,
3173 return 1; 3197 return 1;
3174 3198
3175 /* 3199 /*
3200 * We need to take into account the global rsv because for all intents
3201 * and purposes it's used space. Don't worry about locking the
3202 * global_rsv, it doesn't change except when the transaction commits.
3203 */
3204 num_allocated += global_rsv->size;
3205
3206 /*
3176 * in limited mode, we want to have some free space up to 3207 * in limited mode, we want to have some free space up to
3177 * about 1% of the FS size. 3208 * about 1% of the FS size.
3178 */ 3209 */
@@ -3303,7 +3334,8 @@ out:
3303 * shrink metadata reservation for delalloc 3334 * shrink metadata reservation for delalloc
3304 */ 3335 */
3305static int shrink_delalloc(struct btrfs_trans_handle *trans, 3336static int shrink_delalloc(struct btrfs_trans_handle *trans,
3306 struct btrfs_root *root, u64 to_reclaim, int sync) 3337 struct btrfs_root *root, u64 to_reclaim,
3338 bool wait_ordered)
3307{ 3339{
3308 struct btrfs_block_rsv *block_rsv; 3340 struct btrfs_block_rsv *block_rsv;
3309 struct btrfs_space_info *space_info; 3341 struct btrfs_space_info *space_info;
@@ -3311,7 +3343,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3311 u64 max_reclaim; 3343 u64 max_reclaim;
3312 u64 reclaimed = 0; 3344 u64 reclaimed = 0;
3313 long time_left; 3345 long time_left;
3314 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3346 unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3315 int loops = 0; 3347 int loops = 0;
3316 unsigned long progress; 3348 unsigned long progress;
3317 3349
@@ -3319,7 +3351,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3319 space_info = block_rsv->space_info; 3351 space_info = block_rsv->space_info;
3320 3352
3321 smp_mb(); 3353 smp_mb();
3322 reserved = space_info->bytes_reserved; 3354 reserved = space_info->bytes_may_use;
3323 progress = space_info->reservation_progress; 3355 progress = space_info->reservation_progress;
3324 3356
3325 if (reserved == 0) 3357 if (reserved == 0)
@@ -3334,7 +3366,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3334 } 3366 }
3335 3367
3336 max_reclaim = min(reserved, to_reclaim); 3368 max_reclaim = min(reserved, to_reclaim);
3337 3369 nr_pages = max_t(unsigned long, nr_pages,
3370 max_reclaim >> PAGE_CACHE_SHIFT);
3338 while (loops < 1024) { 3371 while (loops < 1024) {
3339 /* have the flusher threads jump in and do some IO */ 3372 /* have the flusher threads jump in and do some IO */
3340 smp_mb(); 3373 smp_mb();
@@ -3343,9 +3376,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3343 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3376 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3344 3377
3345 spin_lock(&space_info->lock); 3378 spin_lock(&space_info->lock);
3346 if (reserved > space_info->bytes_reserved) 3379 if (reserved > space_info->bytes_may_use)
3347 reclaimed += reserved - space_info->bytes_reserved; 3380 reclaimed += reserved - space_info->bytes_may_use;
3348 reserved = space_info->bytes_reserved; 3381 reserved = space_info->bytes_may_use;
3349 spin_unlock(&space_info->lock); 3382 spin_unlock(&space_info->lock);
3350 3383
3351 loops++; 3384 loops++;
@@ -3356,11 +3389,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3356 if (trans && trans->transaction->blocked) 3389 if (trans && trans->transaction->blocked)
3357 return -EAGAIN; 3390 return -EAGAIN;
3358 3391
3359 time_left = schedule_timeout_interruptible(1); 3392 if (wait_ordered && !trans) {
3393 btrfs_wait_ordered_extents(root, 0, 0);
3394 } else {
3395 time_left = schedule_timeout_interruptible(1);
3360 3396
3361 /* We were interrupted, exit */ 3397 /* We were interrupted, exit */
3362 if (time_left) 3398 if (time_left)
3363 break; 3399 break;
3400 }
3364 3401
3365 /* we've kicked the IO a few times, if anything has been freed, 3402 /* we've kicked the IO a few times, if anything has been freed,
3366 * exit. There is no sense in looping here for a long time 3403 * exit. There is no sense in looping here for a long time
@@ -3375,35 +3412,39 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3375 } 3412 }
3376 3413
3377 } 3414 }
3378 if (reclaimed >= to_reclaim && !trans) 3415
3379 btrfs_wait_ordered_extents(root, 0, 0);
3380 return reclaimed >= to_reclaim; 3416 return reclaimed >= to_reclaim;
3381} 3417}
3382 3418
3383/* 3419/**
3384 * Retries tells us how many times we've called reserve_metadata_bytes. The 3420 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
3385 * idea is if this is the first call (retries == 0) then we will add to our 3421 * @root - the root we're allocating for
3386 * reserved count if we can't make the allocation in order to hold our place 3422 * @block_rsv - the block_rsv we're allocating for
3387 * while we go and try and free up space. That way for retries > 1 we don't try 3423 * @orig_bytes - the number of bytes we want
3388 * and add space, we just check to see if the amount of unused space is >= the 3424 * @flush - whether or not we can flush to make our reservation
3389 * total space, meaning that our reservation is valid.
3390 * 3425 *
3391 * However if we don't intend to retry this reservation, pass -1 as retries so 3427 * This will reserve orig_bytes number of bytes from the space info associated
3392 * that it short circuits this logic. 3427 * with the block_rsv. If there is not enough space it will make an attempt to
3428 * flush out space to make room. It will do this by flushing delalloc if
3429 * possible or committing the transaction. If flush is 0 then no attempts to
3430 * regain reservations will be made and this will fail if there is not enough
3431 * space already.
3393 */ 3432 */
3394static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, 3433static int reserve_metadata_bytes(struct btrfs_root *root,
3395 struct btrfs_root *root,
3396 struct btrfs_block_rsv *block_rsv, 3434 struct btrfs_block_rsv *block_rsv,
3397 u64 orig_bytes, int flush) 3435 u64 orig_bytes, int flush)
3398{ 3436{
3399 struct btrfs_space_info *space_info = block_rsv->space_info; 3437 struct btrfs_space_info *space_info = block_rsv->space_info;
3400 u64 unused; 3438 struct btrfs_trans_handle *trans;
3439 u64 used;
3401 u64 num_bytes = orig_bytes; 3440 u64 num_bytes = orig_bytes;
3402 int retries = 0; 3441 int retries = 0;
3403 int ret = 0; 3442 int ret = 0;
3404 bool committed = false; 3443 bool committed = false;
3405 bool flushing = false; 3444 bool flushing = false;
3445 bool wait_ordered = false;
3406 3446
3447 trans = (struct btrfs_trans_handle *)current->journal_info;
3407again: 3448again:
3408 ret = 0; 3449 ret = 0;
3409 spin_lock(&space_info->lock); 3450 spin_lock(&space_info->lock);
@@ -3431,9 +3472,9 @@ again:
3431 } 3472 }
3432 3473
3433 ret = -ENOSPC; 3474 ret = -ENOSPC;
3434 unused = space_info->bytes_used + space_info->bytes_reserved + 3475 used = space_info->bytes_used + space_info->bytes_reserved +
3435 space_info->bytes_pinned + space_info->bytes_readonly + 3476 space_info->bytes_pinned + space_info->bytes_readonly +
3436 space_info->bytes_may_use; 3477 space_info->bytes_may_use;
3437 3478
3438 /* 3479 /*
3439 * The idea here is that we've not already over-reserved the block group 3480 * The idea here is that we've not already over-reserved the block group
@@ -3442,10 +3483,9 @@ again:
3442 * lets start flushing stuff first and then come back and try to make 3483 * lets start flushing stuff first and then come back and try to make
3443 * our reservation. 3484 * our reservation.
3444 */ 3485 */
3445 if (unused <= space_info->total_bytes) { 3486 if (used <= space_info->total_bytes) {
3446 unused = space_info->total_bytes - unused; 3487 if (used + orig_bytes <= space_info->total_bytes) {
3447 if (unused >= num_bytes) { 3488 space_info->bytes_may_use += orig_bytes;
3448 space_info->bytes_reserved += orig_bytes;
3449 ret = 0; 3489 ret = 0;
3450 } else { 3490 } else {
3451 /* 3491 /*
@@ -3461,10 +3501,60 @@ again:
3461 * amount plus the amount of bytes that we need for this 3501 * amount plus the amount of bytes that we need for this
3462 * reservation. 3502 * reservation.
3463 */ 3503 */
3464 num_bytes = unused - space_info->total_bytes + 3504 wait_ordered = true;
3505 num_bytes = used - space_info->total_bytes +
3465 (orig_bytes * (retries + 1)); 3506 (orig_bytes * (retries + 1));
3466 } 3507 }
3467 3508
3509 if (ret) {
3510 u64 profile = btrfs_get_alloc_profile(root, 0);
3511 u64 avail;
3512
3513 /*
3514 * If we have a lot of space that's pinned, don't bother doing
3515 * the overcommit dance yet and just commit the transaction.
3516 */
3517 avail = (space_info->total_bytes - space_info->bytes_used) * 8;
3518 do_div(avail, 10);
3519 if (space_info->bytes_pinned >= avail && flush && !trans &&
3520 !committed) {
3521 space_info->flush = 1;
3522 flushing = true;
3523 spin_unlock(&space_info->lock);
3524 goto commit;
3525 }
3526
3527 spin_lock(&root->fs_info->free_chunk_lock);
3528 avail = root->fs_info->free_chunk_space;
3529
3530 /*
3531 * If we have dup, raid1 or raid10 then only half of the free
3532 * space is actually useable.
3533 */
3534 if (profile & (BTRFS_BLOCK_GROUP_DUP |
3535 BTRFS_BLOCK_GROUP_RAID1 |
3536 BTRFS_BLOCK_GROUP_RAID10))
3537 avail >>= 1;
3538
3539 /*
3540 * If we aren't flushing don't let us overcommit too much, say
3541 * 1/8th of the space. If we can flush, let it overcommit up to
3542 * 1/2 of the space.
3543 */
3544 if (flush)
3545 avail >>= 3;
3546 else
3547 avail >>= 1;
3548 spin_unlock(&root->fs_info->free_chunk_lock);
3549
3550 if (used + num_bytes < space_info->total_bytes + avail) {
3551 space_info->bytes_may_use += orig_bytes;
3552 ret = 0;
3553 } else {
3554 wait_ordered = true;
3555 }
3556 }
3557
3468 /* 3558 /*
3469 * Couldn't make our reservation, save our place so while we're trying 3559 * Couldn't make our reservation, save our place so while we're trying
3470 * to reclaim space we can actually use it instead of somebody else 3560 * to reclaim space we can actually use it instead of somebody else
@@ -3484,7 +3574,7 @@ again:
3484 * We do synchronous shrinking since we don't actually unreserve 3574 * We do synchronous shrinking since we don't actually unreserve
3485 * metadata until after the IO is completed. 3575 * metadata until after the IO is completed.
3486 */ 3576 */
3487 ret = shrink_delalloc(trans, root, num_bytes, 1); 3577 ret = shrink_delalloc(trans, root, num_bytes, wait_ordered);
3488 if (ret < 0) 3578 if (ret < 0)
3489 goto out; 3579 goto out;
3490 3580
@@ -3496,25 +3586,16 @@ again:
3496 * so go back around and try again. 3586 * so go back around and try again.
3497 */ 3587 */
3498 if (retries < 2) { 3588 if (retries < 2) {
3589 wait_ordered = true;
3499 retries++; 3590 retries++;
3500 goto again; 3591 goto again;
3501 } 3592 }
3502 3593
3503 /*
3504 * Not enough space to be reclaimed, don't bother committing the
3505 * transaction.
3506 */
3507 spin_lock(&space_info->lock);
3508 if (space_info->bytes_pinned < orig_bytes)
3509 ret = -ENOSPC;
3510 spin_unlock(&space_info->lock);
3511 if (ret)
3512 goto out;
3513
3514 ret = -EAGAIN; 3594 ret = -EAGAIN;
3515 if (trans) 3595 if (trans)
3516 goto out; 3596 goto out;
3517 3597
3598commit:
3518 ret = -ENOSPC; 3599 ret = -ENOSPC;
3519 if (committed) 3600 if (committed)
3520 goto out; 3601 goto out;
@@ -3542,10 +3623,12 @@ out:
3542static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3623static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
3543 struct btrfs_root *root) 3624 struct btrfs_root *root)
3544{ 3625{
3545 struct btrfs_block_rsv *block_rsv; 3626 struct btrfs_block_rsv *block_rsv = NULL;
3546 if (root->ref_cows) 3627
3628 if (root->ref_cows || root == root->fs_info->csum_root)
3547 block_rsv = trans->block_rsv; 3629 block_rsv = trans->block_rsv;
3548 else 3630
3631 if (!block_rsv)
3549 block_rsv = root->block_rsv; 3632 block_rsv = root->block_rsv;
3550 3633
3551 if (!block_rsv) 3634 if (!block_rsv)
@@ -3616,7 +3699,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3616 } 3699 }
3617 if (num_bytes) { 3700 if (num_bytes) {
3618 spin_lock(&space_info->lock); 3701 spin_lock(&space_info->lock);
3619 space_info->bytes_reserved -= num_bytes; 3702 space_info->bytes_may_use -= num_bytes;
3620 space_info->reservation_progress++; 3703 space_info->reservation_progress++;
3621 spin_unlock(&space_info->lock); 3704 spin_unlock(&space_info->lock);
3622 } 3705 }
@@ -3640,9 +3723,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
3640{ 3723{
3641 memset(rsv, 0, sizeof(*rsv)); 3724 memset(rsv, 0, sizeof(*rsv));
3642 spin_lock_init(&rsv->lock); 3725 spin_lock_init(&rsv->lock);
3643 atomic_set(&rsv->usage, 1);
3644 rsv->priority = 6;
3645 INIT_LIST_HEAD(&rsv->list);
3646} 3726}
3647 3727
3648struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) 3728struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
@@ -3663,29 +3743,11 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3663void btrfs_free_block_rsv(struct btrfs_root *root, 3743void btrfs_free_block_rsv(struct btrfs_root *root,
3664 struct btrfs_block_rsv *rsv) 3744 struct btrfs_block_rsv *rsv)
3665{ 3745{
3666 if (rsv && atomic_dec_and_test(&rsv->usage)) { 3746 btrfs_block_rsv_release(root, rsv, (u64)-1);
3667 btrfs_block_rsv_release(root, rsv, (u64)-1); 3747 kfree(rsv);
3668 if (!rsv->durable)
3669 kfree(rsv);
3670 }
3671}
3672
3673/*
3674 * make the block_rsv struct be able to capture freed space.
3675 * the captured space will re-add to the the block_rsv struct
3676 * after transaction commit
3677 */
3678void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3679 struct btrfs_block_rsv *block_rsv)
3680{
3681 block_rsv->durable = 1;
3682 mutex_lock(&fs_info->durable_block_rsv_mutex);
3683 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
3684 mutex_unlock(&fs_info->durable_block_rsv_mutex);
3685} 3748}
3686 3749
3687int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3750int btrfs_block_rsv_add(struct btrfs_root *root,
3688 struct btrfs_root *root,
3689 struct btrfs_block_rsv *block_rsv, 3751 struct btrfs_block_rsv *block_rsv,
3690 u64 num_bytes) 3752 u64 num_bytes)
3691{ 3753{
@@ -3694,7 +3756,7 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3694 if (num_bytes == 0) 3756 if (num_bytes == 0)
3695 return 0; 3757 return 0;
3696 3758
3697 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3759 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
3698 if (!ret) { 3760 if (!ret) {
3699 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3761 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3700 return 0; 3762 return 0;
@@ -3703,55 +3765,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3703 return ret; 3765 return ret;
3704} 3766}
3705 3767
3706int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 3768int btrfs_block_rsv_check(struct btrfs_root *root,
3707 struct btrfs_root *root, 3769 struct btrfs_block_rsv *block_rsv, int min_factor)
3708 struct btrfs_block_rsv *block_rsv,
3709 u64 min_reserved, int min_factor)
3710{ 3770{
3711 u64 num_bytes = 0; 3771 u64 num_bytes = 0;
3712 int commit_trans = 0;
3713 int ret = -ENOSPC; 3772 int ret = -ENOSPC;
3714 3773
3715 if (!block_rsv) 3774 if (!block_rsv)
3716 return 0; 3775 return 0;
3717 3776
3718 spin_lock(&block_rsv->lock); 3777 spin_lock(&block_rsv->lock);
3719 if (min_factor > 0) 3778 num_bytes = div_factor(block_rsv->size, min_factor);
3720 num_bytes = div_factor(block_rsv->size, min_factor); 3779 if (block_rsv->reserved >= num_bytes)
3721 if (min_reserved > num_bytes) 3780 ret = 0;
3722 num_bytes = min_reserved; 3781 spin_unlock(&block_rsv->lock);
3723 3782
3724 if (block_rsv->reserved >= num_bytes) { 3783 return ret;
3784}
3785
3786int btrfs_block_rsv_refill(struct btrfs_root *root,
3787 struct btrfs_block_rsv *block_rsv,
3788 u64 min_reserved)
3789{
3790 u64 num_bytes = 0;
3791 int ret = -ENOSPC;
3792
3793 if (!block_rsv)
3794 return 0;
3795
3796 spin_lock(&block_rsv->lock);
3797 num_bytes = min_reserved;
3798 if (block_rsv->reserved >= num_bytes)
3725 ret = 0; 3799 ret = 0;
3726 } else { 3800 else
3727 num_bytes -= block_rsv->reserved; 3801 num_bytes -= block_rsv->reserved;
3728 if (block_rsv->durable &&
3729 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
3730 commit_trans = 1;
3731 }
3732 spin_unlock(&block_rsv->lock); 3802 spin_unlock(&block_rsv->lock);
3803
3733 if (!ret) 3804 if (!ret)
3734 return 0; 3805 return 0;
3735 3806
3736 if (block_rsv->refill_used) { 3807 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
3737 ret = reserve_metadata_bytes(trans, root, block_rsv, 3808 if (!ret) {
3738 num_bytes, 0); 3809 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3739 if (!ret) {
3740 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3741 return 0;
3742 }
3743 }
3744
3745 if (commit_trans) {
3746 if (trans)
3747 return -EAGAIN;
3748 trans = btrfs_join_transaction(root);
3749 BUG_ON(IS_ERR(trans));
3750 ret = btrfs_commit_transaction(trans, root);
3751 return 0; 3810 return 0;
3752 } 3811 }
3753 3812
3754 return -ENOSPC; 3813 return ret;
3755} 3814}
3756 3815
3757int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, 3816int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
@@ -3827,12 +3886,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3827 if (sinfo->total_bytes > num_bytes) { 3886 if (sinfo->total_bytes > num_bytes) {
3828 num_bytes = sinfo->total_bytes - num_bytes; 3887 num_bytes = sinfo->total_bytes - num_bytes;
3829 block_rsv->reserved += num_bytes; 3888 block_rsv->reserved += num_bytes;
3830 sinfo->bytes_reserved += num_bytes; 3889 sinfo->bytes_may_use += num_bytes;
3831 } 3890 }
3832 3891
3833 if (block_rsv->reserved >= block_rsv->size) { 3892 if (block_rsv->reserved >= block_rsv->size) {
3834 num_bytes = block_rsv->reserved - block_rsv->size; 3893 num_bytes = block_rsv->reserved - block_rsv->size;
3835 sinfo->bytes_reserved -= num_bytes; 3894 sinfo->bytes_may_use -= num_bytes;
3836 sinfo->reservation_progress++; 3895 sinfo->reservation_progress++;
3837 block_rsv->reserved = block_rsv->size; 3896 block_rsv->reserved = block_rsv->size;
3838 block_rsv->full = 1; 3897 block_rsv->full = 1;
@@ -3848,16 +3907,12 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3848 3907
3849 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3908 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3850 fs_info->chunk_block_rsv.space_info = space_info; 3909 fs_info->chunk_block_rsv.space_info = space_info;
3851 fs_info->chunk_block_rsv.priority = 10;
3852 3910
3853 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3911 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3854 fs_info->global_block_rsv.space_info = space_info; 3912 fs_info->global_block_rsv.space_info = space_info;
3855 fs_info->global_block_rsv.priority = 10;
3856 fs_info->global_block_rsv.refill_used = 1;
3857 fs_info->delalloc_block_rsv.space_info = space_info; 3913 fs_info->delalloc_block_rsv.space_info = space_info;
3858 fs_info->trans_block_rsv.space_info = space_info; 3914 fs_info->trans_block_rsv.space_info = space_info;
3859 fs_info->empty_block_rsv.space_info = space_info; 3915 fs_info->empty_block_rsv.space_info = space_info;
3860 fs_info->empty_block_rsv.priority = 10;
3861 3916
3862 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; 3917 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
3863 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 3918 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
@@ -3865,10 +3920,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3865 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 3920 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
3866 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 3921 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
3867 3922
3868 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
3869
3870 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
3871
3872 update_global_block_rsv(fs_info); 3923 update_global_block_rsv(fs_info);
3873} 3924}
3874 3925
@@ -3883,46 +3934,13 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3883 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3934 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3884} 3935}
3885 3936
3886int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3887 struct btrfs_root *root,
3888 struct btrfs_block_rsv *rsv)
3889{
3890 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3891 u64 num_bytes;
3892 int ret;
3893
3894 /*
3895 * Truncate should be freeing data, but give us 2 items just in case it
3896 * needs to use some space. We may want to be smarter about this in the
3897 * future.
3898 */
3899 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3900
3901 /* We already have enough bytes, just return */
3902 if (rsv->reserved >= num_bytes)
3903 return 0;
3904
3905 num_bytes -= rsv->reserved;
3906
3907 /*
3908 * You should have reserved enough space before hand to do this, so this
3909 * should not fail.
3910 */
3911 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3912 BUG_ON(ret);
3913
3914 return 0;
3915}
3916
3917void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3937void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
3918 struct btrfs_root *root) 3938 struct btrfs_root *root)
3919{ 3939{
3920 if (!trans->bytes_reserved) 3940 if (!trans->bytes_reserved)
3921 return; 3941 return;
3922 3942
3923 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); 3943 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
3924 btrfs_block_rsv_release(root, trans->block_rsv,
3925 trans->bytes_reserved);
3926 trans->bytes_reserved = 0; 3944 trans->bytes_reserved = 0;
3927} 3945}
3928 3946
@@ -3964,11 +3982,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3964 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3982 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3965} 3983}
3966 3984
3985/**
3986 * drop_outstanding_extent - drop an outstanding extent
3987 * @inode: the inode we're dropping the extent for
3988 *
3989 * This is called when we are freeing up an outstanding extent, either called
3990 * after an error or after an extent is written. This will return the number of
3991 * reserved extents that need to be freed. This must be called with
3992 * BTRFS_I(inode)->lock held.
3993 */
3967static unsigned drop_outstanding_extent(struct inode *inode) 3994static unsigned drop_outstanding_extent(struct inode *inode)
3968{ 3995{
3969 unsigned dropped_extents = 0; 3996 unsigned dropped_extents = 0;
3970 3997
3971 spin_lock(&BTRFS_I(inode)->lock);
3972 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 3998 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
3973 BTRFS_I(inode)->outstanding_extents--; 3999 BTRFS_I(inode)->outstanding_extents--;
3974 4000
@@ -3978,19 +4004,70 @@ static unsigned drop_outstanding_extent(struct inode *inode)
3978 */ 4004 */
3979 if (BTRFS_I(inode)->outstanding_extents >= 4005 if (BTRFS_I(inode)->outstanding_extents >=
3980 BTRFS_I(inode)->reserved_extents) 4006 BTRFS_I(inode)->reserved_extents)
3981 goto out; 4007 return 0;
3982 4008
3983 dropped_extents = BTRFS_I(inode)->reserved_extents - 4009 dropped_extents = BTRFS_I(inode)->reserved_extents -
3984 BTRFS_I(inode)->outstanding_extents; 4010 BTRFS_I(inode)->outstanding_extents;
3985 BTRFS_I(inode)->reserved_extents -= dropped_extents; 4011 BTRFS_I(inode)->reserved_extents -= dropped_extents;
3986out:
3987 spin_unlock(&BTRFS_I(inode)->lock);
3988 return dropped_extents; 4012 return dropped_extents;
3989} 4013}
3990 4014
3991static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 4015/**
4016 * calc_csum_metadata_size - return the amount of metadata space that must be
4017 * reserved/free'd for the given bytes.
4018 * @inode: the inode we're manipulating
4019 * @num_bytes: the number of bytes in question
4020 * @reserve: 1 if we are reserving space, 0 if we are freeing space
4021 *
4022 * This adjusts the number of csum_bytes in the inode and then returns the
4023 * correct amount of metadata that must either be reserved or freed. We
4024 * calculate how many checksums we can fit into one leaf and then divide the
4025 * number of bytes that will need to be checksummed by this value to figure out
4026 * how many checksums will be required. If we are adding bytes then the number
4027 * may go up and we will return the number of additional bytes that must be
4028 * reserved. If it is going down we will return the number of bytes that must
4029 * be freed.
4030 *
4031 * This must be called with BTRFS_I(inode)->lock held.
4032 */
4033static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
4034 int reserve)
3992{ 4035{
3993 return num_bytes >>= 3; 4036 struct btrfs_root *root = BTRFS_I(inode)->root;
4037 u64 csum_size;
4038 int num_csums_per_leaf;
4039 int num_csums;
4040 int old_csums;
4041
4042 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
4043 BTRFS_I(inode)->csum_bytes == 0)
4044 return 0;
4045
4046 old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
4047 if (reserve)
4048 BTRFS_I(inode)->csum_bytes += num_bytes;
4049 else
4050 BTRFS_I(inode)->csum_bytes -= num_bytes;
4051 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
4052 num_csums_per_leaf = (int)div64_u64(csum_size,
4053 sizeof(struct btrfs_csum_item) +
4054 sizeof(struct btrfs_disk_key));
4055 num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
4056 num_csums = num_csums + num_csums_per_leaf - 1;
4057 num_csums = num_csums / num_csums_per_leaf;
4058
4059 old_csums = old_csums + num_csums_per_leaf - 1;
4060 old_csums = old_csums / num_csums_per_leaf;
4061
4062 /* No change, no need to reserve more */
4063 if (old_csums == num_csums)
4064 return 0;
4065
4066 if (reserve)
4067 return btrfs_calc_trans_metadata_size(root,
4068 num_csums - old_csums);
4069
4070 return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
3994} 4071}
3995 4072
3996int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) 4073int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
@@ -3999,9 +4076,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3999 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4076 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4000 u64 to_reserve = 0; 4077 u64 to_reserve = 0;
4001 unsigned nr_extents = 0; 4078 unsigned nr_extents = 0;
4079 int flush = 1;
4002 int ret; 4080 int ret;
4003 4081
4004 if (btrfs_transaction_in_commit(root->fs_info)) 4082 if (btrfs_is_free_space_inode(root, inode))
4083 flush = 0;
4084
4085 if (flush && btrfs_transaction_in_commit(root->fs_info))
4005 schedule_timeout(1); 4086 schedule_timeout(1);
4006 4087
4007 num_bytes = ALIGN(num_bytes, root->sectorsize); 4088 num_bytes = ALIGN(num_bytes, root->sectorsize);
@@ -4017,18 +4098,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4017 4098
4018 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4099 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4019 } 4100 }
4101 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4020 spin_unlock(&BTRFS_I(inode)->lock); 4102 spin_unlock(&BTRFS_I(inode)->lock);
4021 4103
4022 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4104 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
4023 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4024 if (ret) { 4105 if (ret) {
4106 u64 to_free = 0;
4025 unsigned dropped; 4107 unsigned dropped;
4108
4109 spin_lock(&BTRFS_I(inode)->lock);
4110 dropped = drop_outstanding_extent(inode);
4111 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4112 spin_unlock(&BTRFS_I(inode)->lock);
4113 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4114
4026 /* 4115 /*
4027 * We don't need the return value since our reservation failed, 4116 * Somebody could have come in and twiddled with the
4028 * we just need to clean up our counter. 4117 * reservation, so if we have to free more than we would have
4118 * reserved from this reservation go ahead and release those
4119 * bytes.
4029 */ 4120 */
4030 dropped = drop_outstanding_extent(inode); 4121 to_free -= to_reserve;
4031 WARN_ON(dropped > 1); 4122 if (to_free)
4123 btrfs_block_rsv_release(root, block_rsv, to_free);
4032 return ret; 4124 return ret;
4033 } 4125 }
4034 4126
@@ -4037,6 +4129,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4037 return 0; 4129 return 0;
4038} 4130}
4039 4131
4132/**
4133 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
4134 * @inode: the inode to release the reservation for
4135 * @num_bytes: the number of bytes we're releasing
4136 *
4137 * This will release the metadata reservation for an inode. This can be called
4138 * once we complete IO for a given set of bytes to release their metadata
4139 * reservations.
4140 */
4040void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 4141void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4041{ 4142{
4042 struct btrfs_root *root = BTRFS_I(inode)->root; 4143 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4044,9 +4145,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4044 unsigned dropped; 4145 unsigned dropped;
4045 4146
4046 num_bytes = ALIGN(num_bytes, root->sectorsize); 4147 num_bytes = ALIGN(num_bytes, root->sectorsize);
4148 spin_lock(&BTRFS_I(inode)->lock);
4047 dropped = drop_outstanding_extent(inode); 4149 dropped = drop_outstanding_extent(inode);
4048 4150
4049 to_free = calc_csum_metadata_size(inode, num_bytes); 4151 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4152 spin_unlock(&BTRFS_I(inode)->lock);
4050 if (dropped > 0) 4153 if (dropped > 0)
4051 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4154 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4052 4155
@@ -4054,6 +4157,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4054 to_free); 4157 to_free);
4055} 4158}
4056 4159
4160/**
4161 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
4162 * @inode: inode we're writing to
4163 * @num_bytes: the number of bytes we want to allocate
4164 *
4165 * This will do the following things
4166 *
4167 * o reserve space in the data space info for num_bytes
4168 * o reserve space in the metadata space info based on number of outstanding
4169 * extents and how much csums will be needed
4170 * o add to the inode's ->delalloc_bytes
4171 * o add it to the fs_info's delalloc inodes list.
4172 *
4173 * This will return 0 for success and -ENOSPC if there is no space left.
4174 */
4057int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) 4175int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
4058{ 4176{
4059 int ret; 4177 int ret;
@@ -4071,6 +4189,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
4071 return 0; 4189 return 0;
4072} 4190}
4073 4191
4192/**
4193 * btrfs_delalloc_release_space - release data and metadata space for delalloc
4194 * @inode: inode we're releasing space for
4195 * @num_bytes: the number of bytes we want to free up
4196 *
4197 * This must be matched with a call to btrfs_delalloc_reserve_space. This is
4198 * called in the case that we don't need the metadata AND data reservations
4199 * anymore, e.g. if there is an error or we insert an inline extent.
4200 *
4201 * This function will release the metadata space that was not used and will
4202 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
4203 * list if there are no delalloc bytes left.
4204 */
4074void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) 4205void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
4075{ 4206{
4076 btrfs_delalloc_release_metadata(inode, num_bytes); 4207 btrfs_delalloc_release_metadata(inode, num_bytes);
@@ -4123,7 +4254,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4123 spin_lock(&cache->space_info->lock); 4254 spin_lock(&cache->space_info->lock);
4124 spin_lock(&cache->lock); 4255 spin_lock(&cache->lock);
4125 4256
4126 if (btrfs_super_cache_generation(&info->super_copy) != 0 && 4257 if (btrfs_test_opt(root, SPACE_CACHE) &&
4127 cache->disk_cache_state < BTRFS_DC_CLEAR) 4258 cache->disk_cache_state < BTRFS_DC_CLEAR)
4128 cache->disk_cache_state = BTRFS_DC_CLEAR; 4259 cache->disk_cache_state = BTRFS_DC_CLEAR;
4129 4260
@@ -4135,7 +4266,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4135 btrfs_set_block_group_used(&cache->item, old_val); 4266 btrfs_set_block_group_used(&cache->item, old_val);
4136 cache->reserved -= num_bytes; 4267 cache->reserved -= num_bytes;
4137 cache->space_info->bytes_reserved -= num_bytes; 4268 cache->space_info->bytes_reserved -= num_bytes;
4138 cache->space_info->reservation_progress++;
4139 cache->space_info->bytes_used += num_bytes; 4269 cache->space_info->bytes_used += num_bytes;
4140 cache->space_info->disk_used += num_bytes * factor; 4270 cache->space_info->disk_used += num_bytes * factor;
4141 spin_unlock(&cache->lock); 4271 spin_unlock(&cache->lock);
@@ -4187,7 +4317,6 @@ static int pin_down_extent(struct btrfs_root *root,
4187 if (reserved) { 4317 if (reserved) {
4188 cache->reserved -= num_bytes; 4318 cache->reserved -= num_bytes;
4189 cache->space_info->bytes_reserved -= num_bytes; 4319 cache->space_info->bytes_reserved -= num_bytes;
4190 cache->space_info->reservation_progress++;
4191 } 4320 }
4192 spin_unlock(&cache->lock); 4321 spin_unlock(&cache->lock);
4193 spin_unlock(&cache->space_info->lock); 4322 spin_unlock(&cache->space_info->lock);
@@ -4214,46 +4343,55 @@ int btrfs_pin_extent(struct btrfs_root *root,
4214 return 0; 4343 return 0;
4215} 4344}
4216 4345
4217/* 4346/**
4218 * update size of reserved extents. this function may return -EAGAIN 4347 * btrfs_update_reserved_bytes - update the block_group and space info counters
4219 * if 'reserve' is true or 'sinfo' is false. 4348 * @cache: The cache we are manipulating
4349 * @num_bytes: The number of bytes in question
4350 * @reserve: One of the reservation enums
4351 *
4352 * This is called by the allocator when it reserves space, or by somebody who is
4353 * freeing space that was never actually used on disk. For example if you
4354 * reserve some space for a new leaf in transaction A and before transaction A
4355 * commits you free that leaf, you call this with reserve set to 0 in order to
4356 * clear the reservation.
4357 *
4358 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
4359 * ENOSPC accounting. For data we handle the reservation through clearing the
4360 * delalloc bits in the io_tree. We have to do this since we could end up
4361 * allocating less disk space for the amount of data we have reserved in the
4362 * case of compression.
4363 *
4364 * If this is a reservation and the block group has become read only we cannot
4365 * make the reservation and return -EAGAIN, otherwise this function always
4366 * succeeds.
4220 */ 4367 */
4221int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4368static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4222 u64 num_bytes, int reserve, int sinfo) 4369 u64 num_bytes, int reserve)
4223{ 4370{
4371 struct btrfs_space_info *space_info = cache->space_info;
4224 int ret = 0; 4372 int ret = 0;
4225 if (sinfo) { 4373 spin_lock(&space_info->lock);
4226 struct btrfs_space_info *space_info = cache->space_info; 4374 spin_lock(&cache->lock);
4227 spin_lock(&space_info->lock); 4375 if (reserve != RESERVE_FREE) {
4228 spin_lock(&cache->lock);
4229 if (reserve) {
4230 if (cache->ro) {
4231 ret = -EAGAIN;
4232 } else {
4233 cache->reserved += num_bytes;
4234 space_info->bytes_reserved += num_bytes;
4235 }
4236 } else {
4237 if (cache->ro)
4238 space_info->bytes_readonly += num_bytes;
4239 cache->reserved -= num_bytes;
4240 space_info->bytes_reserved -= num_bytes;
4241 space_info->reservation_progress++;
4242 }
4243 spin_unlock(&cache->lock);
4244 spin_unlock(&space_info->lock);
4245 } else {
4246 spin_lock(&cache->lock);
4247 if (cache->ro) { 4376 if (cache->ro) {
4248 ret = -EAGAIN; 4377 ret = -EAGAIN;
4249 } else { 4378 } else {
4250 if (reserve) 4379 cache->reserved += num_bytes;
4251 cache->reserved += num_bytes; 4380 space_info->bytes_reserved += num_bytes;
4252 else 4381 if (reserve == RESERVE_ALLOC) {
4253 cache->reserved -= num_bytes; 4382 BUG_ON(space_info->bytes_may_use < num_bytes);
4383 space_info->bytes_may_use -= num_bytes;
4384 }
4254 } 4385 }
4255 spin_unlock(&cache->lock); 4386 } else {
4387 if (cache->ro)
4388 space_info->bytes_readonly += num_bytes;
4389 cache->reserved -= num_bytes;
4390 space_info->bytes_reserved -= num_bytes;
4391 space_info->reservation_progress++;
4256 } 4392 }
4393 spin_unlock(&cache->lock);
4394 spin_unlock(&space_info->lock);
4257 return ret; 4395 return ret;
4258} 4396}
4259 4397
@@ -4319,13 +4457,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
4319 spin_lock(&cache->lock); 4457 spin_lock(&cache->lock);
4320 cache->pinned -= len; 4458 cache->pinned -= len;
4321 cache->space_info->bytes_pinned -= len; 4459 cache->space_info->bytes_pinned -= len;
4322 if (cache->ro) { 4460 if (cache->ro)
4323 cache->space_info->bytes_readonly += len; 4461 cache->space_info->bytes_readonly += len;
4324 } else if (cache->reserved_pinned > 0) {
4325 len = min(len, cache->reserved_pinned);
4326 cache->reserved_pinned -= len;
4327 cache->space_info->bytes_reserved += len;
4328 }
4329 spin_unlock(&cache->lock); 4462 spin_unlock(&cache->lock);
4330 spin_unlock(&cache->space_info->lock); 4463 spin_unlock(&cache->space_info->lock);
4331 } 4464 }
@@ -4340,11 +4473,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4340{ 4473{
4341 struct btrfs_fs_info *fs_info = root->fs_info; 4474 struct btrfs_fs_info *fs_info = root->fs_info;
4342 struct extent_io_tree *unpin; 4475 struct extent_io_tree *unpin;
4343 struct btrfs_block_rsv *block_rsv;
4344 struct btrfs_block_rsv *next_rsv;
4345 u64 start; 4476 u64 start;
4346 u64 end; 4477 u64 end;
4347 int idx;
4348 int ret; 4478 int ret;
4349 4479
4350 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4480 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -4367,30 +4497,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4367 cond_resched(); 4497 cond_resched();
4368 } 4498 }
4369 4499
4370 mutex_lock(&fs_info->durable_block_rsv_mutex);
4371 list_for_each_entry_safe(block_rsv, next_rsv,
4372 &fs_info->durable_block_rsv_list, list) {
4373
4374 idx = trans->transid & 0x1;
4375 if (block_rsv->freed[idx] > 0) {
4376 block_rsv_add_bytes(block_rsv,
4377 block_rsv->freed[idx], 0);
4378 block_rsv->freed[idx] = 0;
4379 }
4380 if (atomic_read(&block_rsv->usage) == 0) {
4381 btrfs_block_rsv_release(root, block_rsv, (u64)-1);
4382
4383 if (block_rsv->freed[0] == 0 &&
4384 block_rsv->freed[1] == 0) {
4385 list_del_init(&block_rsv->list);
4386 kfree(block_rsv);
4387 }
4388 } else {
4389 btrfs_block_rsv_release(root, block_rsv, 0);
4390 }
4391 }
4392 mutex_unlock(&fs_info->durable_block_rsv_mutex);
4393
4394 return 0; 4500 return 0;
4395} 4501}
4396 4502
@@ -4668,7 +4774,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4668 struct extent_buffer *buf, 4774 struct extent_buffer *buf,
4669 u64 parent, int last_ref) 4775 u64 parent, int last_ref)
4670{ 4776{
4671 struct btrfs_block_rsv *block_rsv;
4672 struct btrfs_block_group_cache *cache = NULL; 4777 struct btrfs_block_group_cache *cache = NULL;
4673 int ret; 4778 int ret;
4674 4779
@@ -4683,64 +4788,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4683 if (!last_ref) 4788 if (!last_ref)
4684 return; 4789 return;
4685 4790
4686 block_rsv = get_block_rsv(trans, root);
4687 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4791 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
4688 if (block_rsv->space_info != cache->space_info)
4689 goto out;
4690 4792
4691 if (btrfs_header_generation(buf) == trans->transid) { 4793 if (btrfs_header_generation(buf) == trans->transid) {
4692 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4794 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4693 ret = check_ref_cleanup(trans, root, buf->start); 4795 ret = check_ref_cleanup(trans, root, buf->start);
4694 if (!ret) 4796 if (!ret)
4695 goto pin; 4797 goto out;
4696 } 4798 }
4697 4799
4698 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 4800 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
4699 pin_down_extent(root, cache, buf->start, buf->len, 1); 4801 pin_down_extent(root, cache, buf->start, buf->len, 1);
4700 goto pin; 4802 goto out;
4701 } 4803 }
4702 4804
4703 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4805 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4704 4806
4705 btrfs_add_free_space(cache, buf->start, buf->len); 4807 btrfs_add_free_space(cache, buf->start, buf->len);
4706 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4808 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
4707 if (ret == -EAGAIN) {
4708 /* block group became read-only */
4709 btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4710 goto out;
4711 }
4712
4713 ret = 1;
4714 spin_lock(&block_rsv->lock);
4715 if (block_rsv->reserved < block_rsv->size) {
4716 block_rsv->reserved += buf->len;
4717 ret = 0;
4718 }
4719 spin_unlock(&block_rsv->lock);
4720
4721 if (ret) {
4722 spin_lock(&cache->space_info->lock);
4723 cache->space_info->bytes_reserved -= buf->len;
4724 cache->space_info->reservation_progress++;
4725 spin_unlock(&cache->space_info->lock);
4726 }
4727 goto out;
4728 }
4729pin:
4730 if (block_rsv->durable && !cache->ro) {
4731 ret = 0;
4732 spin_lock(&cache->lock);
4733 if (!cache->ro) {
4734 cache->reserved_pinned += buf->len;
4735 ret = 1;
4736 }
4737 spin_unlock(&cache->lock);
4738
4739 if (ret) {
4740 spin_lock(&block_rsv->lock);
4741 block_rsv->freed[trans->transid & 0x1] += buf->len;
4742 spin_unlock(&block_rsv->lock);
4743 }
4744 } 4809 }
4745out: 4810out:
4746 /* 4811 /*
@@ -4883,6 +4948,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4883 int last_ptr_loop = 0; 4948 int last_ptr_loop = 0;
4884 int loop = 0; 4949 int loop = 0;
4885 int index = 0; 4950 int index = 0;
4951 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
4952 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
4886 bool found_uncached_bg = false; 4953 bool found_uncached_bg = false;
4887 bool failed_cluster_refill = false; 4954 bool failed_cluster_refill = false;
4888 bool failed_alloc = false; 4955 bool failed_alloc = false;
@@ -5202,8 +5269,8 @@ checks:
5202 search_start - offset); 5269 search_start - offset);
5203 BUG_ON(offset > search_start); 5270 BUG_ON(offset > search_start);
5204 5271
5205 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5272 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
5206 (data & BTRFS_BLOCK_GROUP_DATA)); 5273 alloc_type);
5207 if (ret == -EAGAIN) { 5274 if (ret == -EAGAIN) {
5208 btrfs_add_free_space(block_group, offset, num_bytes); 5275 btrfs_add_free_space(block_group, offset, num_bytes);
5209 goto loop; 5276 goto loop;
@@ -5325,7 +5392,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
5325 int index = 0; 5392 int index = 0;
5326 5393
5327 spin_lock(&info->lock); 5394 spin_lock(&info->lock);
5328 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5395 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
5396 (unsigned long long)info->flags,
5329 (unsigned long long)(info->total_bytes - info->bytes_used - 5397 (unsigned long long)(info->total_bytes - info->bytes_used -
5330 info->bytes_pinned - info->bytes_reserved - 5398 info->bytes_pinned - info->bytes_reserved -
5331 info->bytes_readonly), 5399 info->bytes_readonly),
@@ -5427,7 +5495,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5427 ret = btrfs_discard_extent(root, start, len, NULL); 5495 ret = btrfs_discard_extent(root, start, len, NULL);
5428 5496
5429 btrfs_add_free_space(cache, start, len); 5497 btrfs_add_free_space(cache, start, len);
5430 btrfs_update_reserved_bytes(cache, len, 0, 1); 5498 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
5431 btrfs_put_block_group(cache); 5499 btrfs_put_block_group(cache);
5432 5500
5433 trace_btrfs_reserved_extent_free(root, start, len); 5501 trace_btrfs_reserved_extent_free(root, start, len);
@@ -5630,7 +5698,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5630 put_caching_control(caching_ctl); 5698 put_caching_control(caching_ctl);
5631 } 5699 }
5632 5700
5633 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5701 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
5702 RESERVE_ALLOC_NO_ACCOUNT);
5634 BUG_ON(ret); 5703 BUG_ON(ret);
5635 btrfs_put_block_group(block_group); 5704 btrfs_put_block_group(block_group);
5636 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5705 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -5687,8 +5756,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5687 block_rsv = get_block_rsv(trans, root); 5756 block_rsv = get_block_rsv(trans, root);
5688 5757
5689 if (block_rsv->size == 0) { 5758 if (block_rsv->size == 0) {
5690 ret = reserve_metadata_bytes(trans, root, block_rsv, 5759 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
5691 blocksize, 0);
5692 /* 5760 /*
5693 * If we couldn't reserve metadata bytes try and use some from 5761 * If we couldn't reserve metadata bytes try and use some from
5694 * the global reserve. 5762 * the global reserve.
@@ -5709,12 +5777,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5709 return block_rsv; 5777 return block_rsv;
5710 if (ret) { 5778 if (ret) {
5711 WARN_ON(1); 5779 WARN_ON(1);
5712 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, 5780 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
5713 0);
5714 if (!ret) { 5781 if (!ret) {
5715 spin_lock(&block_rsv->lock);
5716 block_rsv->size += blocksize;
5717 spin_unlock(&block_rsv->lock);
5718 return block_rsv; 5782 return block_rsv;
5719 } else if (ret && block_rsv != global_rsv) { 5783 } else if (ret && block_rsv != global_rsv) {
5720 ret = block_rsv_use_bytes(global_rsv, blocksize); 5784 ret = block_rsv_use_bytes(global_rsv, blocksize);
@@ -6592,12 +6656,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6592 cache->bytes_super - btrfs_block_group_used(&cache->item); 6656 cache->bytes_super - btrfs_block_group_used(&cache->item);
6593 6657
6594 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6658 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
6595 sinfo->bytes_may_use + sinfo->bytes_readonly + 6659 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
6596 cache->reserved_pinned + num_bytes + min_allocable_bytes <= 6660 min_allocable_bytes <= sinfo->total_bytes) {
6597 sinfo->total_bytes) {
6598 sinfo->bytes_readonly += num_bytes; 6661 sinfo->bytes_readonly += num_bytes;
6599 sinfo->bytes_reserved += cache->reserved_pinned;
6600 cache->reserved_pinned = 0;
6601 cache->ro = 1; 6662 cache->ro = 1;
6602 ret = 0; 6663 ret = 0;
6603 } 6664 }
@@ -6964,7 +7025,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6964 struct btrfs_space_info, 7025 struct btrfs_space_info,
6965 list); 7026 list);
6966 if (space_info->bytes_pinned > 0 || 7027 if (space_info->bytes_pinned > 0 ||
6967 space_info->bytes_reserved > 0) { 7028 space_info->bytes_reserved > 0 ||
7029 space_info->bytes_may_use > 0) {
6968 WARN_ON(1); 7030 WARN_ON(1);
6969 dump_space_info(space_info, 0, 0); 7031 dump_space_info(space_info, 0, 0);
6970 } 7032 }
@@ -7007,13 +7069,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7007 path->reada = 1; 7069 path->reada = 1;
7008 7070
7009 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 7071 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
7010 if (cache_gen != 0 && 7072 if (btrfs_test_opt(root, SPACE_CACHE) &&
7011 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 7073 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
7012 need_clear = 1; 7074 need_clear = 1;
7013 if (btrfs_test_opt(root, CLEAR_CACHE)) 7075 if (btrfs_test_opt(root, CLEAR_CACHE))
7014 need_clear = 1; 7076 need_clear = 1;
7015 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
7016 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
7017 7077
7018 while (1) { 7078 while (1) {
7019 ret = find_first_block_group(root, path, &key); 7079 ret = find_first_block_group(root, path, &key);
@@ -7268,7 +7328,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7268 spin_unlock(&block_group->lock); 7328 spin_unlock(&block_group->lock);
7269 } 7329 }
7270 /* One for our lookup ref */ 7330 /* One for our lookup ref */
7271 iput(inode); 7331 btrfs_add_delayed_iput(inode);
7272 } 7332 }
7273 7333
7274 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 7334 key.objectid = BTRFS_FREE_SPACE_OBJECTID;