about summary refs log tree commit diff stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 848
1 files changed, 518 insertions, 330 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c9ee0e18bbdc..9879bd474632 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,6 +23,7 @@
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/kthread.h> 24#include <linux/kthread.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/ratelimit.h>
26#include "compat.h" 27#include "compat.h"
27#include "hash.h" 28#include "hash.h"
28#include "ctree.h" 29#include "ctree.h"
@@ -52,6 +53,21 @@ enum {
52 CHUNK_ALLOC_LIMITED = 2, 53 CHUNK_ALLOC_LIMITED = 2,
53}; 54};
54 55
56/*
57 * Control how reservations are dealt with.
58 *
59 * RESERVE_FREE - freeing a reservation.
60 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
61 * ENOSPC accounting
62 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
63 * bytes_may_use as the ENOSPC accounting is done elsewhere
64 */
65enum {
66 RESERVE_FREE = 0,
67 RESERVE_ALLOC = 1,
68 RESERVE_ALLOC_NO_ACCOUNT = 2,
69};
70
55static int update_block_group(struct btrfs_trans_handle *trans, 71static int update_block_group(struct btrfs_trans_handle *trans,
56 struct btrfs_root *root, 72 struct btrfs_root *root,
57 u64 bytenr, u64 num_bytes, int alloc); 73 u64 bytenr, u64 num_bytes, int alloc);
@@ -81,6 +97,8 @@ static int find_next_key(struct btrfs_path *path, int level,
81 struct btrfs_key *key); 97 struct btrfs_key *key);
82static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 98static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
83 int dump_block_groups); 99 int dump_block_groups);
100static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
101 u64 num_bytes, int reserve);
84 102
85static noinline int 103static noinline int
86block_group_cache_done(struct btrfs_block_group_cache *cache) 104block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -104,7 +122,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
104 if (atomic_dec_and_test(&cache->count)) { 122 if (atomic_dec_and_test(&cache->count)) {
105 WARN_ON(cache->pinned > 0); 123 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 124 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl); 125 kfree(cache->free_space_ctl);
109 kfree(cache); 126 kfree(cache);
110 } 127 }
@@ -465,7 +482,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
465 * we likely hold important locks. 482 * we likely hold important locks.
466 */ 483 */
467 if (trans && (!trans->transaction->in_commit) && 484 if (trans && (!trans->transaction->in_commit) &&
468 (root && root != root->fs_info->tree_root)) { 485 (root && root != root->fs_info->tree_root) &&
486 btrfs_test_opt(root, SPACE_CACHE)) {
469 spin_lock(&cache->lock); 487 spin_lock(&cache->lock);
470 if (cache->cached != BTRFS_CACHE_NO) { 488 if (cache->cached != BTRFS_CACHE_NO) {
471 spin_unlock(&cache->lock); 489 spin_unlock(&cache->lock);
@@ -1770,18 +1788,18 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1770{ 1788{
1771 int ret; 1789 int ret;
1772 u64 discarded_bytes = 0; 1790 u64 discarded_bytes = 0;
1773 struct btrfs_multi_bio *multi = NULL; 1791 struct btrfs_bio *bbio = NULL;
1774 1792
1775 1793
1776 /* Tell the block device(s) that the sectors can be discarded */ 1794 /* Tell the block device(s) that the sectors can be discarded */
1777 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, 1795 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
1778 bytenr, &num_bytes, &multi, 0); 1796 bytenr, &num_bytes, &bbio, 0);
1779 if (!ret) { 1797 if (!ret) {
1780 struct btrfs_bio_stripe *stripe = multi->stripes; 1798 struct btrfs_bio_stripe *stripe = bbio->stripes;
1781 int i; 1799 int i;
1782 1800
1783 1801
1784 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1802 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1785 if (!stripe->dev->can_discard) 1803 if (!stripe->dev->can_discard)
1786 continue; 1804 continue;
1787 1805
@@ -1800,7 +1818,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1800 */ 1818 */
1801 ret = 0; 1819 ret = 0;
1802 } 1820 }
1803 kfree(multi); 1821 kfree(bbio);
1804 } 1822 }
1805 1823
1806 if (actual_bytes) 1824 if (actual_bytes)
@@ -2700,6 +2718,13 @@ again:
2700 goto again; 2718 goto again;
2701 } 2719 }
2702 2720
2721 /* We've already setup this transaction, go ahead and exit */
2722 if (block_group->cache_generation == trans->transid &&
2723 i_size_read(inode)) {
2724 dcs = BTRFS_DC_SETUP;
2725 goto out_put;
2726 }
2727
2703 /* 2728 /*
2704 * We want to set the generation to 0, that way if anything goes wrong 2729 * We want to set the generation to 0, that way if anything goes wrong
2705 * from here on out we know not to trust this cache when we load up next 2730 * from here on out we know not to trust this cache when we load up next
@@ -2749,12 +2774,15 @@ again:
2749 if (!ret) 2774 if (!ret)
2750 dcs = BTRFS_DC_SETUP; 2775 dcs = BTRFS_DC_SETUP;
2751 btrfs_free_reserved_data_space(inode, num_pages); 2776 btrfs_free_reserved_data_space(inode, num_pages);
2777
2752out_put: 2778out_put:
2753 iput(inode); 2779 iput(inode);
2754out_free: 2780out_free:
2755 btrfs_release_path(path); 2781 btrfs_release_path(path);
2756out: 2782out:
2757 spin_lock(&block_group->lock); 2783 spin_lock(&block_group->lock);
2784 if (!ret)
2785 block_group->cache_generation = trans->transid;
2758 block_group->disk_cache_state = dcs; 2786 block_group->disk_cache_state = dcs;
2759 spin_unlock(&block_group->lock); 2787 spin_unlock(&block_group->lock);
2760 2788
@@ -3122,16 +3150,13 @@ commit_trans:
3122 return -ENOSPC; 3150 return -ENOSPC;
3123 } 3151 }
3124 data_sinfo->bytes_may_use += bytes; 3152 data_sinfo->bytes_may_use += bytes;
3125 BTRFS_I(inode)->reserved_bytes += bytes;
3126 spin_unlock(&data_sinfo->lock); 3153 spin_unlock(&data_sinfo->lock);
3127 3154
3128 return 0; 3155 return 0;
3129} 3156}
3130 3157
3131/* 3158/*
3132 * called when we are clearing an delalloc extent from the 3159 * Called if we need to clear a data reservation for this inode.
3133 * inode's io_tree or there was an error for whatever reason
3134 * after calling btrfs_check_data_free_space
3135 */ 3160 */
3136void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3161void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3137{ 3162{
@@ -3144,7 +3169,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3144 data_sinfo = BTRFS_I(inode)->space_info; 3169 data_sinfo = BTRFS_I(inode)->space_info;
3145 spin_lock(&data_sinfo->lock); 3170 spin_lock(&data_sinfo->lock);
3146 data_sinfo->bytes_may_use -= bytes; 3171 data_sinfo->bytes_may_use -= bytes;
3147 BTRFS_I(inode)->reserved_bytes -= bytes;
3148 spin_unlock(&data_sinfo->lock); 3172 spin_unlock(&data_sinfo->lock);
3149} 3173}
3150 3174
@@ -3165,6 +3189,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
3165 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3189 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3166 int force) 3190 int force)
3167{ 3191{
3192 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3168 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3193 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3169 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3194 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3170 u64 thresh; 3195 u64 thresh;
@@ -3173,11 +3198,18 @@ static int should_alloc_chunk(struct btrfs_root *root,
3173 return 1; 3198 return 1;
3174 3199
3175 /* 3200 /*
3201 * We need to take into account the global rsv because for all intents
3202 * and purposes it's used space. Don't worry about locking the
3203 * global_rsv, it doesn't change except when the transaction commits.
3204 */
3205 num_allocated += global_rsv->size;
3206
3207 /*
3176 * in limited mode, we want to have some free space up to 3208 * in limited mode, we want to have some free space up to
3177 * about 1% of the FS size. 3209 * about 1% of the FS size.
3178 */ 3210 */
3179 if (force == CHUNK_ALLOC_LIMITED) { 3211 if (force == CHUNK_ALLOC_LIMITED) {
3180 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3212 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
3181 thresh = max_t(u64, 64 * 1024 * 1024, 3213 thresh = max_t(u64, 64 * 1024 * 1024,
3182 div_factor_fine(thresh, 1)); 3214 div_factor_fine(thresh, 1));
3183 3215
@@ -3199,7 +3231,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
3199 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) 3231 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
3200 return 0; 3232 return 0;
3201 3233
3202 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3234 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
3203 3235
3204 /* 256MB or 5% of the FS */ 3236 /* 256MB or 5% of the FS */
3205 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3237 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
@@ -3302,24 +3334,26 @@ out:
3302/* 3334/*
3303 * shrink metadata reservation for delalloc 3335 * shrink metadata reservation for delalloc
3304 */ 3336 */
3305static int shrink_delalloc(struct btrfs_trans_handle *trans, 3337static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
3306 struct btrfs_root *root, u64 to_reclaim, int sync) 3338 bool wait_ordered)
3307{ 3339{
3308 struct btrfs_block_rsv *block_rsv; 3340 struct btrfs_block_rsv *block_rsv;
3309 struct btrfs_space_info *space_info; 3341 struct btrfs_space_info *space_info;
3342 struct btrfs_trans_handle *trans;
3310 u64 reserved; 3343 u64 reserved;
3311 u64 max_reclaim; 3344 u64 max_reclaim;
3312 u64 reclaimed = 0; 3345 u64 reclaimed = 0;
3313 long time_left; 3346 long time_left;
3314 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3347 unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3315 int loops = 0; 3348 int loops = 0;
3316 unsigned long progress; 3349 unsigned long progress;
3317 3350
3351 trans = (struct btrfs_trans_handle *)current->journal_info;
3318 block_rsv = &root->fs_info->delalloc_block_rsv; 3352 block_rsv = &root->fs_info->delalloc_block_rsv;
3319 space_info = block_rsv->space_info; 3353 space_info = block_rsv->space_info;
3320 3354
3321 smp_mb(); 3355 smp_mb();
3322 reserved = space_info->bytes_reserved; 3356 reserved = space_info->bytes_may_use;
3323 progress = space_info->reservation_progress; 3357 progress = space_info->reservation_progress;
3324 3358
3325 if (reserved == 0) 3359 if (reserved == 0)
@@ -3334,7 +3368,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3334 } 3368 }
3335 3369
3336 max_reclaim = min(reserved, to_reclaim); 3370 max_reclaim = min(reserved, to_reclaim);
3337 3371 nr_pages = max_t(unsigned long, nr_pages,
3372 max_reclaim >> PAGE_CACHE_SHIFT);
3338 while (loops < 1024) { 3373 while (loops < 1024) {
3339 /* have the flusher threads jump in and do some IO */ 3374 /* have the flusher threads jump in and do some IO */
3340 smp_mb(); 3375 smp_mb();
@@ -3344,9 +3379,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3344 WB_REASON_FS_FREE_SPACE); 3379 WB_REASON_FS_FREE_SPACE);
3345 3380
3346 spin_lock(&space_info->lock); 3381 spin_lock(&space_info->lock);
3347 if (reserved > space_info->bytes_reserved) 3382 if (reserved > space_info->bytes_may_use)
3348 reclaimed += reserved - space_info->bytes_reserved; 3383 reclaimed += reserved - space_info->bytes_may_use;
3349 reserved = space_info->bytes_reserved; 3384 reserved = space_info->bytes_may_use;
3350 spin_unlock(&space_info->lock); 3385 spin_unlock(&space_info->lock);
3351 3386
3352 loops++; 3387 loops++;
@@ -3357,11 +3392,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3357 if (trans && trans->transaction->blocked) 3392 if (trans && trans->transaction->blocked)
3358 return -EAGAIN; 3393 return -EAGAIN;
3359 3394
3360 time_left = schedule_timeout_interruptible(1); 3395 if (wait_ordered && !trans) {
3396 btrfs_wait_ordered_extents(root, 0, 0);
3397 } else {
3398 time_left = schedule_timeout_interruptible(1);
3361 3399
3362 /* We were interrupted, exit */ 3400 /* We were interrupted, exit */
3363 if (time_left) 3401 if (time_left)
3364 break; 3402 break;
3403 }
3365 3404
3366 /* we've kicked the IO a few times, if anything has been freed, 3405 /* we've kicked the IO a few times, if anything has been freed,
3367 * exit. There is no sense in looping here for a long time 3406 * exit. There is no sense in looping here for a long time
@@ -3376,34 +3415,90 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3376 } 3415 }
3377 3416
3378 } 3417 }
3379 if (reclaimed >= to_reclaim && !trans) 3418
3380 btrfs_wait_ordered_extents(root, 0, 0);
3381 return reclaimed >= to_reclaim; 3419 return reclaimed >= to_reclaim;
3382} 3420}
3383 3421
3384/* 3422/**
3385 * Retries tells us how many times we've called reserve_metadata_bytes. The 3423 * may_commit_transaction - possibly commit the transaction if it's ok to
3386 * idea is if this is the first call (retries == 0) then we will add to our 3424 * @root - the root we're allocating for
3387 * reserved count if we can't make the allocation in order to hold our place 3425 * @bytes - the number of bytes we want to reserve
3388 * while we go and try and free up space. That way for retries > 1 we don't try 3426 * @force - force the commit
3389 * and add space, we just check to see if the amount of unused space is >= the
3390 * total space, meaning that our reservation is valid.
3391 * 3427 *
3392 * However if we don't intend to retry this reservation, pass -1 as retries so 3428 * This will check to make sure that committing the transaction will actually
3393 * that it short circuits this logic. 3429 * get us somewhere and then commit the transaction if it does. Otherwise it
3430 * will return -ENOSPC.
3394 */ 3431 */
3395static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, 3432static int may_commit_transaction(struct btrfs_root *root,
3396 struct btrfs_root *root, 3433 struct btrfs_space_info *space_info,
3434 u64 bytes, int force)
3435{
3436 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
3437 struct btrfs_trans_handle *trans;
3438
3439 trans = (struct btrfs_trans_handle *)current->journal_info;
3440 if (trans)
3441 return -EAGAIN;
3442
3443 if (force)
3444 goto commit;
3445
3446 /* See if there is enough pinned space to make this reservation */
3447 spin_lock(&space_info->lock);
3448 if (space_info->bytes_pinned >= bytes) {
3449 spin_unlock(&space_info->lock);
3450 goto commit;
3451 }
3452 spin_unlock(&space_info->lock);
3453
3454 /*
3455 * See if there is some space in the delayed insertion reservation for
3456 * this reservation.
3457 */
3458 if (space_info != delayed_rsv->space_info)
3459 return -ENOSPC;
3460
3461 spin_lock(&delayed_rsv->lock);
3462 if (delayed_rsv->size < bytes) {
3463 spin_unlock(&delayed_rsv->lock);
3464 return -ENOSPC;
3465 }
3466 spin_unlock(&delayed_rsv->lock);
3467
3468commit:
3469 trans = btrfs_join_transaction(root);
3470 if (IS_ERR(trans))
3471 return -ENOSPC;
3472
3473 return btrfs_commit_transaction(trans, root);
3474}
3475
3476/**
3477 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
3478 * @root - the root we're allocating for
3479 * @block_rsv - the block_rsv we're allocating for
3480 * @orig_bytes - the number of bytes we want
3481 * @flush - whether or not we can flush to make our reservation
3482 *
3483 * This will reserve orig_bytes number of bytes from the space info associated
3484 * with the block_rsv. If there is not enough space it will make an attempt to
3485 * flush out space to make room. It will do this by flushing delalloc if
3486 * possible or committing the transaction. If flush is 0 then no attempts to
3487 * regain reservations will be made and this will fail if there is not enough
3488 * space already.
3489 */
3490static int reserve_metadata_bytes(struct btrfs_root *root,
3397 struct btrfs_block_rsv *block_rsv, 3491 struct btrfs_block_rsv *block_rsv,
3398 u64 orig_bytes, int flush) 3492 u64 orig_bytes, int flush)
3399{ 3493{
3400 struct btrfs_space_info *space_info = block_rsv->space_info; 3494 struct btrfs_space_info *space_info = block_rsv->space_info;
3401 u64 unused; 3495 u64 used;
3402 u64 num_bytes = orig_bytes; 3496 u64 num_bytes = orig_bytes;
3403 int retries = 0; 3497 int retries = 0;
3404 int ret = 0; 3498 int ret = 0;
3405 bool committed = false; 3499 bool committed = false;
3406 bool flushing = false; 3500 bool flushing = false;
3501 bool wait_ordered = false;
3407 3502
3408again: 3503again:
3409 ret = 0; 3504 ret = 0;
@@ -3420,7 +3515,7 @@ again:
3420 * deadlock since we are waiting for the flusher to finish, but 3515 * deadlock since we are waiting for the flusher to finish, but
3421 * hold the current transaction open. 3516 * hold the current transaction open.
3422 */ 3517 */
3423 if (trans) 3518 if (current->journal_info)
3424 return -EAGAIN; 3519 return -EAGAIN;
3425 ret = wait_event_interruptible(space_info->wait, 3520 ret = wait_event_interruptible(space_info->wait,
3426 !space_info->flush); 3521 !space_info->flush);
@@ -3432,9 +3527,9 @@ again:
3432 } 3527 }
3433 3528
3434 ret = -ENOSPC; 3529 ret = -ENOSPC;
3435 unused = space_info->bytes_used + space_info->bytes_reserved + 3530 used = space_info->bytes_used + space_info->bytes_reserved +
3436 space_info->bytes_pinned + space_info->bytes_readonly + 3531 space_info->bytes_pinned + space_info->bytes_readonly +
3437 space_info->bytes_may_use; 3532 space_info->bytes_may_use;
3438 3533
3439 /* 3534 /*
3440 * The idea here is that we've not already over-reserved the block group 3535 * The idea here is that we've not already over-reserved the block group
@@ -3443,10 +3538,9 @@ again:
3443 * lets start flushing stuff first and then come back and try to make 3538 * lets start flushing stuff first and then come back and try to make
3444 * our reservation. 3539 * our reservation.
3445 */ 3540 */
3446 if (unused <= space_info->total_bytes) { 3541 if (used <= space_info->total_bytes) {
3447 unused = space_info->total_bytes - unused; 3542 if (used + orig_bytes <= space_info->total_bytes) {
3448 if (unused >= num_bytes) { 3543 space_info->bytes_may_use += orig_bytes;
3449 space_info->bytes_reserved += orig_bytes;
3450 ret = 0; 3544 ret = 0;
3451 } else { 3545 } else {
3452 /* 3546 /*
@@ -3462,10 +3556,64 @@ again:
3462 * amount plus the amount of bytes that we need for this 3556 * amount plus the amount of bytes that we need for this
3463 * reservation. 3557 * reservation.
3464 */ 3558 */
3465 num_bytes = unused - space_info->total_bytes + 3559 wait_ordered = true;
3560 num_bytes = used - space_info->total_bytes +
3466 (orig_bytes * (retries + 1)); 3561 (orig_bytes * (retries + 1));
3467 } 3562 }
3468 3563
3564 if (ret) {
3565 u64 profile = btrfs_get_alloc_profile(root, 0);
3566 u64 avail;
3567
3568 /*
3569 * If we have a lot of space that's pinned, don't bother doing
3570 * the overcommit dance yet and just commit the transaction.
3571 */
3572 avail = (space_info->total_bytes - space_info->bytes_used) * 8;
3573 do_div(avail, 10);
3574 if (space_info->bytes_pinned >= avail && flush && !committed) {
3575 space_info->flush = 1;
3576 flushing = true;
3577 spin_unlock(&space_info->lock);
3578 ret = may_commit_transaction(root, space_info,
3579 orig_bytes, 1);
3580 if (ret)
3581 goto out;
3582 committed = true;
3583 goto again;
3584 }
3585
3586 spin_lock(&root->fs_info->free_chunk_lock);
3587 avail = root->fs_info->free_chunk_space;
3588
3589 /*
3590 * If we have dup, raid1 or raid10 then only half of the free
3591 * space is actually useable.
3592 */
3593 if (profile & (BTRFS_BLOCK_GROUP_DUP |
3594 BTRFS_BLOCK_GROUP_RAID1 |
3595 BTRFS_BLOCK_GROUP_RAID10))
3596 avail >>= 1;
3597
3598 /*
3599 * If we can flush, don't let us overcommit too much, say 1/8th
3600 * of the space. If we aren't flushing, let it overcommit up to
3601 * 1/2 of the space.
3602 */
3603 if (flush)
3604 avail >>= 3;
3605 else
3606 avail >>= 1;
3607 spin_unlock(&root->fs_info->free_chunk_lock);
3608
3609 if (used + num_bytes < space_info->total_bytes + avail) {
3610 space_info->bytes_may_use += orig_bytes;
3611 ret = 0;
3612 } else {
3613 wait_ordered = true;
3614 }
3615 }
3616
3469 /* 3617 /*
3470 * Couldn't make our reservation, save our place so while we're trying 3618 * Couldn't make our reservation, save our place so while we're trying
3471 * to reclaim space we can actually use it instead of somebody else 3619 * to reclaim space we can actually use it instead of somebody else
@@ -3485,7 +3633,7 @@ again:
3485 * We do synchronous shrinking since we don't actually unreserve 3633 * We do synchronous shrinking since we don't actually unreserve
3486 * metadata until after the IO is completed. 3634 * metadata until after the IO is completed.
3487 */ 3635 */
3488 ret = shrink_delalloc(trans, root, num_bytes, 1); 3636 ret = shrink_delalloc(root, num_bytes, wait_ordered);
3489 if (ret < 0) 3637 if (ret < 0)
3490 goto out; 3638 goto out;
3491 3639
@@ -3497,35 +3645,17 @@ again:
3497 * so go back around and try again. 3645 * so go back around and try again.
3498 */ 3646 */
3499 if (retries < 2) { 3647 if (retries < 2) {
3648 wait_ordered = true;
3500 retries++; 3649 retries++;
3501 goto again; 3650 goto again;
3502 } 3651 }
3503 3652
3504 /*
3505 * Not enough space to be reclaimed, don't bother committing the
3506 * transaction.
3507 */
3508 spin_lock(&space_info->lock);
3509 if (space_info->bytes_pinned < orig_bytes)
3510 ret = -ENOSPC;
3511 spin_unlock(&space_info->lock);
3512 if (ret)
3513 goto out;
3514
3515 ret = -EAGAIN;
3516 if (trans)
3517 goto out;
3518
3519 ret = -ENOSPC; 3653 ret = -ENOSPC;
3520 if (committed) 3654 if (committed)
3521 goto out; 3655 goto out;
3522 3656
3523 trans = btrfs_join_transaction(root); 3657 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
3524 if (IS_ERR(trans))
3525 goto out;
3526 ret = btrfs_commit_transaction(trans, root);
3527 if (!ret) { 3658 if (!ret) {
3528 trans = NULL;
3529 committed = true; 3659 committed = true;
3530 goto again; 3660 goto again;
3531 } 3661 }
@@ -3543,10 +3673,12 @@ out:
3543static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3673static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
3544 struct btrfs_root *root) 3674 struct btrfs_root *root)
3545{ 3675{
3546 struct btrfs_block_rsv *block_rsv; 3676 struct btrfs_block_rsv *block_rsv = NULL;
3547 if (root->ref_cows) 3677
3678 if (root->ref_cows || root == root->fs_info->csum_root)
3548 block_rsv = trans->block_rsv; 3679 block_rsv = trans->block_rsv;
3549 else 3680
3681 if (!block_rsv)
3550 block_rsv = root->block_rsv; 3682 block_rsv = root->block_rsv;
3551 3683
3552 if (!block_rsv) 3684 if (!block_rsv)
@@ -3617,7 +3749,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3617 } 3749 }
3618 if (num_bytes) { 3750 if (num_bytes) {
3619 spin_lock(&space_info->lock); 3751 spin_lock(&space_info->lock);
3620 space_info->bytes_reserved -= num_bytes; 3752 space_info->bytes_may_use -= num_bytes;
3621 space_info->reservation_progress++; 3753 space_info->reservation_progress++;
3622 spin_unlock(&space_info->lock); 3754 spin_unlock(&space_info->lock);
3623 } 3755 }
@@ -3641,9 +3773,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
3641{ 3773{
3642 memset(rsv, 0, sizeof(*rsv)); 3774 memset(rsv, 0, sizeof(*rsv));
3643 spin_lock_init(&rsv->lock); 3775 spin_lock_init(&rsv->lock);
3644 atomic_set(&rsv->usage, 1);
3645 rsv->priority = 6;
3646 INIT_LIST_HEAD(&rsv->list);
3647} 3776}
3648 3777
3649struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) 3778struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
@@ -3664,38 +3793,38 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3664void btrfs_free_block_rsv(struct btrfs_root *root, 3793void btrfs_free_block_rsv(struct btrfs_root *root,
3665 struct btrfs_block_rsv *rsv) 3794 struct btrfs_block_rsv *rsv)
3666{ 3795{
3667 if (rsv && atomic_dec_and_test(&rsv->usage)) { 3796 btrfs_block_rsv_release(root, rsv, (u64)-1);
3668 btrfs_block_rsv_release(root, rsv, (u64)-1); 3797 kfree(rsv);
3669 if (!rsv->durable)
3670 kfree(rsv);
3671 }
3672} 3798}
3673 3799
3674/* 3800int btrfs_block_rsv_add(struct btrfs_root *root,
3675 * make the block_rsv struct be able to capture freed space. 3801 struct btrfs_block_rsv *block_rsv,
3676 * the captured space will re-add to the the block_rsv struct 3802 u64 num_bytes)
3677 * after transaction commit
3678 */
3679void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3680 struct btrfs_block_rsv *block_rsv)
3681{ 3803{
3682 block_rsv->durable = 1; 3804 int ret;
3683 mutex_lock(&fs_info->durable_block_rsv_mutex); 3805
3684 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); 3806 if (num_bytes == 0)
3685 mutex_unlock(&fs_info->durable_block_rsv_mutex); 3807 return 0;
3808
3809 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
3810 if (!ret) {
3811 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3812 return 0;
3813 }
3814
3815 return ret;
3686} 3816}
3687 3817
3688int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3818int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
3689 struct btrfs_root *root, 3819 struct btrfs_block_rsv *block_rsv,
3690 struct btrfs_block_rsv *block_rsv, 3820 u64 num_bytes)
3691 u64 num_bytes)
3692{ 3821{
3693 int ret; 3822 int ret;
3694 3823
3695 if (num_bytes == 0) 3824 if (num_bytes == 0)
3696 return 0; 3825 return 0;
3697 3826
3698 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3827 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0);
3699 if (!ret) { 3828 if (!ret) {
3700 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3829 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3701 return 0; 3830 return 0;
@@ -3704,55 +3833,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3704 return ret; 3833 return ret;
3705} 3834}
3706 3835
3707int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 3836int btrfs_block_rsv_check(struct btrfs_root *root,
3708 struct btrfs_root *root, 3837 struct btrfs_block_rsv *block_rsv, int min_factor)
3709 struct btrfs_block_rsv *block_rsv,
3710 u64 min_reserved, int min_factor)
3711{ 3838{
3712 u64 num_bytes = 0; 3839 u64 num_bytes = 0;
3713 int commit_trans = 0;
3714 int ret = -ENOSPC; 3840 int ret = -ENOSPC;
3715 3841
3716 if (!block_rsv) 3842 if (!block_rsv)
3717 return 0; 3843 return 0;
3718 3844
3719 spin_lock(&block_rsv->lock); 3845 spin_lock(&block_rsv->lock);
3720 if (min_factor > 0) 3846 num_bytes = div_factor(block_rsv->size, min_factor);
3721 num_bytes = div_factor(block_rsv->size, min_factor); 3847 if (block_rsv->reserved >= num_bytes)
3722 if (min_reserved > num_bytes) 3848 ret = 0;
3723 num_bytes = min_reserved; 3849 spin_unlock(&block_rsv->lock);
3724 3850
3725 if (block_rsv->reserved >= num_bytes) { 3851 return ret;
3852}
3853
3854int btrfs_block_rsv_refill(struct btrfs_root *root,
3855 struct btrfs_block_rsv *block_rsv,
3856 u64 min_reserved)
3857{
3858 u64 num_bytes = 0;
3859 int ret = -ENOSPC;
3860
3861 if (!block_rsv)
3862 return 0;
3863
3864 spin_lock(&block_rsv->lock);
3865 num_bytes = min_reserved;
3866 if (block_rsv->reserved >= num_bytes)
3726 ret = 0; 3867 ret = 0;
3727 } else { 3868 else
3728 num_bytes -= block_rsv->reserved; 3869 num_bytes -= block_rsv->reserved;
3729 if (block_rsv->durable &&
3730 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
3731 commit_trans = 1;
3732 }
3733 spin_unlock(&block_rsv->lock); 3870 spin_unlock(&block_rsv->lock);
3871
3734 if (!ret) 3872 if (!ret)
3735 return 0; 3873 return 0;
3736 3874
3737 if (block_rsv->refill_used) { 3875 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
3738 ret = reserve_metadata_bytes(trans, root, block_rsv, 3876 if (!ret) {
3739 num_bytes, 0); 3877 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3740 if (!ret) {
3741 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3742 return 0;
3743 }
3744 }
3745
3746 if (commit_trans) {
3747 if (trans)
3748 return -EAGAIN;
3749 trans = btrfs_join_transaction(root);
3750 BUG_ON(IS_ERR(trans));
3751 ret = btrfs_commit_transaction(trans, root);
3752 return 0; 3878 return 0;
3753 } 3879 }
3754 3880
3755 return -ENOSPC; 3881 return ret;
3756} 3882}
3757 3883
3758int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, 3884int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
@@ -3784,7 +3910,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3784 u64 num_bytes; 3910 u64 num_bytes;
3785 u64 meta_used; 3911 u64 meta_used;
3786 u64 data_used; 3912 u64 data_used;
3787 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3913 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
3788 3914
3789 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3915 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3790 spin_lock(&sinfo->lock); 3916 spin_lock(&sinfo->lock);
@@ -3828,12 +3954,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3828 if (sinfo->total_bytes > num_bytes) { 3954 if (sinfo->total_bytes > num_bytes) {
3829 num_bytes = sinfo->total_bytes - num_bytes; 3955 num_bytes = sinfo->total_bytes - num_bytes;
3830 block_rsv->reserved += num_bytes; 3956 block_rsv->reserved += num_bytes;
3831 sinfo->bytes_reserved += num_bytes; 3957 sinfo->bytes_may_use += num_bytes;
3832 } 3958 }
3833 3959
3834 if (block_rsv->reserved >= block_rsv->size) { 3960 if (block_rsv->reserved >= block_rsv->size) {
3835 num_bytes = block_rsv->reserved - block_rsv->size; 3961 num_bytes = block_rsv->reserved - block_rsv->size;
3836 sinfo->bytes_reserved -= num_bytes; 3962 sinfo->bytes_may_use -= num_bytes;
3837 sinfo->reservation_progress++; 3963 sinfo->reservation_progress++;
3838 block_rsv->reserved = block_rsv->size; 3964 block_rsv->reserved = block_rsv->size;
3839 block_rsv->full = 1; 3965 block_rsv->full = 1;
@@ -3849,16 +3975,13 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3849 3975
3850 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3976 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3851 fs_info->chunk_block_rsv.space_info = space_info; 3977 fs_info->chunk_block_rsv.space_info = space_info;
3852 fs_info->chunk_block_rsv.priority = 10;
3853 3978
3854 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3979 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3855 fs_info->global_block_rsv.space_info = space_info; 3980 fs_info->global_block_rsv.space_info = space_info;
3856 fs_info->global_block_rsv.priority = 10;
3857 fs_info->global_block_rsv.refill_used = 1;
3858 fs_info->delalloc_block_rsv.space_info = space_info; 3981 fs_info->delalloc_block_rsv.space_info = space_info;
3859 fs_info->trans_block_rsv.space_info = space_info; 3982 fs_info->trans_block_rsv.space_info = space_info;
3860 fs_info->empty_block_rsv.space_info = space_info; 3983 fs_info->empty_block_rsv.space_info = space_info;
3861 fs_info->empty_block_rsv.priority = 10; 3984 fs_info->delayed_block_rsv.space_info = space_info;
3862 3985
3863 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; 3986 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
3864 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 3987 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
@@ -3866,10 +3989,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3866 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 3989 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
3867 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 3990 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
3868 3991
3869 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
3870
3871 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
3872
3873 update_global_block_rsv(fs_info); 3992 update_global_block_rsv(fs_info);
3874} 3993}
3875 3994
@@ -3882,37 +4001,8 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3882 WARN_ON(fs_info->trans_block_rsv.reserved > 0); 4001 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
3883 WARN_ON(fs_info->chunk_block_rsv.size > 0); 4002 WARN_ON(fs_info->chunk_block_rsv.size > 0);
3884 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 4003 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3885} 4004 WARN_ON(fs_info->delayed_block_rsv.size > 0);
3886 4005 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
3887int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3888 struct btrfs_root *root,
3889 struct btrfs_block_rsv *rsv)
3890{
3891 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3892 u64 num_bytes;
3893 int ret;
3894
3895 /*
3896 * Truncate should be freeing data, but give us 2 items just in case it
3897 * needs to use some space. We may want to be smarter about this in the
3898 * future.
3899 */
3900 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3901
3902 /* We already have enough bytes, just return */
3903 if (rsv->reserved >= num_bytes)
3904 return 0;
3905
3906 num_bytes -= rsv->reserved;
3907
3908 /*
3909 * You should have reserved enough space before hand to do this, so this
3910 * should not fail.
3911 */
3912 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3913 BUG_ON(ret);
3914
3915 return 0;
3916} 4006}
3917 4007
3918void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 4008void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -3921,9 +4011,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
3921 if (!trans->bytes_reserved) 4011 if (!trans->bytes_reserved)
3922 return; 4012 return;
3923 4013
3924 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); 4014 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
3925 btrfs_block_rsv_release(root, trans->block_rsv,
3926 trans->bytes_reserved);
3927 trans->bytes_reserved = 0; 4015 trans->bytes_reserved = 0;
3928} 4016}
3929 4017
@@ -3965,11 +4053,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3965 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 4053 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3966} 4054}
3967 4055
4056/**
4057 * drop_outstanding_extent - drop an outstanding extent
4058 * @inode: the inode we're dropping the extent for
4059 *
4060 * This is called when we are freeing up an outstanding extent, either called
4061 * after an error or after an extent is written. This will return the number of
4062 * reserved extents that need to be freed. This must be called with
4063 * BTRFS_I(inode)->lock held.
4064 */
3968static unsigned drop_outstanding_extent(struct inode *inode) 4065static unsigned drop_outstanding_extent(struct inode *inode)
3969{ 4066{
3970 unsigned dropped_extents = 0; 4067 unsigned dropped_extents = 0;
3971 4068
3972 spin_lock(&BTRFS_I(inode)->lock);
3973 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 4069 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
3974 BTRFS_I(inode)->outstanding_extents--; 4070 BTRFS_I(inode)->outstanding_extents--;
3975 4071
@@ -3979,19 +4075,70 @@ static unsigned drop_outstanding_extent(struct inode *inode)
3979 */ 4075 */
3980 if (BTRFS_I(inode)->outstanding_extents >= 4076 if (BTRFS_I(inode)->outstanding_extents >=
3981 BTRFS_I(inode)->reserved_extents) 4077 BTRFS_I(inode)->reserved_extents)
3982 goto out; 4078 return 0;
3983 4079
3984 dropped_extents = BTRFS_I(inode)->reserved_extents - 4080 dropped_extents = BTRFS_I(inode)->reserved_extents -
3985 BTRFS_I(inode)->outstanding_extents; 4081 BTRFS_I(inode)->outstanding_extents;
3986 BTRFS_I(inode)->reserved_extents -= dropped_extents; 4082 BTRFS_I(inode)->reserved_extents -= dropped_extents;
3987out:
3988 spin_unlock(&BTRFS_I(inode)->lock);
3989 return dropped_extents; 4083 return dropped_extents;
3990} 4084}
3991 4085
3992static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 4086/**
4087 * calc_csum_metadata_size - return the amount of metadata space that must be
4088 * reserved/free'd for the given bytes.
4089 * @inode: the inode we're manipulating
4090 * @num_bytes: the number of bytes in question
4091 * @reserve: 1 if we are reserving space, 0 if we are freeing space
4092 *
4093 * This adjusts the number of csum_bytes in the inode and then returns the
4094 * correct amount of metadata that must either be reserved or freed. We
4095 * calculate how many checksums we can fit into one leaf and then divide the
4096 * number of bytes that will need to be checksummed by this value to figure out
4097 * how many checksums will be required. If we are adding bytes then the number
4098 * may go up and we will return the number of additional bytes that must be
4099 * reserved. If it is going down we will return the number of bytes that must
4100 * be freed.
4101 *
4102 * This must be called with BTRFS_I(inode)->lock held.
4103 */
4104static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
4105 int reserve)
3993{ 4106{
3994 return num_bytes >>= 3; 4107 struct btrfs_root *root = BTRFS_I(inode)->root;
4108 u64 csum_size;
4109 int num_csums_per_leaf;
4110 int num_csums;
4111 int old_csums;
4112
4113 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
4114 BTRFS_I(inode)->csum_bytes == 0)
4115 return 0;
4116
4117 old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
4118 if (reserve)
4119 BTRFS_I(inode)->csum_bytes += num_bytes;
4120 else
4121 BTRFS_I(inode)->csum_bytes -= num_bytes;
4122 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
4123 num_csums_per_leaf = (int)div64_u64(csum_size,
4124 sizeof(struct btrfs_csum_item) +
4125 sizeof(struct btrfs_disk_key));
4126 num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
4127 num_csums = num_csums + num_csums_per_leaf - 1;
4128 num_csums = num_csums / num_csums_per_leaf;
4129
4130 old_csums = old_csums + num_csums_per_leaf - 1;
4131 old_csums = old_csums / num_csums_per_leaf;
4132
4133 /* No change, no need to reserve more */
4134 if (old_csums == num_csums)
4135 return 0;
4136
4137 if (reserve)
4138 return btrfs_calc_trans_metadata_size(root,
4139 num_csums - old_csums);
4140
4141 return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
3995} 4142}
3996 4143
3997int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) 4144int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
@@ -4000,9 +4147,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4000 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4147 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4001 u64 to_reserve = 0; 4148 u64 to_reserve = 0;
4002 unsigned nr_extents = 0; 4149 unsigned nr_extents = 0;
4150 int flush = 1;
4003 int ret; 4151 int ret;
4004 4152
4005 if (btrfs_transaction_in_commit(root->fs_info)) 4153 if (btrfs_is_free_space_inode(root, inode))
4154 flush = 0;
4155
4156 if (flush && btrfs_transaction_in_commit(root->fs_info))
4006 schedule_timeout(1); 4157 schedule_timeout(1);
4007 4158
4008 num_bytes = ALIGN(num_bytes, root->sectorsize); 4159 num_bytes = ALIGN(num_bytes, root->sectorsize);
@@ -4018,18 +4169,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4018 4169
4019 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4170 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4020 } 4171 }
4172 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4021 spin_unlock(&BTRFS_I(inode)->lock); 4173 spin_unlock(&BTRFS_I(inode)->lock);
4022 4174
4023 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4175 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
4024 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4025 if (ret) { 4176 if (ret) {
4177 u64 to_free = 0;
4026 unsigned dropped; 4178 unsigned dropped;
4179
4180 spin_lock(&BTRFS_I(inode)->lock);
4181 dropped = drop_outstanding_extent(inode);
4182 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4183 spin_unlock(&BTRFS_I(inode)->lock);
4184 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4185
4027 /* 4186 /*
4028 * We don't need the return value since our reservation failed, 4187 * Somebody could have come in and twiddled with the
4029 * we just need to clean up our counter. 4188 * reservation, so if we have to free more than we would have
4189 * reserved from this reservation go ahead and release those
4190 * bytes.
4030 */ 4191 */
4031 dropped = drop_outstanding_extent(inode); 4192 to_free -= to_reserve;
4032 WARN_ON(dropped > 1); 4193 if (to_free)
4194 btrfs_block_rsv_release(root, block_rsv, to_free);
4033 return ret; 4195 return ret;
4034 } 4196 }
4035 4197
@@ -4038,6 +4200,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4038 return 0; 4200 return 0;
4039} 4201}
4040 4202
4203/**
4204 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
4205 * @inode: the inode to release the reservation for
4206 * @num_bytes: the number of bytes we're releasing
4207 *
4208 * This will release the metadata reservation for an inode. This can be called
4209 * once we complete IO for a given set of bytes to release their metadata
4210 * reservations.
4211 */
4041void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 4212void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4042{ 4213{
4043 struct btrfs_root *root = BTRFS_I(inode)->root; 4214 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4045,9 +4216,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4045 unsigned dropped; 4216 unsigned dropped;
4046 4217
4047 num_bytes = ALIGN(num_bytes, root->sectorsize); 4218 num_bytes = ALIGN(num_bytes, root->sectorsize);
4219 spin_lock(&BTRFS_I(inode)->lock);
4048 dropped = drop_outstanding_extent(inode); 4220 dropped = drop_outstanding_extent(inode);
4049 4221
4050 to_free = calc_csum_metadata_size(inode, num_bytes); 4222 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4223 spin_unlock(&BTRFS_I(inode)->lock);
4051 if (dropped > 0) 4224 if (dropped > 0)
4052 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4225 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4053 4226
@@ -4055,6 +4228,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4055 to_free); 4228 to_free);
4056} 4229}
4057 4230
4231/**
4232 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
4233 * @inode: inode we're writing to
4234 * @num_bytes: the number of bytes we want to allocate
4235 *
4236 * This will do the following things
4237 *
4238 * o reserve space in the data space info for num_bytes
4239 * o reserve space in the metadata space info based on number of outstanding
4240 * extents and how much csums will be needed
4241 * o add to the inodes ->delalloc_bytes
4241 * o add to the inode's ->delalloc_bytes
4243 *
4244 * This will return 0 for success and -ENOSPC if there is no space left.
4245 */
4058int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) 4246int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
4059{ 4247{
4060 int ret; 4248 int ret;
@@ -4072,6 +4260,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
4072 return 0; 4260 return 0;
4073} 4261}
4074 4262
4263/**
4264 * btrfs_delalloc_release_space - release data and metadata space for delalloc
4265 * @inode: inode we're releasing space for
4266 * @num_bytes: the number of bytes we want to free up
4267 *
4268 * This must be matched with a call to btrfs_delalloc_reserve_space. This is
4269 * called in the case that we don't need the metadata AND data reservations
4270 * anymore. So if there is an error or we insert an inline extent.
4271 *
4272 * This function will release the metadata space that was not used and will
4273 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
4274 * list if there are no delalloc bytes left.
4275 */
4075void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) 4276void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
4076{ 4277{
4077 btrfs_delalloc_release_metadata(inode, num_bytes); 4278 btrfs_delalloc_release_metadata(inode, num_bytes);
@@ -4091,12 +4292,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4091 4292
4092 /* block accounting for super block */ 4293 /* block accounting for super block */
4093 spin_lock(&info->delalloc_lock); 4294 spin_lock(&info->delalloc_lock);
4094 old_val = btrfs_super_bytes_used(&info->super_copy); 4295 old_val = btrfs_super_bytes_used(info->super_copy);
4095 if (alloc) 4296 if (alloc)
4096 old_val += num_bytes; 4297 old_val += num_bytes;
4097 else 4298 else
4098 old_val -= num_bytes; 4299 old_val -= num_bytes;
4099 btrfs_set_super_bytes_used(&info->super_copy, old_val); 4300 btrfs_set_super_bytes_used(info->super_copy, old_val);
4100 spin_unlock(&info->delalloc_lock); 4301 spin_unlock(&info->delalloc_lock);
4101 4302
4102 while (total) { 4303 while (total) {
@@ -4124,7 +4325,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4124 spin_lock(&cache->space_info->lock); 4325 spin_lock(&cache->space_info->lock);
4125 spin_lock(&cache->lock); 4326 spin_lock(&cache->lock);
4126 4327
4127 if (btrfs_super_cache_generation(&info->super_copy) != 0 && 4328 if (btrfs_test_opt(root, SPACE_CACHE) &&
4128 cache->disk_cache_state < BTRFS_DC_CLEAR) 4329 cache->disk_cache_state < BTRFS_DC_CLEAR)
4129 cache->disk_cache_state = BTRFS_DC_CLEAR; 4330 cache->disk_cache_state = BTRFS_DC_CLEAR;
4130 4331
@@ -4136,7 +4337,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4136 btrfs_set_block_group_used(&cache->item, old_val); 4337 btrfs_set_block_group_used(&cache->item, old_val);
4137 cache->reserved -= num_bytes; 4338 cache->reserved -= num_bytes;
4138 cache->space_info->bytes_reserved -= num_bytes; 4339 cache->space_info->bytes_reserved -= num_bytes;
4139 cache->space_info->reservation_progress++;
4140 cache->space_info->bytes_used += num_bytes; 4340 cache->space_info->bytes_used += num_bytes;
4141 cache->space_info->disk_used += num_bytes * factor; 4341 cache->space_info->disk_used += num_bytes * factor;
4142 spin_unlock(&cache->lock); 4342 spin_unlock(&cache->lock);
@@ -4188,7 +4388,6 @@ static int pin_down_extent(struct btrfs_root *root,
4188 if (reserved) { 4388 if (reserved) {
4189 cache->reserved -= num_bytes; 4389 cache->reserved -= num_bytes;
4190 cache->space_info->bytes_reserved -= num_bytes; 4390 cache->space_info->bytes_reserved -= num_bytes;
4191 cache->space_info->reservation_progress++;
4192 } 4391 }
4193 spin_unlock(&cache->lock); 4392 spin_unlock(&cache->lock);
4194 spin_unlock(&cache->space_info->lock); 4393 spin_unlock(&cache->space_info->lock);
@@ -4216,45 +4415,82 @@ int btrfs_pin_extent(struct btrfs_root *root,
4216} 4415}
4217 4416
4218/* 4417/*
4219 * update size of reserved extents. this function may return -EAGAIN 4418 * this function must be called within transaction
4220 * if 'reserve' is true or 'sinfo' is false. 4419 */
4420int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
4421 struct btrfs_root *root,
4422 u64 bytenr, u64 num_bytes)
4423{
4424 struct btrfs_block_group_cache *cache;
4425
4426 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4427 BUG_ON(!cache);
4428
4429 /*
4430 * pull in the free space cache (if any) so that our pin
4431 * removes the free space from the cache. We have load_only set
4432 * to one because the slow code to read in the free extents does check
4433 * the pinned extents.
4434 */
4435 cache_block_group(cache, trans, root, 1);
4436
4437 pin_down_extent(root, cache, bytenr, num_bytes, 0);
4438
4439 /* remove us from the free space cache (if we're there at all) */
4440 btrfs_remove_free_space(cache, bytenr, num_bytes);
4441 btrfs_put_block_group(cache);
4442 return 0;
4443}
4444
4445/**
4446 * btrfs_update_reserved_bytes - update the block_group and space info counters
4447 * @cache: The cache we are manipulating
4448 * @num_bytes: The number of bytes in question
4449 * @reserve: One of the reservation enums
4450 *
4451 * This is called by the allocator when it reserves space, or by somebody who is
4452 * freeing space that was never actually used on disk. For example if you
4453 * reserve some space for a new leaf in transaction A and before transaction A
4454 * commits you free that leaf, you call this with reserve set to 0 in order to
4455 * clear the reservation.
4456 *
4457 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
4458 * ENOSPC accounting. For data we handle the reservation through clearing the
4459 * delalloc bits in the io_tree. We have to do this since we could end up
4460 * allocating less disk space for the amount of data we have reserved in the
4461 * case of compression.
4462 *
4463 * If this is a reservation and the block group has become read only we cannot
4464 * make the reservation and return -EAGAIN, otherwise this function always
4465 * succeeds.
4221 */ 4466 */
4222int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4467static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4223 u64 num_bytes, int reserve, int sinfo) 4468 u64 num_bytes, int reserve)
4224{ 4469{
4470 struct btrfs_space_info *space_info = cache->space_info;
4225 int ret = 0; 4471 int ret = 0;
4226 if (sinfo) { 4472 spin_lock(&space_info->lock);
4227 struct btrfs_space_info *space_info = cache->space_info; 4473 spin_lock(&cache->lock);
4228 spin_lock(&space_info->lock); 4474 if (reserve != RESERVE_FREE) {
4229 spin_lock(&cache->lock);
4230 if (reserve) {
4231 if (cache->ro) {
4232 ret = -EAGAIN;
4233 } else {
4234 cache->reserved += num_bytes;
4235 space_info->bytes_reserved += num_bytes;
4236 }
4237 } else {
4238 if (cache->ro)
4239 space_info->bytes_readonly += num_bytes;
4240 cache->reserved -= num_bytes;
4241 space_info->bytes_reserved -= num_bytes;
4242 space_info->reservation_progress++;
4243 }
4244 spin_unlock(&cache->lock);
4245 spin_unlock(&space_info->lock);
4246 } else {
4247 spin_lock(&cache->lock);
4248 if (cache->ro) { 4475 if (cache->ro) {
4249 ret = -EAGAIN; 4476 ret = -EAGAIN;
4250 } else { 4477 } else {
4251 if (reserve) 4478 cache->reserved += num_bytes;
4252 cache->reserved += num_bytes; 4479 space_info->bytes_reserved += num_bytes;
4253 else 4480 if (reserve == RESERVE_ALLOC) {
4254 cache->reserved -= num_bytes; 4481 BUG_ON(space_info->bytes_may_use < num_bytes);
4482 space_info->bytes_may_use -= num_bytes;
4483 }
4255 } 4484 }
4256 spin_unlock(&cache->lock); 4485 } else {
4486 if (cache->ro)
4487 space_info->bytes_readonly += num_bytes;
4488 cache->reserved -= num_bytes;
4489 space_info->bytes_reserved -= num_bytes;
4490 space_info->reservation_progress++;
4257 } 4491 }
4492 spin_unlock(&cache->lock);
4493 spin_unlock(&space_info->lock);
4258 return ret; 4494 return ret;
4259} 4495}
4260 4496
@@ -4320,13 +4556,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
4320 spin_lock(&cache->lock); 4556 spin_lock(&cache->lock);
4321 cache->pinned -= len; 4557 cache->pinned -= len;
4322 cache->space_info->bytes_pinned -= len; 4558 cache->space_info->bytes_pinned -= len;
4323 if (cache->ro) { 4559 if (cache->ro)
4324 cache->space_info->bytes_readonly += len; 4560 cache->space_info->bytes_readonly += len;
4325 } else if (cache->reserved_pinned > 0) {
4326 len = min(len, cache->reserved_pinned);
4327 cache->reserved_pinned -= len;
4328 cache->space_info->bytes_reserved += len;
4329 }
4330 spin_unlock(&cache->lock); 4561 spin_unlock(&cache->lock);
4331 spin_unlock(&cache->space_info->lock); 4562 spin_unlock(&cache->space_info->lock);
4332 } 4563 }
@@ -4341,11 +4572,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4341{ 4572{
4342 struct btrfs_fs_info *fs_info = root->fs_info; 4573 struct btrfs_fs_info *fs_info = root->fs_info;
4343 struct extent_io_tree *unpin; 4574 struct extent_io_tree *unpin;
4344 struct btrfs_block_rsv *block_rsv;
4345 struct btrfs_block_rsv *next_rsv;
4346 u64 start; 4575 u64 start;
4347 u64 end; 4576 u64 end;
4348 int idx;
4349 int ret; 4577 int ret;
4350 4578
4351 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4579 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -4368,30 +4596,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4368 cond_resched(); 4596 cond_resched();
4369 } 4597 }
4370 4598
4371 mutex_lock(&fs_info->durable_block_rsv_mutex);
4372 list_for_each_entry_safe(block_rsv, next_rsv,
4373 &fs_info->durable_block_rsv_list, list) {
4374
4375 idx = trans->transid & 0x1;
4376 if (block_rsv->freed[idx] > 0) {
4377 block_rsv_add_bytes(block_rsv,
4378 block_rsv->freed[idx], 0);
4379 block_rsv->freed[idx] = 0;
4380 }
4381 if (atomic_read(&block_rsv->usage) == 0) {
4382 btrfs_block_rsv_release(root, block_rsv, (u64)-1);
4383
4384 if (block_rsv->freed[0] == 0 &&
4385 block_rsv->freed[1] == 0) {
4386 list_del_init(&block_rsv->list);
4387 kfree(block_rsv);
4388 }
4389 } else {
4390 btrfs_block_rsv_release(root, block_rsv, 0);
4391 }
4392 }
4393 mutex_unlock(&fs_info->durable_block_rsv_mutex);
4394
4395 return 0; 4599 return 0;
4396} 4600}
4397 4601
@@ -4669,7 +4873,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4669 struct extent_buffer *buf, 4873 struct extent_buffer *buf,
4670 u64 parent, int last_ref) 4874 u64 parent, int last_ref)
4671{ 4875{
4672 struct btrfs_block_rsv *block_rsv;
4673 struct btrfs_block_group_cache *cache = NULL; 4876 struct btrfs_block_group_cache *cache = NULL;
4674 int ret; 4877 int ret;
4675 4878
@@ -4684,64 +4887,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4684 if (!last_ref) 4887 if (!last_ref)
4685 return; 4888 return;
4686 4889
4687 block_rsv = get_block_rsv(trans, root);
4688 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4890 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
4689 if (block_rsv->space_info != cache->space_info)
4690 goto out;
4691 4891
4692 if (btrfs_header_generation(buf) == trans->transid) { 4892 if (btrfs_header_generation(buf) == trans->transid) {
4693 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4893 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4694 ret = check_ref_cleanup(trans, root, buf->start); 4894 ret = check_ref_cleanup(trans, root, buf->start);
4695 if (!ret) 4895 if (!ret)
4696 goto pin; 4896 goto out;
4697 } 4897 }
4698 4898
4699 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 4899 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
4700 pin_down_extent(root, cache, buf->start, buf->len, 1); 4900 pin_down_extent(root, cache, buf->start, buf->len, 1);
4701 goto pin; 4901 goto out;
4702 } 4902 }
4703 4903
4704 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4904 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4705 4905
4706 btrfs_add_free_space(cache, buf->start, buf->len); 4906 btrfs_add_free_space(cache, buf->start, buf->len);
4707 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4907 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
4708 if (ret == -EAGAIN) {
4709 /* block group became read-only */
4710 btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4711 goto out;
4712 }
4713
4714 ret = 1;
4715 spin_lock(&block_rsv->lock);
4716 if (block_rsv->reserved < block_rsv->size) {
4717 block_rsv->reserved += buf->len;
4718 ret = 0;
4719 }
4720 spin_unlock(&block_rsv->lock);
4721
4722 if (ret) {
4723 spin_lock(&cache->space_info->lock);
4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4726 spin_unlock(&cache->space_info->lock);
4727 }
4728 goto out;
4729 }
4730pin:
4731 if (block_rsv->durable && !cache->ro) {
4732 ret = 0;
4733 spin_lock(&cache->lock);
4734 if (!cache->ro) {
4735 cache->reserved_pinned += buf->len;
4736 ret = 1;
4737 }
4738 spin_unlock(&cache->lock);
4739
4740 if (ret) {
4741 spin_lock(&block_rsv->lock);
4742 block_rsv->freed[trans->transid & 0x1] += buf->len;
4743 spin_unlock(&block_rsv->lock);
4744 }
4745 } 4908 }
4746out: 4909out:
4747 /* 4910 /*
@@ -4884,10 +5047,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4884 int last_ptr_loop = 0; 5047 int last_ptr_loop = 0;
4885 int loop = 0; 5048 int loop = 0;
4886 int index = 0; 5049 int index = 0;
5050 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5051 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
4887 bool found_uncached_bg = false; 5052 bool found_uncached_bg = false;
4888 bool failed_cluster_refill = false; 5053 bool failed_cluster_refill = false;
4889 bool failed_alloc = false; 5054 bool failed_alloc = false;
4890 bool use_cluster = true; 5055 bool use_cluster = true;
5056 bool have_caching_bg = false;
4891 u64 ideal_cache_percent = 0; 5057 u64 ideal_cache_percent = 0;
4892 u64 ideal_cache_offset = 0; 5058 u64 ideal_cache_offset = 0;
4893 5059
@@ -4970,6 +5136,7 @@ ideal_cache:
4970 } 5136 }
4971 } 5137 }
4972search: 5138search:
5139 have_caching_bg = false;
4973 down_read(&space_info->groups_sem); 5140 down_read(&space_info->groups_sem);
4974 list_for_each_entry(block_group, &space_info->block_groups[index], 5141 list_for_each_entry(block_group, &space_info->block_groups[index],
4975 list) { 5142 list) {
@@ -5178,6 +5345,8 @@ refill_cluster:
5178 failed_alloc = true; 5345 failed_alloc = true;
5179 goto have_block_group; 5346 goto have_block_group;
5180 } else if (!offset) { 5347 } else if (!offset) {
5348 if (!cached)
5349 have_caching_bg = true;
5181 goto loop; 5350 goto loop;
5182 } 5351 }
5183checks: 5352checks:
@@ -5203,8 +5372,8 @@ checks:
5203 search_start - offset); 5372 search_start - offset);
5204 BUG_ON(offset > search_start); 5373 BUG_ON(offset > search_start);
5205 5374
5206 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5375 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
5207 (data & BTRFS_BLOCK_GROUP_DATA)); 5376 alloc_type);
5208 if (ret == -EAGAIN) { 5377 if (ret == -EAGAIN) {
5209 btrfs_add_free_space(block_group, offset, num_bytes); 5378 btrfs_add_free_space(block_group, offset, num_bytes);
5210 goto loop; 5379 goto loop;
@@ -5228,6 +5397,9 @@ loop:
5228 } 5397 }
5229 up_read(&space_info->groups_sem); 5398 up_read(&space_info->groups_sem);
5230 5399
5400 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
5401 goto search;
5402
5231 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5403 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
5232 goto search; 5404 goto search;
5233 5405
@@ -5326,7 +5498,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
5326 int index = 0; 5498 int index = 0;
5327 5499
5328 spin_lock(&info->lock); 5500 spin_lock(&info->lock);
5329 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5501 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
5502 (unsigned long long)info->flags,
5330 (unsigned long long)(info->total_bytes - info->bytes_used - 5503 (unsigned long long)(info->total_bytes - info->bytes_used -
5331 info->bytes_pinned - info->bytes_reserved - 5504 info->bytes_pinned - info->bytes_reserved -
5332 info->bytes_readonly), 5505 info->bytes_readonly),
@@ -5412,7 +5585,8 @@ again:
5412 return ret; 5585 return ret;
5413} 5586}
5414 5587
5415int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 5588static int __btrfs_free_reserved_extent(struct btrfs_root *root,
5589 u64 start, u64 len, int pin)
5416{ 5590{
5417 struct btrfs_block_group_cache *cache; 5591 struct btrfs_block_group_cache *cache;
5418 int ret = 0; 5592 int ret = 0;
@@ -5427,8 +5601,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5427 if (btrfs_test_opt(root, DISCARD)) 5601 if (btrfs_test_opt(root, DISCARD))
5428 ret = btrfs_discard_extent(root, start, len, NULL); 5602 ret = btrfs_discard_extent(root, start, len, NULL);
5429 5603
5430 btrfs_add_free_space(cache, start, len); 5604 if (pin)
5431 btrfs_update_reserved_bytes(cache, len, 0, 1); 5605 pin_down_extent(root, cache, start, len, 1);
5606 else {
5607 btrfs_add_free_space(cache, start, len);
5608 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
5609 }
5432 btrfs_put_block_group(cache); 5610 btrfs_put_block_group(cache);
5433 5611
5434 trace_btrfs_reserved_extent_free(root, start, len); 5612 trace_btrfs_reserved_extent_free(root, start, len);
@@ -5436,6 +5614,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5436 return ret; 5614 return ret;
5437} 5615}
5438 5616
5617int btrfs_free_reserved_extent(struct btrfs_root *root,
5618 u64 start, u64 len)
5619{
5620 return __btrfs_free_reserved_extent(root, start, len, 0);
5621}
5622
5623int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
5624 u64 start, u64 len)
5625{
5626 return __btrfs_free_reserved_extent(root, start, len, 1);
5627}
5628
5439static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5629static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5440 struct btrfs_root *root, 5630 struct btrfs_root *root,
5441 u64 parent, u64 root_objectid, 5631 u64 parent, u64 root_objectid,
@@ -5631,7 +5821,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5631 put_caching_control(caching_ctl); 5821 put_caching_control(caching_ctl);
5632 } 5822 }
5633 5823
5634 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5824 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
5825 RESERVE_ALLOC_NO_ACCOUNT);
5635 BUG_ON(ret); 5826 BUG_ON(ret);
5636 btrfs_put_block_group(block_group); 5827 btrfs_put_block_group(block_group);
5637 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5828 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -5688,8 +5879,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5688 block_rsv = get_block_rsv(trans, root); 5879 block_rsv = get_block_rsv(trans, root);
5689 5880
5690 if (block_rsv->size == 0) { 5881 if (block_rsv->size == 0) {
5691 ret = reserve_metadata_bytes(trans, root, block_rsv, 5882 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
5692 blocksize, 0);
5693 /* 5883 /*
5694 * If we couldn't reserve metadata bytes try and use some from 5884 * If we couldn't reserve metadata bytes try and use some from
5695 * the global reserve. 5885 * the global reserve.
@@ -5709,13 +5899,15 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5709 if (!ret) 5899 if (!ret)
5710 return block_rsv; 5900 return block_rsv;
5711 if (ret) { 5901 if (ret) {
5712 WARN_ON(1); 5902 static DEFINE_RATELIMIT_STATE(_rs,
5713 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, 5903 DEFAULT_RATELIMIT_INTERVAL,
5714 0); 5904 /*DEFAULT_RATELIMIT_BURST*/ 2);
5905 if (__ratelimit(&_rs)) {
5906 printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
5907 WARN_ON(1);
5908 }
5909 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
5715 if (!ret) { 5910 if (!ret) {
5716 spin_lock(&block_rsv->lock);
5717 block_rsv->size += blocksize;
5718 spin_unlock(&block_rsv->lock);
5719 return block_rsv; 5911 return block_rsv;
5720 } else if (ret && block_rsv != global_rsv) { 5912 } else if (ret && block_rsv != global_rsv) {
5721 ret = block_rsv_use_bytes(global_rsv, blocksize); 5913 ret = block_rsv_use_bytes(global_rsv, blocksize);
@@ -6593,12 +6785,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6593 cache->bytes_super - btrfs_block_group_used(&cache->item); 6785 cache->bytes_super - btrfs_block_group_used(&cache->item);
6594 6786
6595 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6787 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
6596 sinfo->bytes_may_use + sinfo->bytes_readonly + 6788 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
6597 cache->reserved_pinned + num_bytes + min_allocable_bytes <= 6789 min_allocable_bytes <= sinfo->total_bytes) {
6598 sinfo->total_bytes) {
6599 sinfo->bytes_readonly += num_bytes; 6790 sinfo->bytes_readonly += num_bytes;
6600 sinfo->bytes_reserved += cache->reserved_pinned;
6601 cache->reserved_pinned = 0;
6602 cache->ro = 1; 6791 cache->ro = 1;
6603 ret = 0; 6792 ret = 0;
6604 } 6793 }
@@ -6965,7 +7154,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6965 struct btrfs_space_info, 7154 struct btrfs_space_info,
6966 list); 7155 list);
6967 if (space_info->bytes_pinned > 0 || 7156 if (space_info->bytes_pinned > 0 ||
6968 space_info->bytes_reserved > 0) { 7157 space_info->bytes_reserved > 0 ||
7158 space_info->bytes_may_use > 0) {
6969 WARN_ON(1); 7159 WARN_ON(1);
6970 dump_space_info(space_info, 0, 0); 7160 dump_space_info(space_info, 0, 0);
6971 } 7161 }
@@ -7007,14 +7197,12 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7007 return -ENOMEM; 7197 return -ENOMEM;
7008 path->reada = 1; 7198 path->reada = 1;
7009 7199
7010 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 7200 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
7011 if (cache_gen != 0 && 7201 if (btrfs_test_opt(root, SPACE_CACHE) &&
7012 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 7202 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
7013 need_clear = 1; 7203 need_clear = 1;
7014 if (btrfs_test_opt(root, CLEAR_CACHE)) 7204 if (btrfs_test_opt(root, CLEAR_CACHE))
7015 need_clear = 1; 7205 need_clear = 1;
7016 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
7017 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
7018 7206
7019 while (1) { 7207 while (1) {
7020 ret = find_first_block_group(root, path, &key); 7208 ret = find_first_block_group(root, path, &key);
@@ -7253,7 +7441,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7253 goto out; 7441 goto out;
7254 } 7442 }
7255 7443
7256 inode = lookup_free_space_inode(root, block_group, path); 7444 inode = lookup_free_space_inode(tree_root, block_group, path);
7257 if (!IS_ERR(inode)) { 7445 if (!IS_ERR(inode)) {
7258 ret = btrfs_orphan_add(trans, inode); 7446 ret = btrfs_orphan_add(trans, inode);
7259 BUG_ON(ret); 7447 BUG_ON(ret);
@@ -7269,7 +7457,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7269 spin_unlock(&block_group->lock); 7457 spin_unlock(&block_group->lock);
7270 } 7458 }
7271 /* One for our lookup ref */ 7459 /* One for our lookup ref */
7272 iput(inode); 7460 btrfs_add_delayed_iput(inode);
7273 } 7461 }
7274 7462
7275 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 7463 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -7340,7 +7528,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
7340 int mixed = 0; 7528 int mixed = 0;
7341 int ret; 7529 int ret;
7342 7530
7343 disk_super = &fs_info->super_copy; 7531 disk_super = fs_info->super_copy;
7344 if (!btrfs_super_root(disk_super)) 7532 if (!btrfs_super_root(disk_super))
7345 return 1; 7533 return 1;
7346 7534