Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c  848
1 file changed, 518 insertions(+), 330 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c9ee0e18bbdc..9879bd474632 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,6 +23,7 @@
 #include <linux/rcupdate.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -52,6 +53,21 @@ enum {
	CHUNK_ALLOC_LIMITED = 2,
 };
 
+/*
+ * Control how reservations are dealt with.
+ *
+ * RESERVE_FREE - freeing a reservation.
+ * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
+ *   ENOSPC accounting
+ * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
+ *   bytes_may_use as the ENOSPC accounting is done elsewhere
+ */
+enum {
+	RESERVE_FREE = 0,
+	RESERVE_ALLOC = 1,
+	RESERVE_ALLOC_NO_ACCOUNT = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc);
@@ -81,6 +97,8 @@ static int find_next_key(struct btrfs_path *path, int level,
			  struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
+static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+				       u64 num_bytes, int reserve);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -104,7 +122,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
-		WARN_ON(cache->reserved_pinned > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
@@ -465,7 +482,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
	 * we likely hold important locks.
	 */
	if (trans && (!trans->transaction->in_commit) &&
-	    (root && root != root->fs_info->tree_root)) {
+	    (root && root != root->fs_info->tree_root) &&
+	    btrfs_test_opt(root, SPACE_CACHE)) {
		spin_lock(&cache->lock);
		if (cache->cached != BTRFS_CACHE_NO) {
			spin_unlock(&cache->lock);
@@ -1770,18 +1788,18 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 {
	int ret;
	u64 discarded_bytes = 0;
-	struct btrfs_multi_bio *multi = NULL;
+	struct btrfs_bio *bbio = NULL;
 
 
	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
-			      bytenr, &num_bytes, &multi, 0);
+			      bytenr, &num_bytes, &bbio, 0);
	if (!ret) {
-		struct btrfs_bio_stripe *stripe = multi->stripes;
+		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;
 
 
-		for (i = 0; i < multi->num_stripes; i++, stripe++) {
+		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			if (!stripe->dev->can_discard)
				continue;
 
@@ -1800,7 +1818,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
			 */
			ret = 0;
		}
-		kfree(multi);
+		kfree(bbio);
	}
 
	if (actual_bytes)
@@ -2700,6 +2718,13 @@ again:
		goto again;
	}
 
+	/* We've already setup this transaction, go ahead and exit */
+	if (block_group->cache_generation == trans->transid &&
+	    i_size_read(inode)) {
+		dcs = BTRFS_DC_SETUP;
+		goto out_put;
+	}
+
	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up next
@@ -2749,12 +2774,15 @@ again:
	if (!ret)
		dcs = BTRFS_DC_SETUP;
	btrfs_free_reserved_data_space(inode, num_pages);
+
 out_put:
	iput(inode);
 out_free:
	btrfs_release_path(path);
 out:
	spin_lock(&block_group->lock);
+	if (!ret)
+		block_group->cache_generation = trans->transid;
	block_group->disk_cache_state = dcs;
	spin_unlock(&block_group->lock);
 
@@ -3122,16 +3150,13 @@ commit_trans:
		return -ENOSPC;
	}
	data_sinfo->bytes_may_use += bytes;
-	BTRFS_I(inode)->reserved_bytes += bytes;
	spin_unlock(&data_sinfo->lock);
 
	return 0;
 }
 
 /*
- * called when we are clearing an delalloc extent from the
- * inode's io_tree or there was an error for whatever reason
- * after calling btrfs_check_data_free_space
+ * Called if we need to clear a data reservation for this inode.
  */
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 {
@@ -3144,7 +3169,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
	data_sinfo = BTRFS_I(inode)->space_info;
	spin_lock(&data_sinfo->lock);
	data_sinfo->bytes_may_use -= bytes;
-	BTRFS_I(inode)->reserved_bytes -= bytes;
	spin_unlock(&data_sinfo->lock);
 }
 
@@ -3165,6 +3189,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
			      int force)
 {
+	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
	u64 thresh;
@@ -3173,11 +3198,18 @@ static int should_alloc_chunk(struct btrfs_root *root,
		return 1;
 
	/*
+	 * We need to take into account the global rsv because for all intents
+	 * and purposes it's used space. Don't worry about locking the
+	 * global_rsv, it doesn't change except when the transaction commits.
+	 */
+	num_allocated += global_rsv->size;
+
+	/*
	 * in limited mode, we want to have some free space up to
	 * about 1% of the FS size.
	 */
	if (force == CHUNK_ALLOC_LIMITED) {
-		thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+		thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
		thresh = max_t(u64, 64 * 1024 * 1024,
			       div_factor_fine(thresh, 1));
 
@@ -3199,7 +3231,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
		return 0;
 
-	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
 
	/* 256MB or 5% of the FS */
	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
@@ -3302,24 +3334,26 @@ out:
 /*
  * shrink metadata reservation for delalloc
  */
-static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim, int sync)
+static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
+			   bool wait_ordered)
 {
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_space_info *space_info;
+	struct btrfs_trans_handle *trans;
	u64 reserved;
	u64 max_reclaim;
	u64 reclaimed = 0;
	long time_left;
-	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
	int loops = 0;
	unsigned long progress;
 
+	trans = (struct btrfs_trans_handle *)current->journal_info;
	block_rsv = &root->fs_info->delalloc_block_rsv;
	space_info = block_rsv->space_info;
 
	smp_mb();
-	reserved = space_info->bytes_reserved;
+	reserved = space_info->bytes_may_use;
	progress = space_info->reservation_progress;
 
	if (reserved == 0)
@@ -3334,7 +3368,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
	}
 
	max_reclaim = min(reserved, to_reclaim);
-
+	nr_pages = max_t(unsigned long, nr_pages,
+			 max_reclaim >> PAGE_CACHE_SHIFT);
	while (loops < 1024) {
		/* have the flusher threads jump in and do some IO */
		smp_mb();
@@ -3344,9 +3379,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
					WB_REASON_FS_FREE_SPACE);
 
		spin_lock(&space_info->lock);
-		if (reserved > space_info->bytes_reserved)
-			reclaimed += reserved - space_info->bytes_reserved;
-		reserved = space_info->bytes_reserved;
+		if (reserved > space_info->bytes_may_use)
+			reclaimed += reserved - space_info->bytes_may_use;
+		reserved = space_info->bytes_may_use;
		spin_unlock(&space_info->lock);
 
		loops++;
@@ -3357,11 +3392,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
		if (trans && trans->transaction->blocked)
			return -EAGAIN;
 
-		time_left = schedule_timeout_interruptible(1);
+		if (wait_ordered && !trans) {
+			btrfs_wait_ordered_extents(root, 0, 0);
+		} else {
+			time_left = schedule_timeout_interruptible(1);
 
			/* We were interrupted, exit */
			if (time_left)
				break;
+		}
 
		/* we've kicked the IO a few times, if anything has been freed,
		 * exit. There is no sense in looping here for a long time
@@ -3376,34 +3415,90 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
		}
 
	}
-	if (reclaimed >= to_reclaim && !trans)
-		btrfs_wait_ordered_extents(root, 0, 0);
+
	return reclaimed >= to_reclaim;
 }
 
-/*
- * Retries tells us how many times we've called reserve_metadata_bytes. The
- * idea is if this is the first call (retries == 0) then we will add to our
- * reserved count if we can't make the allocation in order to hold our place
- * while we go and try and free up space. That way for retries > 1 we don't try
- * and add space, we just check to see if the amount of unused space is >= the
- * total space, meaning that our reservation is valid.
+/**
+ * maybe_commit_transaction - possibly commit the transaction if its ok to
+ * @root - the root we're allocating for
+ * @bytes - the number of bytes we want to reserve
+ * @force - force the commit
  *
- * However if we don't intend to retry this reservation, pass -1 as retries so
- * that it short circuits this logic.
+ * This will check to make sure that committing the transaction will actually
+ * get us somewhere and then commit the transaction if it does. Otherwise it
+ * will return -ENOSPC.
  */
-static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
-				  struct btrfs_root *root,
+static int may_commit_transaction(struct btrfs_root *root,
+				  struct btrfs_space_info *space_info,
+				  u64 bytes, int force)
+{
+	struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
+	struct btrfs_trans_handle *trans;
+
+	trans = (struct btrfs_trans_handle *)current->journal_info;
+	if (trans)
+		return -EAGAIN;
+
+	if (force)
+		goto commit;
+
+	/* See if there is enough pinned space to make this reservation */
+	spin_lock(&space_info->lock);
+	if (space_info->bytes_pinned >= bytes) {
+		spin_unlock(&space_info->lock);
+		goto commit;
+	}
+	spin_unlock(&space_info->lock);
+
+	/*
+	 * See if there is some space in the delayed insertion reservation for
+	 * this reservation.
+	 */
+	if (space_info != delayed_rsv->space_info)
+		return -ENOSPC;
+
+	spin_lock(&delayed_rsv->lock);
+	if (delayed_rsv->size < bytes) {
+		spin_unlock(&delayed_rsv->lock);
+		return -ENOSPC;
+	}
+	spin_unlock(&delayed_rsv->lock);
+
+commit:
+	trans = btrfs_join_transaction(root);
+	if (IS_ERR(trans))
+		return -ENOSPC;
+
+	return btrfs_commit_transaction(trans, root);
+}
+
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - wether or not we can flush to make our reservation
+ *
+ * This will reserve orgi_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+static int reserve_metadata_bytes(struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes, int flush)
 {
	struct btrfs_space_info *space_info = block_rsv->space_info;
-	u64 unused;
+	u64 used;
	u64 num_bytes = orig_bytes;
	int retries = 0;
	int ret = 0;
	bool committed = false;
	bool flushing = false;
+	bool wait_ordered = false;
 
 again:
	ret = 0;
@@ -3420,7 +3515,7 @@ again:
	 * deadlock since we are waiting for the flusher to finish, but
	 * hold the current transaction open.
	 */
-	if (trans)
+	if (current->journal_info)
		return -EAGAIN;
	ret = wait_event_interruptible(space_info->wait,
				       !space_info->flush);
@@ -3432,9 +3527,9 @@ again:
	}
 
	ret = -ENOSPC;
-	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly +
-		 space_info->bytes_may_use;
+	used = space_info->bytes_used + space_info->bytes_reserved +
+		space_info->bytes_pinned + space_info->bytes_readonly +
+		space_info->bytes_may_use;
 
	/*
	 * The idea here is that we've not already over-reserved the block group
@@ -3443,10 +3538,9 @@ again:
	 * lets start flushing stuff first and then come back and try to make
	 * our reservation.
	 */
-	if (unused <= space_info->total_bytes) {
-		unused = space_info->total_bytes - unused;
-		if (unused >= num_bytes) {
-			space_info->bytes_reserved += orig_bytes;
+	if (used <= space_info->total_bytes) {
+		if (used + orig_bytes <= space_info->total_bytes) {
+			space_info->bytes_may_use += orig_bytes;
			ret = 0;
		} else {
			/*
@@ -3462,10 +3556,64 @@ again:
		 * amount plus the amount of bytes that we need for this
		 * reservation.
		 */
-		num_bytes = unused - space_info->total_bytes +
+		wait_ordered = true;
+		num_bytes = used - space_info->total_bytes +
			    (orig_bytes * (retries + 1));
	}
 
+	if (ret) {
+		u64 profile = btrfs_get_alloc_profile(root, 0);
+		u64 avail;
+
+		/*
+		 * If we have a lot of space that's pinned, don't bother doing
+		 * the overcommit dance yet and just commit the transaction.
+		 */
+		avail = (space_info->total_bytes - space_info->bytes_used) * 8;
+		do_div(avail, 10);
+		if (space_info->bytes_pinned >= avail && flush && !committed) {
+			space_info->flush = 1;
+			flushing = true;
+			spin_unlock(&space_info->lock);
+			ret = may_commit_transaction(root, space_info,
+						     orig_bytes, 1);
+			if (ret)
+				goto out;
+			committed = true;
+			goto again;
+		}
+
+		spin_lock(&root->fs_info->free_chunk_lock);
+		avail = root->fs_info->free_chunk_space;
+
+		/*
+		 * If we have dup, raid1 or raid10 then only half of the free
+		 * space is actually useable.
+		 */
+		if (profile & (BTRFS_BLOCK_GROUP_DUP |
+			       BTRFS_BLOCK_GROUP_RAID1 |
+			       BTRFS_BLOCK_GROUP_RAID10))
+			avail >>= 1;
+
+		/*
+		 * If we aren't flushing don't let us overcommit too much, say
+		 * 1/8th of the space. If we can flush, let it overcommit up to
+		 * 1/2 of the space.
+		 */
+		if (flush)
+			avail >>= 3;
+		else
+			avail >>= 1;
+		spin_unlock(&root->fs_info->free_chunk_lock);
+
+		if (used + num_bytes < space_info->total_bytes + avail) {
+			space_info->bytes_may_use += orig_bytes;
+			ret = 0;
+		} else {
+			wait_ordered = true;
+		}
+	}
+
	/*
	 * Couldn't make our reservation, save our place so while we're trying
	 * to reclaim space we can actually use it instead of somebody else
@@ -3485,7 +3633,7 @@ again:
	 * We do synchronous shrinking since we don't actually unreserve
	 * metadata until after the IO is completed.
	 */
-	ret = shrink_delalloc(trans, root, num_bytes, 1);
+	ret = shrink_delalloc(root, num_bytes, wait_ordered);
	if (ret < 0)
		goto out;
 
@@ -3497,35 +3645,17 @@ again:
	 * so go back around and try again.
	 */
	if (retries < 2) {
+		wait_ordered = true;
		retries++;
		goto again;
	}
 
-	/*
-	 * Not enough space to be reclaimed, don't bother committing the
-	 * transaction.
-	 */
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < orig_bytes)
-		ret = -ENOSPC;
-	spin_unlock(&space_info->lock);
-	if (ret)
-		goto out;
-
-	ret = -EAGAIN;
-	if (trans)
-		goto out;
-
	ret = -ENOSPC;
	if (committed)
		goto out;
 
-	trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans))
-		goto out;
-	ret = btrfs_commit_transaction(trans, root);
+	ret = may_commit_transaction(root, space_info, orig_bytes, 0);
	if (!ret) {
-		trans = NULL;
		committed = true;
		goto again;
	}
@@ -3543,10 +3673,12 @@ out:
 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root)
 {
-	struct btrfs_block_rsv *block_rsv;
-	if (root->ref_cows)
+	struct btrfs_block_rsv *block_rsv = NULL;
+
+	if (root->ref_cows || root == root->fs_info->csum_root)
		block_rsv = trans->block_rsv;
-	else
+
+	if (!block_rsv)
		block_rsv = root->block_rsv;
 
	if (!block_rsv)
@@ -3617,7 +3749,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
	}
	if (num_bytes) {
		spin_lock(&space_info->lock);
-		space_info->bytes_reserved -= num_bytes;
+		space_info->bytes_may_use -= num_bytes;
		space_info->reservation_progress++;
		spin_unlock(&space_info->lock);
	}
@@ -3641,9 +3773,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
 {
	memset(rsv, 0, sizeof(*rsv));
	spin_lock_init(&rsv->lock);
-	atomic_set(&rsv->usage, 1);
-	rsv->priority = 6;
-	INIT_LIST_HEAD(&rsv->list);
 }
 
 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
@@ -3664,38 +3793,38 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv)
 {
-	if (rsv && atomic_dec_and_test(&rsv->usage)) {
-		btrfs_block_rsv_release(root, rsv, (u64)-1);
-		if (!rsv->durable)
-			kfree(rsv);
-	}
+	btrfs_block_rsv_release(root, rsv, (u64)-1);
+	kfree(rsv);
 }
 
-/*
- * make the block_rsv struct be able to capture freed space.
- * the captured space will re-add to the the block_rsv struct
- * after transaction commit
- */
-void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
-				 struct btrfs_block_rsv *block_rsv)
+int btrfs_block_rsv_add(struct btrfs_root *root,
+			struct btrfs_block_rsv *block_rsv,
+			u64 num_bytes)
 {
-	block_rsv->durable = 1;
-	mutex_lock(&fs_info->durable_block_rsv_mutex);
-	list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
-	mutex_unlock(&fs_info->durable_block_rsv_mutex);
+	int ret;
+
+	if (num_bytes == 0)
+		return 0;
+
+	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+	if (!ret) {
+		block_rsv_add_bytes(block_rsv, num_bytes, 1);
+		return 0;
+	}
+
+	return ret;
 }
 
-int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes)
+int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
+				struct btrfs_block_rsv *block_rsv,
+				u64 num_bytes)
 {
	int ret;
 
	if (num_bytes == 0)
		return 0;
 
-	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
+	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 1);
		return 0;
@@ -3704,55 +3833,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
	return ret;
 }
 
-int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
-			  struct btrfs_block_rsv *block_rsv,
-			  u64 min_reserved, int min_factor)
+int btrfs_block_rsv_check(struct btrfs_root *root,
+			  struct btrfs_block_rsv *block_rsv, int min_factor)
 {
	u64 num_bytes = 0;
-	int commit_trans = 0;
	int ret = -ENOSPC;
 
	if (!block_rsv)
		return 0;
 
	spin_lock(&block_rsv->lock);
-	if (min_factor > 0)
-		num_bytes = div_factor(block_rsv->size, min_factor);
-	if (min_reserved > num_bytes)
-		num_bytes = min_reserved;
+	num_bytes = div_factor(block_rsv->size, min_factor);
+	if (block_rsv->reserved >= num_bytes)
+		ret = 0;
+	spin_unlock(&block_rsv->lock);
 
-	if (block_rsv->reserved >= num_bytes) {
+	return ret;
+}
+
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+			   struct btrfs_block_rsv *block_rsv,
+			   u64 min_reserved)
+{
+	u64 num_bytes = 0;
+	int ret = -ENOSPC;
+
+	if (!block_rsv)
+		return 0;
+
+	spin_lock(&block_rsv->lock);
+	num_bytes = min_reserved;
+	if (block_rsv->reserved >= num_bytes)
		ret = 0;
-	} else {
+	else
		num_bytes -= block_rsv->reserved;
-		if (block_rsv->durable &&
-		    block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
-			commit_trans = 1;
-	}
	spin_unlock(&block_rsv->lock);
+
	if (!ret)
		return 0;
 
-	if (block_rsv->refill_used) {
-		ret = reserve_metadata_bytes(trans, root, block_rsv,
-					     num_bytes, 0);
-		if (!ret) {
-			block_rsv_add_bytes(block_rsv, num_bytes, 0);
-			return 0;
-		}
-	}
-
-	if (commit_trans) {
-		if (trans)
-			return -EAGAIN;
-		trans = btrfs_join_transaction(root);
-		BUG_ON(IS_ERR(trans));
-		ret = btrfs_commit_transaction(trans, root);
+	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+	if (!ret) {
+		block_rsv_add_bytes(block_rsv, num_bytes, 0);
		return 0;
	}
 
-	return -ENOSPC;
+	return ret;
 }
 
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
@@ -3784,7 +3910,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
	u64 num_bytes;
	u64 meta_used;
	u64 data_used;
-	int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
+	int csum_size = btrfs_super_csum_size(fs_info->super_copy);
 
	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
	spin_lock(&sinfo->lock);
@@ -3828,12 +3954,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
	if (sinfo->total_bytes > num_bytes) {
		num_bytes = sinfo->total_bytes - num_bytes;
		block_rsv->reserved += num_bytes;
-		sinfo->bytes_reserved += num_bytes;
+		sinfo->bytes_may_use += num_bytes;
	}
 
	if (block_rsv->reserved >= block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
-		sinfo->bytes_reserved -= num_bytes;
+		sinfo->bytes_may_use -= num_bytes;
		sinfo->reservation_progress++;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
@@ -3849,16 +3975,13 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.space_info = space_info;
-	fs_info->chunk_block_rsv.priority = 10;
 
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->global_block_rsv.space_info = space_info;
-	fs_info->global_block_rsv.priority = 10;
-	fs_info->global_block_rsv.refill_used = 1;
	fs_info->delalloc_block_rsv.space_info = space_info;
	fs_info->trans_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.space_info = space_info;
-	fs_info->empty_block_rsv.priority = 10;
+	fs_info->delayed_block_rsv.space_info = space_info;
 
	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
@@ -3866,10 +3989,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
 
-	btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
-
-	btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
-
	update_global_block_rsv(fs_info);
 }
 
@@ -3882,37 +4001,8 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
-}
-
-int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
-				    struct btrfs_block_rsv *rsv)
-{
-	struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
-	u64 num_bytes;
-	int ret;
-
-	/*
-	 * Truncate should be freeing data, but give us 2 items just in case it
-	 * needs to use some space. We may want to be smarter about this in the
-	 * future.
-	 */
-	num_bytes = btrfs_calc_trans_metadata_size(root, 2);
-
-	/* We already have enough bytes, just return */
-	if (rsv->reserved >= num_bytes)
-		return 0;
-
-	num_bytes -= rsv->reserved;
-
-	/*
-	 * You should have reserved enough space before hand to do this, so this
-	 * should not fail.
-	 */
-	ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
-	BUG_ON(ret);
-
-	return 0;
+	WARN_ON(fs_info->delayed_block_rsv.size > 0);
+	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
 }
 
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -3921,9 +4011,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
	if (!trans->bytes_reserved)
		return;
 
-	BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
-	btrfs_block_rsv_release(root, trans->block_rsv,
-				trans->bytes_reserved);
+	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
	trans->bytes_reserved = 0;
 }
 
@@ -3965,11 +4053,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+/**
+ * drop_outstanding_extent - drop an outstanding extent
+ * @inode: the inode we're dropping the extent for
+ *
+ * This is called when we are freeing up an outstanding extent, either called
+ * after an error or after an extent is written. This will return the number of
+ * reserved extents that need to be freed. This must be called with
+ * BTRFS_I(inode)->lock held.
+ */
 static unsigned drop_outstanding_extent(struct inode *inode)
 {
	unsigned dropped_extents = 0;
 
-	spin_lock(&BTRFS_I(inode)->lock);
	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
	BTRFS_I(inode)->outstanding_extents--;
 
@@ -3979,19 +4075,70 @@ static unsigned drop_outstanding_extent(struct inode *inode)
	 */
	if (BTRFS_I(inode)->outstanding_extents >=
	    BTRFS_I(inode)->reserved_extents)
-		goto out;
+		return 0;
 
	dropped_extents = BTRFS_I(inode)->reserved_extents -
		BTRFS_I(inode)->outstanding_extents;
	BTRFS_I(inode)->reserved_extents -= dropped_extents;
-out:
-	spin_unlock(&BTRFS_I(inode)->lock);
	return dropped_extents;
 }
 
-static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
+/**
+ * calc_csum_metadata_size - return the amount of metada space that must be
+ *	reserved/free'd for the given bytes.
+ * @inode: the inode we're manipulating
+ * @num_bytes: the number of bytes in question
+ * @reserve: 1 if we are reserving space, 0 if we are freeing space
+ *
+ * This adjusts the number of csum_bytes in the inode and then returns the
+ * correct amount of metadata that must either be reserved or freed. We
+ * calculate how many checksums we can fit into one leaf and then divide the
+ * number of bytes that will need to be checksumed by this value to figure out
+ * how many checksums will be required. If we are adding bytes then the number
+ * may go up and we will return the number of additional bytes that must be
+ * reserved. If it is going down we will return the number of bytes that must
+ * be freed.
+ *
+ * This must be called with BTRFS_I(inode)->lock held.
+ */
+static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
+				   int reserve)
 {
-	return num_bytes >>= 3;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	u64 csum_size;
+	int num_csums_per_leaf;
+	int num_csums;
+	int old_csums;
+
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
+	    BTRFS_I(inode)->csum_bytes == 0)
+		return 0;
+
+	old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
+	if (reserve)
+		BTRFS_I(inode)->csum_bytes += num_bytes;
+	else
+		BTRFS_I(inode)->csum_bytes -= num_bytes;
+	csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+	num_csums_per_leaf = (int)div64_u64(csum_size,
+					    sizeof(struct btrfs_csum_item) +
+					    sizeof(struct btrfs_disk_key));
+	num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
+	num_csums = num_csums + num_csums_per_leaf - 1;
+	num_csums = num_csums / num_csums_per_leaf;
+
+	old_csums = old_csums + num_csums_per_leaf - 1;
+	old_csums = old_csums / num_csums_per_leaf;
+
+	/* No change, no need to reserve more */
+	if (old_csums == num_csums)
+		return 0;
+
+	if (reserve)
+		return btrfs_calc_trans_metadata_size(root,
+						      num_csums - old_csums);
+
+	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
 }
 
 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
@@ -4000,9 +4147,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve = 0;
	unsigned nr_extents = 0;
+	int flush = 1;
	int ret;
 
-	if (btrfs_transaction_in_commit(root->fs_info))
+	if (btrfs_is_free_space_inode(root, inode))
+		flush = 0;
+
+	if (flush && btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);
 
	num_bytes = ALIGN(num_bytes, root->sectorsize);
@@ -4018,18 +4169,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
		to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
	}
+	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
	spin_unlock(&BTRFS_I(inode)->lock);
 
-	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
	if (ret) {
+		u64 to_free = 0;
		unsigned dropped;
+
+		spin_lock(&BTRFS_I(inode)->lock);
+		dropped = drop_outstanding_extent(inode);
+		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+		spin_unlock(&BTRFS_I(inode)->lock);
+		to_free += btrfs_calc_trans_metadata_size(root, dropped);
+
		/*
-		 * We don't need the return value since our reservation failed,
-		 * we just need to clean up our counter.
+		 * Somebody could have come in and twiddled with the
+		 * reservation, so if we have to free more than we would have
+		 * reserved from this reservation go ahead and release those
+		 * bytes.
		 */
-		dropped = drop_outstanding_extent(inode);
-		WARN_ON(dropped > 1);
+		to_free -= to_reserve;
+		if (to_free)
+			btrfs_block_rsv_release(root, block_rsv, to_free);
		return ret;
	}
 
@@ -4038,6 +4200,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
	return 0;
 }
 
+/**
+ * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
+ * @inode: the inode to release the reservation for
+ * @num_bytes: the number of bytes we're releasing
+ *
+ * This will release the metadata reservation for an inode. This can be called
+ * once we complete IO for a given set of bytes to release their metadata
+ * reservations.
+ */
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4045,9 +4216,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
	unsigned dropped;
 
	num_bytes = ALIGN(num_bytes, root->sectorsize);
+	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode);
 
-	to_free = calc_csum_metadata_size(inode, num_bytes);
+	to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped > 0)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
@@ -4055,6 +4228,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
				to_free);
 }
 
+/**
+ * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
+ * @inode: inode we're writing to
+ * @num_bytes: the number of bytes we want to allocate
+ *
+ * This will do the following things
+ *
+ * o reserve space in the data space info for num_bytes
+ * o reserve space in the metadata space info based on number of outstanding
+ *   extents and how much csums will be needed
+ * o add to the inodes ->delalloc_bytes
+ * o add it to the fs_info's delalloc inodes list.
+ *
+ * This will return 0 for success and -ENOSPC if there is no space left.
+ */
 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
 {
	int ret;
@@ -4072,6 +4260,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
	return 0;
 }
 
+/**
+ * btrfs_delalloc_release_space - release data and metadata space for delalloc
+ * @inode: inode we're releasing space for
+ * @num_bytes: the number of bytes we want to free up
+ *
+ * This must be matched with a call to btrfs_delalloc_reserve_space. This is
+ * called in the case that we don't need the metadata AND data reservations
+ * anymore. So if there is an error or we insert an inline extent.
+ *
+ * This function will release the metadata space that was not used and will
+ * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
+ * list if there are no delalloc bytes left.
+ */
 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
 {
	btrfs_delalloc_release_metadata(inode, num_bytes);
@@ -4091,12 +4292,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 
	/* block accounting for super block */
	spin_lock(&info->delalloc_lock);
-	old_val = btrfs_super_bytes_used(&info->super_copy);
+	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
-	btrfs_set_super_bytes_used(&info->super_copy, old_val);
+	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_lock);
 
	while (total) {
@@ -4124,7 +4325,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);
 
-		if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+		if (btrfs_test_opt(root, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;
 
@@ -4136,7 +4337,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
-			cache->space_info->reservation_progress++;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
@@ -4188,7 +4388,6 @@ static int pin_down_extent(struct btrfs_root *root,
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
-		cache->space_info->reservation_progress++;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);
@@ -4216,45 +4415,82 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4216 | } | 4415 | } |
4217 | 4416 | ||
4218 | /* | 4417 | /* |
4219 | * update size of reserved extents. this function may return -EAGAIN | 4418 | * this function must be called within transaction |
4220 | * if 'reserve' is true or 'sinfo' is false. | 4419 | */ |
4420 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | ||
4421 | struct btrfs_root *root, | ||
4422 | u64 bytenr, u64 num_bytes) | ||
4423 | { | ||
4424 | struct btrfs_block_group_cache *cache; | ||
4425 | |||
4426 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
4427 | BUG_ON(!cache); | ||
4428 | |||
4429 | /* | ||
4430 | * pull in the free space cache (if any) so that our pin | ||
4431 | * removes the free space from the cache. We have load_only set | ||
4432 | * to one because the slow code to read in the free extents does check | ||
4433 | * the pinned extents. | ||
4434 | */ | ||
4435 | cache_block_group(cache, trans, root, 1); | ||
4436 | |||
4437 | pin_down_extent(root, cache, bytenr, num_bytes, 0); | ||
4438 | |||
4439 | /* remove us from the free space cache (if we're there at all) */ | ||
4440 | btrfs_remove_free_space(cache, bytenr, num_bytes); | ||
4441 | btrfs_put_block_group(cache); | ||
4442 | return 0; | ||
4443 | } | ||
4444 | |||
4445 | /** | ||
4446 | * btrfs_update_reserved_bytes - update the block_group and space info counters | ||
4447 | * @cache: The cache we are manipulating | ||
4448 | * @num_bytes: The number of bytes in question | ||
4449 | * @reserve: One of the reservation enums | ||
4450 | * | ||
4451 | * This is called by the allocator when it reserves space, or by somebody who is | ||
4452 | * freeing space that was never actually used on disk. For example if you | ||
4453 | * reserve some space for a new leaf in transaction A and before transaction A | ||
4454 | * commits you free that leaf, you call this with reserve set to 0 in order to | ||
4455 | * clear the reservation. | ||
4456 | * | ||
4457 | * Metadata reservations should be called with RESERVE_ALLOC so we do the proper | ||
4458 | * ENOSPC accounting. For data we handle the reservation through clearing the | ||
4459 | * delalloc bits in the io_tree. We have to do this since we could end up | ||
4460 | * allocating less disk space for the amount of data we have reserved in the | ||
4461 | * case of compression. | ||
4462 | * | ||
4463 | * If this is a reservation and the block group has become read only we cannot | ||
4464 | * make the reservation and return -EAGAIN, otherwise this function always | ||
4465 | * succeeds. | ||
4221 | */ | 4466 | */ |
4222 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4467 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
4223 | u64 num_bytes, int reserve, int sinfo) | 4468 | u64 num_bytes, int reserve) |
4224 | { | 4469 | { |
4470 | struct btrfs_space_info *space_info = cache->space_info; | ||
4225 | int ret = 0; | 4471 | int ret = 0; |
4226 | if (sinfo) { | 4472 | spin_lock(&space_info->lock); |
4227 | struct btrfs_space_info *space_info = cache->space_info; | 4473 | spin_lock(&cache->lock); |
4228 | spin_lock(&space_info->lock); | 4474 | if (reserve != RESERVE_FREE) { |
4229 | spin_lock(&cache->lock); | ||
4230 | if (reserve) { | ||
4231 | if (cache->ro) { | ||
4232 | ret = -EAGAIN; | ||
4233 | } else { | ||
4234 | cache->reserved += num_bytes; | ||
4235 | space_info->bytes_reserved += num_bytes; | ||
4236 | } | ||
4237 | } else { | ||
4238 | if (cache->ro) | ||
4239 | space_info->bytes_readonly += num_bytes; | ||
4240 | cache->reserved -= num_bytes; | ||
4241 | space_info->bytes_reserved -= num_bytes; | ||
4242 | space_info->reservation_progress++; | ||
4243 | } | ||
4244 | spin_unlock(&cache->lock); | ||
4245 | spin_unlock(&space_info->lock); | ||
4246 | } else { | ||
4247 | spin_lock(&cache->lock); | ||
4248 | if (cache->ro) { | 4475 | if (cache->ro) { |
4249 | ret = -EAGAIN; | 4476 | ret = -EAGAIN; |
4250 | } else { | 4477 | } else { |
4251 | if (reserve) | 4478 | cache->reserved += num_bytes; |
4252 | cache->reserved += num_bytes; | 4479 | space_info->bytes_reserved += num_bytes; |
4253 | else | 4480 | if (reserve == RESERVE_ALLOC) { |
4254 | cache->reserved -= num_bytes; | 4481 | BUG_ON(space_info->bytes_may_use < num_bytes); |
4482 | space_info->bytes_may_use -= num_bytes; | ||
4483 | } | ||
4255 | } | 4484 | } |
4256 | spin_unlock(&cache->lock); | 4485 | } else { |
4486 | if (cache->ro) | ||
4487 | space_info->bytes_readonly += num_bytes; | ||
4488 | cache->reserved -= num_bytes; | ||
4489 | space_info->bytes_reserved -= num_bytes; | ||
4490 | space_info->reservation_progress++; | ||
4257 | } | 4491 | } |
4492 | spin_unlock(&cache->lock); | ||
4493 | spin_unlock(&space_info->lock); | ||
4258 | return ret; | 4494 | return ret; |
4259 | } | 4495 | } |
4260 | 4496 | ||
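The kernel-doc block above spells out the three reservation modes: RESERVE_ALLOC moves metadata bytes out of bytes_may_use and into bytes_reserved, RESERVE_ALLOC_NO_ACCOUNT reserves without touching bytes_may_use (data is accounted through the delalloc bits instead), and RESERVE_FREE hands the reservation back, crediting bytes_readonly when the block group has gone read-only. The self-contained userspace sketch below models just that bookkeeping; the struct fields and RESERVE_* values mirror the hunk, but the locking is dropped and the main() driver is purely hypothetical.

```c
/*
 * Illustrative userspace model of the RESERVE_* accounting shown in the
 * hunk above.  It is NOT the btrfs implementation: locking, the io_tree
 * delalloc path and the -EAGAIN handling for real block groups are all
 * simplified or omitted.
 */
#include <stdio.h>

enum { RESERVE_FREE = 0, RESERVE_ALLOC = 1, RESERVE_ALLOC_NO_ACCOUNT = 2 };

struct space_info {
	unsigned long long bytes_reserved;
	unsigned long long bytes_may_use;
	unsigned long long bytes_readonly;
};

struct block_group {
	struct space_info *space_info;
	unsigned long long reserved;
	int ro;
};

/* Mirrors the control flow of btrfs_update_reserved_bytes() without locks. */
static int update_reserved_bytes(struct block_group *cache,
				 unsigned long long num_bytes, int reserve)
{
	struct space_info *sinfo = cache->space_info;

	if (reserve != RESERVE_FREE) {
		if (cache->ro)
			return -1;		/* kernel returns -EAGAIN here */
		cache->reserved += num_bytes;
		sinfo->bytes_reserved += num_bytes;
		if (reserve == RESERVE_ALLOC)	/* metadata: move out of may_use */
			sinfo->bytes_may_use -= num_bytes;
	} else {
		if (cache->ro)
			sinfo->bytes_readonly += num_bytes;
		cache->reserved -= num_bytes;
		sinfo->bytes_reserved -= num_bytes;
	}
	return 0;
}

int main(void)
{
	struct space_info sinfo = { .bytes_may_use = 4096 };
	struct block_group bg = { .space_info = &sinfo };

	/* metadata allocation: bytes_may_use -> bytes_reserved */
	update_reserved_bytes(&bg, 4096, RESERVE_ALLOC);
	printf("after alloc: reserved=%llu may_use=%llu\n",
	       sinfo.bytes_reserved, sinfo.bytes_may_use);

	/* the extent is freed before it ever hits disk */
	update_reserved_bytes(&bg, 4096, RESERVE_FREE);
	printf("after free:  reserved=%llu may_use=%llu\n",
	       sinfo.bytes_reserved, sinfo.bytes_may_use);
	return 0;
}
```

Running the model shows 4096 bytes migrating from bytes_may_use to bytes_reserved on RESERVE_ALLOC and bytes_reserved draining back to zero on RESERVE_FREE, which is the invariant the new BUG_ON(space_info->bytes_may_use < num_bytes) guards in the real function.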
@@ -4320,13 +4556,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
4320 | spin_lock(&cache->lock); | 4556 | spin_lock(&cache->lock); |
4321 | cache->pinned -= len; | 4557 | cache->pinned -= len; |
4322 | cache->space_info->bytes_pinned -= len; | 4558 | cache->space_info->bytes_pinned -= len; |
4323 | if (cache->ro) { | 4559 | if (cache->ro) |
4324 | cache->space_info->bytes_readonly += len; | 4560 | cache->space_info->bytes_readonly += len; |
4325 | } else if (cache->reserved_pinned > 0) { | ||
4326 | len = min(len, cache->reserved_pinned); | ||
4327 | cache->reserved_pinned -= len; | ||
4328 | cache->space_info->bytes_reserved += len; | ||
4329 | } | ||
4330 | spin_unlock(&cache->lock); | 4561 | spin_unlock(&cache->lock); |
4331 | spin_unlock(&cache->space_info->lock); | 4562 | spin_unlock(&cache->space_info->lock); |
4332 | } | 4563 | } |
@@ -4341,11 +4572,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4341 | { | 4572 | { |
4342 | struct btrfs_fs_info *fs_info = root->fs_info; | 4573 | struct btrfs_fs_info *fs_info = root->fs_info; |
4343 | struct extent_io_tree *unpin; | 4574 | struct extent_io_tree *unpin; |
4344 | struct btrfs_block_rsv *block_rsv; | ||
4345 | struct btrfs_block_rsv *next_rsv; | ||
4346 | u64 start; | 4575 | u64 start; |
4347 | u64 end; | 4576 | u64 end; |
4348 | int idx; | ||
4349 | int ret; | 4577 | int ret; |
4350 | 4578 | ||
4351 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4579 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
@@ -4368,30 +4596,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4368 | cond_resched(); | 4596 | cond_resched(); |
4369 | } | 4597 | } |
4370 | 4598 | ||
4371 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
4372 | list_for_each_entry_safe(block_rsv, next_rsv, | ||
4373 | &fs_info->durable_block_rsv_list, list) { | ||
4374 | |||
4375 | idx = trans->transid & 0x1; | ||
4376 | if (block_rsv->freed[idx] > 0) { | ||
4377 | block_rsv_add_bytes(block_rsv, | ||
4378 | block_rsv->freed[idx], 0); | ||
4379 | block_rsv->freed[idx] = 0; | ||
4380 | } | ||
4381 | if (atomic_read(&block_rsv->usage) == 0) { | ||
4382 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); | ||
4383 | |||
4384 | if (block_rsv->freed[0] == 0 && | ||
4385 | block_rsv->freed[1] == 0) { | ||
4386 | list_del_init(&block_rsv->list); | ||
4387 | kfree(block_rsv); | ||
4388 | } | ||
4389 | } else { | ||
4390 | btrfs_block_rsv_release(root, block_rsv, 0); | ||
4391 | } | ||
4392 | } | ||
4393 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
4394 | |||
4395 | return 0; | 4599 | return 0; |
4396 | } | 4600 | } |
4397 | 4601 | ||
@@ -4669,7 +4873,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4669 | struct extent_buffer *buf, | 4873 | struct extent_buffer *buf, |
4670 | u64 parent, int last_ref) | 4874 | u64 parent, int last_ref) |
4671 | { | 4875 | { |
4672 | struct btrfs_block_rsv *block_rsv; | ||
4673 | struct btrfs_block_group_cache *cache = NULL; | 4876 | struct btrfs_block_group_cache *cache = NULL; |
4674 | int ret; | 4877 | int ret; |
4675 | 4878 | ||
@@ -4684,64 +4887,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4684 | if (!last_ref) | 4887 | if (!last_ref) |
4685 | return; | 4888 | return; |
4686 | 4889 | ||
4687 | block_rsv = get_block_rsv(trans, root); | ||
4688 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | 4890 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); |
4689 | if (block_rsv->space_info != cache->space_info) | ||
4690 | goto out; | ||
4691 | 4891 | ||
4692 | if (btrfs_header_generation(buf) == trans->transid) { | 4892 | if (btrfs_header_generation(buf) == trans->transid) { |
4693 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 4893 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
4694 | ret = check_ref_cleanup(trans, root, buf->start); | 4894 | ret = check_ref_cleanup(trans, root, buf->start); |
4695 | if (!ret) | 4895 | if (!ret) |
4696 | goto pin; | 4896 | goto out; |
4697 | } | 4897 | } |
4698 | 4898 | ||
4699 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 4899 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
4700 | pin_down_extent(root, cache, buf->start, buf->len, 1); | 4900 | pin_down_extent(root, cache, buf->start, buf->len, 1); |
4701 | goto pin; | 4901 | goto out; |
4702 | } | 4902 | } |
4703 | 4903 | ||
4704 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4904 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4705 | 4905 | ||
4706 | btrfs_add_free_space(cache, buf->start, buf->len); | 4906 | btrfs_add_free_space(cache, buf->start, buf->len); |
4707 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); | 4907 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
4708 | if (ret == -EAGAIN) { | ||
4709 | /* block group became read-only */ | ||
4710 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); | ||
4711 | goto out; | ||
4712 | } | ||
4713 | |||
4714 | ret = 1; | ||
4715 | spin_lock(&block_rsv->lock); | ||
4716 | if (block_rsv->reserved < block_rsv->size) { | ||
4717 | block_rsv->reserved += buf->len; | ||
4718 | ret = 0; | ||
4719 | } | ||
4720 | spin_unlock(&block_rsv->lock); | ||
4721 | |||
4722 | if (ret) { | ||
4723 | spin_lock(&cache->space_info->lock); | ||
4724 | cache->space_info->bytes_reserved -= buf->len; | ||
4725 | cache->space_info->reservation_progress++; | ||
4726 | spin_unlock(&cache->space_info->lock); | ||
4727 | } | ||
4728 | goto out; | ||
4729 | } | ||
4730 | pin: | ||
4731 | if (block_rsv->durable && !cache->ro) { | ||
4732 | ret = 0; | ||
4733 | spin_lock(&cache->lock); | ||
4734 | if (!cache->ro) { | ||
4735 | cache->reserved_pinned += buf->len; | ||
4736 | ret = 1; | ||
4737 | } | ||
4738 | spin_unlock(&cache->lock); | ||
4739 | |||
4740 | if (ret) { | ||
4741 | spin_lock(&block_rsv->lock); | ||
4742 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
4743 | spin_unlock(&block_rsv->lock); | ||
4744 | } | ||
4745 | } | 4908 | } |
4746 | out: | 4909 | out: |
4747 | /* | 4910 | /* |
@@ -4884,10 +5047,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4884 | int last_ptr_loop = 0; | 5047 | int last_ptr_loop = 0; |
4885 | int loop = 0; | 5048 | int loop = 0; |
4886 | int index = 0; | 5049 | int index = 0; |
5050 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | ||
5051 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; | ||
4887 | bool found_uncached_bg = false; | 5052 | bool found_uncached_bg = false; |
4888 | bool failed_cluster_refill = false; | 5053 | bool failed_cluster_refill = false; |
4889 | bool failed_alloc = false; | 5054 | bool failed_alloc = false; |
4890 | bool use_cluster = true; | 5055 | bool use_cluster = true; |
5056 | bool have_caching_bg = false; | ||
4891 | u64 ideal_cache_percent = 0; | 5057 | u64 ideal_cache_percent = 0; |
4892 | u64 ideal_cache_offset = 0; | 5058 | u64 ideal_cache_offset = 0; |
4893 | 5059 | ||
@@ -4970,6 +5136,7 @@ ideal_cache: | |||
4970 | } | 5136 | } |
4971 | } | 5137 | } |
4972 | search: | 5138 | search: |
5139 | have_caching_bg = false; | ||
4973 | down_read(&space_info->groups_sem); | 5140 | down_read(&space_info->groups_sem); |
4974 | list_for_each_entry(block_group, &space_info->block_groups[index], | 5141 | list_for_each_entry(block_group, &space_info->block_groups[index], |
4975 | list) { | 5142 | list) { |
@@ -5178,6 +5345,8 @@ refill_cluster: | |||
5178 | failed_alloc = true; | 5345 | failed_alloc = true; |
5179 | goto have_block_group; | 5346 | goto have_block_group; |
5180 | } else if (!offset) { | 5347 | } else if (!offset) { |
5348 | if (!cached) | ||
5349 | have_caching_bg = true; | ||
5181 | goto loop; | 5350 | goto loop; |
5182 | } | 5351 | } |
5183 | checks: | 5352 | checks: |
@@ -5203,8 +5372,8 @@ checks: | |||
5203 | search_start - offset); | 5372 | search_start - offset); |
5204 | BUG_ON(offset > search_start); | 5373 | BUG_ON(offset > search_start); |
5205 | 5374 | ||
5206 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, | 5375 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, |
5207 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5376 | alloc_type); |
5208 | if (ret == -EAGAIN) { | 5377 | if (ret == -EAGAIN) { |
5209 | btrfs_add_free_space(block_group, offset, num_bytes); | 5378 | btrfs_add_free_space(block_group, offset, num_bytes); |
5210 | goto loop; | 5379 | goto loop; |
@@ -5228,6 +5397,9 @@ loop: | |||
5228 | } | 5397 | } |
5229 | up_read(&space_info->groups_sem); | 5398 | up_read(&space_info->groups_sem); |
5230 | 5399 | ||
5400 | if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg) | ||
5401 | goto search; | ||
5402 | |||
5231 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | 5403 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) |
5232 | goto search; | 5404 | goto search; |
5233 | 5405 | ||
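Two changes run through the find_free_extent() hunks above: the reservation type is chosen once up front (data extents use RESERVE_ALLOC_NO_ACCOUNT because their ENOSPC accounting already happened at delalloc time, metadata uses RESERVE_ALLOC), and a new have_caching_bg flag sends the allocator back to the search: label as long as nothing was found but some candidate block group is still loading its free-space cache. The toy model below captures only that retry shape; the group structure, the try_alloc() policy and the "cache finishes instantly" shortcut are invented for illustration and are not the kernel allocator.

```c
/*
 * Toy model of the "re-search while a block group is still caching"
 * loop added above.  Only the control flow mirrors the diff; the data
 * and the allocation policy are made up.
 */
#include <stdbool.h>
#include <stdio.h>

struct group {
	bool cached;			/* free-space cache fully loaded? */
	unsigned long long free;	/* bytes currently known to be free */
};

static int try_alloc(struct group *g, unsigned long long want)
{
	if (g->free >= want) {
		g->free -= want;
		return 0;
	}
	return -1;
}

static int find_free_extent_model(struct group *groups, int ngroups,
				  unsigned long long want)
{
	bool have_caching_bg;

search:
	have_caching_bg = false;
	for (int i = 0; i < ngroups; i++) {
		if (try_alloc(&groups[i], want) == 0)
			return i;		/* found space in this group */
		if (!groups[i].cached)
			have_caching_bg = true;	/* more free space may appear */
	}
	if (have_caching_bg) {
		/* the kernel waits for caching progress; here it just finishes */
		for (int i = 0; i < ngroups; i++) {
			if (!groups[i].cached) {
				groups[i].cached = true;
				groups[i].free += 1 << 20;
			}
		}
		goto search;
	}
	return -1;
}

int main(void)
{
	struct group groups[2] = {
		{ .cached = true,  .free = 0 },
		{ .cached = false, .free = 0 },	/* still caching */
	};
	printf("allocated from group %d\n",
	       find_free_extent_model(groups, 2, 64 * 1024));
	return 0;
}
```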
@@ -5326,7 +5498,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
5326 | int index = 0; | 5498 | int index = 0; |
5327 | 5499 | ||
5328 | spin_lock(&info->lock); | 5500 | spin_lock(&info->lock); |
5329 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 5501 | printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", |
5502 | (unsigned long long)info->flags, | ||
5330 | (unsigned long long)(info->total_bytes - info->bytes_used - | 5503 | (unsigned long long)(info->total_bytes - info->bytes_used - |
5331 | info->bytes_pinned - info->bytes_reserved - | 5504 | info->bytes_pinned - info->bytes_reserved - |
5332 | info->bytes_readonly), | 5505 | info->bytes_readonly), |
@@ -5412,7 +5585,8 @@ again: | |||
5412 | return ret; | 5585 | return ret; |
5413 | } | 5586 | } |
5414 | 5587 | ||
5415 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | 5588 | static int __btrfs_free_reserved_extent(struct btrfs_root *root, |
5589 | u64 start, u64 len, int pin) | ||
5416 | { | 5590 | { |
5417 | struct btrfs_block_group_cache *cache; | 5591 | struct btrfs_block_group_cache *cache; |
5418 | int ret = 0; | 5592 | int ret = 0; |
@@ -5427,8 +5601,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5427 | if (btrfs_test_opt(root, DISCARD)) | 5601 | if (btrfs_test_opt(root, DISCARD)) |
5428 | ret = btrfs_discard_extent(root, start, len, NULL); | 5602 | ret = btrfs_discard_extent(root, start, len, NULL); |
5429 | 5603 | ||
5430 | btrfs_add_free_space(cache, start, len); | 5604 | if (pin) |
5431 | btrfs_update_reserved_bytes(cache, len, 0, 1); | 5605 | pin_down_extent(root, cache, start, len, 1); |
5606 | else { | ||
5607 | btrfs_add_free_space(cache, start, len); | ||
5608 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); | ||
5609 | } | ||
5432 | btrfs_put_block_group(cache); | 5610 | btrfs_put_block_group(cache); |
5433 | 5611 | ||
5434 | trace_btrfs_reserved_extent_free(root, start, len); | 5612 | trace_btrfs_reserved_extent_free(root, start, len); |
@@ -5436,6 +5614,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5436 | return ret; | 5614 | return ret; |
5437 | } | 5615 | } |
5438 | 5616 | ||
5617 | int btrfs_free_reserved_extent(struct btrfs_root *root, | ||
5618 | u64 start, u64 len) | ||
5619 | { | ||
5620 | return __btrfs_free_reserved_extent(root, start, len, 0); | ||
5621 | } | ||
5622 | |||
5623 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | ||
5624 | u64 start, u64 len) | ||
5625 | { | ||
5626 | return __btrfs_free_reserved_extent(root, start, len, 1); | ||
5627 | } | ||
5628 | |||
5439 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | 5629 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
5440 | struct btrfs_root *root, | 5630 | struct btrfs_root *root, |
5441 | u64 parent, u64 root_objectid, | 5631 | u64 parent, u64 root_objectid, |
@@ -5631,7 +5821,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5631 | put_caching_control(caching_ctl); | 5821 | put_caching_control(caching_ctl); |
5632 | } | 5822 | } |
5633 | 5823 | ||
5634 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); | 5824 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
5825 | RESERVE_ALLOC_NO_ACCOUNT); | ||
5635 | BUG_ON(ret); | 5826 | BUG_ON(ret); |
5636 | btrfs_put_block_group(block_group); | 5827 | btrfs_put_block_group(block_group); |
5637 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5828 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -5688,8 +5879,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5688 | block_rsv = get_block_rsv(trans, root); | 5879 | block_rsv = get_block_rsv(trans, root); |
5689 | 5880 | ||
5690 | if (block_rsv->size == 0) { | 5881 | if (block_rsv->size == 0) { |
5691 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 5882 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); |
5692 | blocksize, 0); | ||
5693 | /* | 5883 | /* |
5694 | * If we couldn't reserve metadata bytes try and use some from | 5884 | * If we couldn't reserve metadata bytes try and use some from |
5695 | * the global reserve. | 5885 | * the global reserve. |
@@ -5709,13 +5899,15 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5709 | if (!ret) | 5899 | if (!ret) |
5710 | return block_rsv; | 5900 | return block_rsv; |
5711 | if (ret) { | 5901 | if (ret) { |
5712 | WARN_ON(1); | 5902 | static DEFINE_RATELIMIT_STATE(_rs, |
5713 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, | 5903 | DEFAULT_RATELIMIT_INTERVAL, |
5714 | 0); | 5904 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
5905 | if (__ratelimit(&_rs)) { | ||
5906 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); | ||
5907 | WARN_ON(1); | ||
5908 | } | ||
5909 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | ||
5715 | if (!ret) { | 5910 | if (!ret) { |
5716 | spin_lock(&block_rsv->lock); | ||
5717 | block_rsv->size += blocksize; | ||
5718 | spin_unlock(&block_rsv->lock); | ||
5719 | return block_rsv; | 5911 | return block_rsv; |
5720 | } else if (ret && block_rsv != global_rsv) { | 5912 | } else if (ret && block_rsv != global_rsv) { |
5721 | ret = block_rsv_use_bytes(global_rsv, blocksize); | 5913 | ret = block_rsv_use_bytes(global_rsv, blocksize); |
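use_block_rsv() now rate-limits its reservation-failure diagnostic with DEFINE_RATELIMIT_STATE()/__ratelimit() (hence the <linux/ratelimit.h> include added at the top of the diff), so a burst of failed reservations prints at most a couple of KERN_DEBUG lines and WARN_ON backtraces per interval instead of flooding the log. For readers outside the kernel tree, a minimal userspace stand-in for that interval-plus-burst suppression could look like the sketch below; the hand-rolled limiter is an analogue built on stated assumptions, not the kernel implementation.

```c
/*
 * Userspace stand-in for DEFINE_RATELIMIT_STATE()/__ratelimit(): allow at
 * most `burst` events per `interval` seconds and suppress the rest.  This
 * only illustrates the idea; the kernel version also counts and reports
 * how many callbacks it suppressed.
 */
#include <stdio.h>
#include <time.h>

struct rate_limiter {
	time_t window_start;
	int interval;	/* seconds */
	int burst;
	int printed;
};

static int rate_limit_ok(struct rate_limiter *rs)
{
	time_t now = time(NULL);

	if (now - rs->window_start >= rs->interval) {
		rs->window_start = now;	/* new window, reset the budget */
		rs->printed = 0;
	}
	if (rs->printed < rs->burst) {
		rs->printed++;
		return 1;
	}
	return 0;	/* over budget: caller stays quiet */
}

int main(void)
{
	/* mirrors the diff: default-style interval, burst of 2 */
	struct rate_limiter rs = { .interval = 5, .burst = 2 };

	for (int i = 0; i < 10; i++)
		if (rate_limit_ok(&rs))
			printf("would emit the btrfs warning (event %d)\n", i);
	return 0;
}
```

With interval 5 and burst 2, only the first two of the ten simulated failures produce output; the burst value of 2 matches the /*DEFAULT_RATELIMIT_BURST*/ 2 argument in the hunk above.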
@@ -6593,12 +6785,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) | |||
6593 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 6785 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
6594 | 6786 | ||
6595 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6787 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6596 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6788 | sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes + |
6597 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= | 6789 | min_allocable_bytes <= sinfo->total_bytes) { |
6598 | sinfo->total_bytes) { | ||
6599 | sinfo->bytes_readonly += num_bytes; | 6790 | sinfo->bytes_readonly += num_bytes; |
6600 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
6601 | cache->reserved_pinned = 0; | ||
6602 | cache->ro = 1; | 6791 | cache->ro = 1; |
6603 | ret = 0; | 6792 | ret = 0; |
6604 | } | 6793 | } |
@@ -6965,7 +7154,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
6965 | struct btrfs_space_info, | 7154 | struct btrfs_space_info, |
6966 | list); | 7155 | list); |
6967 | if (space_info->bytes_pinned > 0 || | 7156 | if (space_info->bytes_pinned > 0 || |
6968 | space_info->bytes_reserved > 0) { | 7157 | space_info->bytes_reserved > 0 || |
7158 | space_info->bytes_may_use > 0) { | ||
6969 | WARN_ON(1); | 7159 | WARN_ON(1); |
6970 | dump_space_info(space_info, 0, 0); | 7160 | dump_space_info(space_info, 0, 0); |
6971 | } | 7161 | } |
@@ -7007,14 +7197,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7007 | return -ENOMEM; | 7197 | return -ENOMEM; |
7008 | path->reada = 1; | 7198 | path->reada = 1; |
7009 | 7199 | ||
7010 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | 7200 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); |
7011 | if (cache_gen != 0 && | 7201 | if (btrfs_test_opt(root, SPACE_CACHE) && |
7012 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | 7202 | btrfs_super_generation(root->fs_info->super_copy) != cache_gen) |
7013 | need_clear = 1; | 7203 | need_clear = 1; |
7014 | if (btrfs_test_opt(root, CLEAR_CACHE)) | 7204 | if (btrfs_test_opt(root, CLEAR_CACHE)) |
7015 | need_clear = 1; | 7205 | need_clear = 1; |
7016 | if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) | ||
7017 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
7018 | 7206 | ||
7019 | while (1) { | 7207 | while (1) { |
7020 | ret = find_first_block_group(root, path, &key); | 7208 | ret = find_first_block_group(root, path, &key); |
@@ -7253,7 +7441,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7253 | goto out; | 7441 | goto out; |
7254 | } | 7442 | } |
7255 | 7443 | ||
7256 | inode = lookup_free_space_inode(root, block_group, path); | 7444 | inode = lookup_free_space_inode(tree_root, block_group, path); |
7257 | if (!IS_ERR(inode)) { | 7445 | if (!IS_ERR(inode)) { |
7258 | ret = btrfs_orphan_add(trans, inode); | 7446 | ret = btrfs_orphan_add(trans, inode); |
7259 | BUG_ON(ret); | 7447 | BUG_ON(ret); |
@@ -7269,7 +7457,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7269 | spin_unlock(&block_group->lock); | 7457 | spin_unlock(&block_group->lock); |
7270 | } | 7458 | } |
7271 | /* One for our lookup ref */ | 7459 | /* One for our lookup ref */ |
7272 | iput(inode); | 7460 | btrfs_add_delayed_iput(inode); |
7273 | } | 7461 | } |
7274 | 7462 | ||
7275 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 7463 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
@@ -7340,7 +7528,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | |||
7340 | int mixed = 0; | 7528 | int mixed = 0; |
7341 | int ret; | 7529 | int ret; |
7342 | 7530 | ||
7343 | disk_super = &fs_info->super_copy; | 7531 | disk_super = fs_info->super_copy; |
7344 | if (!btrfs_super_root(disk_super)) | 7532 | if (!btrfs_super_root(disk_super)) |
7345 | return 1; | 7533 | return 1; |
7346 | 7534 | ||