diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 336 |
1 files changed, 207 insertions, 129 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9879bd474632..f5fbe576d2ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
467 | struct btrfs_root *root, | 467 | struct btrfs_root *root, |
468 | int load_cache_only) | 468 | int load_cache_only) |
469 | { | 469 | { |
470 | DEFINE_WAIT(wait); | ||
470 | struct btrfs_fs_info *fs_info = cache->fs_info; | 471 | struct btrfs_fs_info *fs_info = cache->fs_info; |
471 | struct btrfs_caching_control *caching_ctl; | 472 | struct btrfs_caching_control *caching_ctl; |
472 | int ret = 0; | 473 | int ret = 0; |
473 | 474 | ||
474 | smp_mb(); | 475 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); |
475 | if (cache->cached != BTRFS_CACHE_NO) | 476 | BUG_ON(!caching_ctl); |
477 | |||
478 | INIT_LIST_HEAD(&caching_ctl->list); | ||
479 | mutex_init(&caching_ctl->mutex); | ||
480 | init_waitqueue_head(&caching_ctl->wait); | ||
481 | caching_ctl->block_group = cache; | ||
482 | caching_ctl->progress = cache->key.objectid; | ||
483 | atomic_set(&caching_ctl->count, 1); | ||
484 | caching_ctl->work.func = caching_thread; | ||
485 | |||
486 | spin_lock(&cache->lock); | ||
487 | /* | ||
488 | * This should be a rare occasion, but this could happen I think in the | ||
489 | * case where one thread starts to load the space cache info, and then | ||
490 | * some other thread starts a transaction commit which tries to do an | ||
491 | * allocation while the other thread is still loading the space cache | ||
492 | * info. The previous loop should have kept us from choosing this block | ||
493 | * group, but if we've moved to the state where we will wait on caching | ||
494 | * block groups we need to first check if we're doing a fast load here, | ||
495 | * so we can wait for it to finish, otherwise we could end up allocating | ||
496 | * from a block group whose cache gets evicted for one reason or | ||
497 | * another. | ||
498 | */ | ||
499 | while (cache->cached == BTRFS_CACHE_FAST) { | ||
500 | struct btrfs_caching_control *ctl; | ||
501 | |||
502 | ctl = cache->caching_ctl; | ||
503 | atomic_inc(&ctl->count); | ||
504 | prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
505 | spin_unlock(&cache->lock); | ||
506 | |||
507 | schedule(); | ||
508 | |||
509 | finish_wait(&ctl->wait, &wait); | ||
510 | put_caching_control(ctl); | ||
511 | spin_lock(&cache->lock); | ||
512 | } | ||
513 | |||
514 | if (cache->cached != BTRFS_CACHE_NO) { | ||
515 | spin_unlock(&cache->lock); | ||
516 | kfree(caching_ctl); | ||
476 | return 0; | 517 | return 0; |
518 | } | ||
519 | WARN_ON(cache->caching_ctl); | ||
520 | cache->caching_ctl = caching_ctl; | ||
521 | cache->cached = BTRFS_CACHE_FAST; | ||
522 | spin_unlock(&cache->lock); | ||
477 | 523 | ||
478 | /* | 524 | /* |
479 | * We can't do the read from on-disk cache during a commit since we need | 525 | * We can't do the read from on-disk cache during a commit since we need |
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
484 | if (trans && (!trans->transaction->in_commit) && | 530 | if (trans && (!trans->transaction->in_commit) && |
485 | (root && root != root->fs_info->tree_root) && | 531 | (root && root != root->fs_info->tree_root) && |
486 | btrfs_test_opt(root, SPACE_CACHE)) { | 532 | btrfs_test_opt(root, SPACE_CACHE)) { |
487 | spin_lock(&cache->lock); | ||
488 | if (cache->cached != BTRFS_CACHE_NO) { | ||
489 | spin_unlock(&cache->lock); | ||
490 | return 0; | ||
491 | } | ||
492 | cache->cached = BTRFS_CACHE_STARTED; | ||
493 | spin_unlock(&cache->lock); | ||
494 | |||
495 | ret = load_free_space_cache(fs_info, cache); | 533 | ret = load_free_space_cache(fs_info, cache); |
496 | 534 | ||
497 | spin_lock(&cache->lock); | 535 | spin_lock(&cache->lock); |
498 | if (ret == 1) { | 536 | if (ret == 1) { |
537 | cache->caching_ctl = NULL; | ||
499 | cache->cached = BTRFS_CACHE_FINISHED; | 538 | cache->cached = BTRFS_CACHE_FINISHED; |
500 | cache->last_byte_to_unpin = (u64)-1; | 539 | cache->last_byte_to_unpin = (u64)-1; |
501 | } else { | 540 | } else { |
502 | cache->cached = BTRFS_CACHE_NO; | 541 | if (load_cache_only) { |
542 | cache->caching_ctl = NULL; | ||
543 | cache->cached = BTRFS_CACHE_NO; | ||
544 | } else { | ||
545 | cache->cached = BTRFS_CACHE_STARTED; | ||
546 | } | ||
503 | } | 547 | } |
504 | spin_unlock(&cache->lock); | 548 | spin_unlock(&cache->lock); |
549 | wake_up(&caching_ctl->wait); | ||
505 | if (ret == 1) { | 550 | if (ret == 1) { |
551 | put_caching_control(caching_ctl); | ||
506 | free_excluded_extents(fs_info->extent_root, cache); | 552 | free_excluded_extents(fs_info->extent_root, cache); |
507 | return 0; | 553 | return 0; |
508 | } | 554 | } |
555 | } else { | ||
556 | /* | ||
557 | * We are not going to do the fast caching, set cached to the | ||
558 | * appropriate value and wakeup any waiters. | ||
559 | */ | ||
560 | spin_lock(&cache->lock); | ||
561 | if (load_cache_only) { | ||
562 | cache->caching_ctl = NULL; | ||
563 | cache->cached = BTRFS_CACHE_NO; | ||
564 | } else { | ||
565 | cache->cached = BTRFS_CACHE_STARTED; | ||
566 | } | ||
567 | spin_unlock(&cache->lock); | ||
568 | wake_up(&caching_ctl->wait); | ||
509 | } | 569 | } |
510 | 570 | ||
511 | if (load_cache_only) | 571 | if (load_cache_only) { |
512 | return 0; | 572 | put_caching_control(caching_ctl); |
513 | |||
514 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); | ||
515 | BUG_ON(!caching_ctl); | ||
516 | |||
517 | INIT_LIST_HEAD(&caching_ctl->list); | ||
518 | mutex_init(&caching_ctl->mutex); | ||
519 | init_waitqueue_head(&caching_ctl->wait); | ||
520 | caching_ctl->block_group = cache; | ||
521 | caching_ctl->progress = cache->key.objectid; | ||
522 | /* one for caching kthread, one for caching block group list */ | ||
523 | atomic_set(&caching_ctl->count, 2); | ||
524 | caching_ctl->work.func = caching_thread; | ||
525 | |||
526 | spin_lock(&cache->lock); | ||
527 | if (cache->cached != BTRFS_CACHE_NO) { | ||
528 | spin_unlock(&cache->lock); | ||
529 | kfree(caching_ctl); | ||
530 | return 0; | 573 | return 0; |
531 | } | 574 | } |
532 | cache->caching_ctl = caching_ctl; | ||
533 | cache->cached = BTRFS_CACHE_STARTED; | ||
534 | spin_unlock(&cache->lock); | ||
535 | 575 | ||
536 | down_write(&fs_info->extent_commit_sem); | 576 | down_write(&fs_info->extent_commit_sem); |
577 | atomic_inc(&caching_ctl->count); | ||
537 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 578 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
538 | up_write(&fs_info->extent_commit_sem); | 579 | up_write(&fs_info->extent_commit_sem); |
539 | 580 | ||
@@ -2781,7 +2822,7 @@ out_free: | |||
2781 | btrfs_release_path(path); | 2822 | btrfs_release_path(path); |
2782 | out: | 2823 | out: |
2783 | spin_lock(&block_group->lock); | 2824 | spin_lock(&block_group->lock); |
2784 | if (!ret) | 2825 | if (!ret && dcs == BTRFS_DC_SETUP) |
2785 | block_group->cache_generation = trans->transid; | 2826 | block_group->cache_generation = trans->transid; |
2786 | block_group->disk_cache_state = dcs; | 2827 | block_group->disk_cache_state = dcs; |
2787 | spin_unlock(&block_group->lock); | 2828 | spin_unlock(&block_group->lock); |
@@ -3797,16 +3838,16 @@ void btrfs_free_block_rsv(struct btrfs_root *root, | |||
3797 | kfree(rsv); | 3838 | kfree(rsv); |
3798 | } | 3839 | } |
3799 | 3840 | ||
3800 | int btrfs_block_rsv_add(struct btrfs_root *root, | 3841 | static inline int __block_rsv_add(struct btrfs_root *root, |
3801 | struct btrfs_block_rsv *block_rsv, | 3842 | struct btrfs_block_rsv *block_rsv, |
3802 | u64 num_bytes) | 3843 | u64 num_bytes, int flush) |
3803 | { | 3844 | { |
3804 | int ret; | 3845 | int ret; |
3805 | 3846 | ||
3806 | if (num_bytes == 0) | 3847 | if (num_bytes == 0) |
3807 | return 0; | 3848 | return 0; |
3808 | 3849 | ||
3809 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); | 3850 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
3810 | if (!ret) { | 3851 | if (!ret) { |
3811 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | 3852 | block_rsv_add_bytes(block_rsv, num_bytes, 1); |
3812 | return 0; | 3853 | return 0; |
@@ -3815,22 +3856,18 @@ int btrfs_block_rsv_add(struct btrfs_root *root, | |||
3815 | return ret; | 3856 | return ret; |
3816 | } | 3857 | } |
3817 | 3858 | ||
3859 | int btrfs_block_rsv_add(struct btrfs_root *root, | ||
3860 | struct btrfs_block_rsv *block_rsv, | ||
3861 | u64 num_bytes) | ||
3862 | { | ||
3863 | return __block_rsv_add(root, block_rsv, num_bytes, 1); | ||
3864 | } | ||
3865 | |||
3818 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | 3866 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, |
3819 | struct btrfs_block_rsv *block_rsv, | 3867 | struct btrfs_block_rsv *block_rsv, |
3820 | u64 num_bytes) | 3868 | u64 num_bytes) |
3821 | { | 3869 | { |
3822 | int ret; | 3870 | return __block_rsv_add(root, block_rsv, num_bytes, 0); |
3823 | |||
3824 | if (num_bytes == 0) | ||
3825 | return 0; | ||
3826 | |||
3827 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0); | ||
3828 | if (!ret) { | ||
3829 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3830 | return 0; | ||
3831 | } | ||
3832 | |||
3833 | return ret; | ||
3834 | } | 3871 | } |
3835 | 3872 | ||
3836 | int btrfs_block_rsv_check(struct btrfs_root *root, | 3873 | int btrfs_block_rsv_check(struct btrfs_root *root, |
@@ -3851,9 +3888,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root, | |||
3851 | return ret; | 3888 | return ret; |
3852 | } | 3889 | } |
3853 | 3890 | ||
3854 | int btrfs_block_rsv_refill(struct btrfs_root *root, | 3891 | static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, |
3855 | struct btrfs_block_rsv *block_rsv, | 3892 | struct btrfs_block_rsv *block_rsv, |
3856 | u64 min_reserved) | 3893 | u64 min_reserved, int flush) |
3857 | { | 3894 | { |
3858 | u64 num_bytes = 0; | 3895 | u64 num_bytes = 0; |
3859 | int ret = -ENOSPC; | 3896 | int ret = -ENOSPC; |
@@ -3872,7 +3909,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
3872 | if (!ret) | 3909 | if (!ret) |
3873 | return 0; | 3910 | return 0; |
3874 | 3911 | ||
3875 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); | 3912 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
3876 | if (!ret) { | 3913 | if (!ret) { |
3877 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | 3914 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
3878 | return 0; | 3915 | return 0; |
@@ -3881,6 +3918,20 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
3881 | return ret; | 3918 | return ret; |
3882 | } | 3919 | } |
3883 | 3920 | ||
3921 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
3922 | struct btrfs_block_rsv *block_rsv, | ||
3923 | u64 min_reserved) | ||
3924 | { | ||
3925 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); | ||
3926 | } | ||
3927 | |||
3928 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
3929 | struct btrfs_block_rsv *block_rsv, | ||
3930 | u64 min_reserved) | ||
3931 | { | ||
3932 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); | ||
3933 | } | ||
3934 | |||
3884 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3935 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
3885 | struct btrfs_block_rsv *dst_rsv, | 3936 | struct btrfs_block_rsv *dst_rsv, |
3886 | u64 num_bytes) | 3937 | u64 num_bytes) |
@@ -4064,23 +4115,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
4064 | */ | 4115 | */ |
4065 | static unsigned drop_outstanding_extent(struct inode *inode) | 4116 | static unsigned drop_outstanding_extent(struct inode *inode) |
4066 | { | 4117 | { |
4118 | unsigned drop_inode_space = 0; | ||
4067 | unsigned dropped_extents = 0; | 4119 | unsigned dropped_extents = 0; |
4068 | 4120 | ||
4069 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | 4121 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); |
4070 | BTRFS_I(inode)->outstanding_extents--; | 4122 | BTRFS_I(inode)->outstanding_extents--; |
4071 | 4123 | ||
4124 | if (BTRFS_I(inode)->outstanding_extents == 0 && | ||
4125 | BTRFS_I(inode)->delalloc_meta_reserved) { | ||
4126 | drop_inode_space = 1; | ||
4127 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | ||
4128 | } | ||
4129 | |||
4072 | /* | 4130 | /* |
4073 | * If we have more or the same amount of outstanding extents than we have | 4131 | * If we have more or the same amount of outstanding extents than we have |
4074 | * reserved then we need to leave the reserved extents count alone. | 4132 | * reserved then we need to leave the reserved extents count alone. |
4075 | */ | 4133 | */ |
4076 | if (BTRFS_I(inode)->outstanding_extents >= | 4134 | if (BTRFS_I(inode)->outstanding_extents >= |
4077 | BTRFS_I(inode)->reserved_extents) | 4135 | BTRFS_I(inode)->reserved_extents) |
4078 | return 0; | 4136 | return drop_inode_space; |
4079 | 4137 | ||
4080 | dropped_extents = BTRFS_I(inode)->reserved_extents - | 4138 | dropped_extents = BTRFS_I(inode)->reserved_extents - |
4081 | BTRFS_I(inode)->outstanding_extents; | 4139 | BTRFS_I(inode)->outstanding_extents; |
4082 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | 4140 | BTRFS_I(inode)->reserved_extents -= dropped_extents; |
4083 | return dropped_extents; | 4141 | return dropped_extents + drop_inode_space; |
4084 | } | 4142 | } |
4085 | 4143 | ||
4086 | /** | 4144 | /** |
@@ -4146,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4146 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4147 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4205 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
4148 | u64 to_reserve = 0; | 4206 | u64 to_reserve = 0; |
4207 | u64 csum_bytes; | ||
4149 | unsigned nr_extents = 0; | 4208 | unsigned nr_extents = 0; |
4209 | int extra_reserve = 0; | ||
4150 | int flush = 1; | 4210 | int flush = 1; |
4151 | int ret; | 4211 | int ret; |
4152 | 4212 | ||
4213 | /* Need to be holding the i_mutex here if we aren't free space cache */ | ||
4153 | if (btrfs_is_free_space_inode(root, inode)) | 4214 | if (btrfs_is_free_space_inode(root, inode)) |
4154 | flush = 0; | 4215 | flush = 0; |
4216 | else | ||
4217 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); | ||
4155 | 4218 | ||
4156 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4219 | if (flush && btrfs_transaction_in_commit(root->fs_info)) |
4157 | schedule_timeout(1); | 4220 | schedule_timeout(1); |
@@ -4162,14 +4225,22 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4162 | BTRFS_I(inode)->outstanding_extents++; | 4225 | BTRFS_I(inode)->outstanding_extents++; |
4163 | 4226 | ||
4164 | if (BTRFS_I(inode)->outstanding_extents > | 4227 | if (BTRFS_I(inode)->outstanding_extents > |
4165 | BTRFS_I(inode)->reserved_extents) { | 4228 | BTRFS_I(inode)->reserved_extents) |
4166 | nr_extents = BTRFS_I(inode)->outstanding_extents - | 4229 | nr_extents = BTRFS_I(inode)->outstanding_extents - |
4167 | BTRFS_I(inode)->reserved_extents; | 4230 | BTRFS_I(inode)->reserved_extents; |
4168 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
4169 | 4231 | ||
4170 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4232 | /* |
4233 | * Add an item to reserve for updating the inode when we complete the | ||
4234 | * delalloc io. | ||
4235 | */ | ||
4236 | if (!BTRFS_I(inode)->delalloc_meta_reserved) { | ||
4237 | nr_extents++; | ||
4238 | extra_reserve = 1; | ||
4171 | } | 4239 | } |
4240 | |||
4241 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | ||
4172 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); | 4242 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); |
4243 | csum_bytes = BTRFS_I(inode)->csum_bytes; | ||
4173 | spin_unlock(&BTRFS_I(inode)->lock); | 4244 | spin_unlock(&BTRFS_I(inode)->lock); |
4174 | 4245 | ||
4175 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4246 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
@@ -4179,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4179 | 4250 | ||
4180 | spin_lock(&BTRFS_I(inode)->lock); | 4251 | spin_lock(&BTRFS_I(inode)->lock); |
4181 | dropped = drop_outstanding_extent(inode); | 4252 | dropped = drop_outstanding_extent(inode); |
4182 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4183 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4184 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4185 | |||
4186 | /* | 4253 | /* |
4187 | * Somebody could have come in and twiddled with the | 4254 | * If the inodes csum_bytes is the same as the original |
4188 | * reservation, so if we have to free more than we would have | 4255 | * csum_bytes then we know we haven't raced with any free()ers |
4189 | * reserved from this reservation go ahead and release those | 4256 | * so we can just reduce our inodes csum bytes and carry on. |
4190 | * bytes. | 4257 | * Otherwise we have to do the normal free thing to account for |
4258 | * the case that the free side didn't free up its reserve | ||
4259 | * because of this outstanding reservation. | ||
4191 | */ | 4260 | */ |
4192 | to_free -= to_reserve; | 4261 | if (BTRFS_I(inode)->csum_bytes == csum_bytes) |
4262 | calc_csum_metadata_size(inode, num_bytes, 0); | ||
4263 | else | ||
4264 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4265 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4266 | if (dropped) | ||
4267 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4268 | |||
4193 | if (to_free) | 4269 | if (to_free) |
4194 | btrfs_block_rsv_release(root, block_rsv, to_free); | 4270 | btrfs_block_rsv_release(root, block_rsv, to_free); |
4195 | return ret; | 4271 | return ret; |
4196 | } | 4272 | } |
4197 | 4273 | ||
4274 | spin_lock(&BTRFS_I(inode)->lock); | ||
4275 | if (extra_reserve) { | ||
4276 | BTRFS_I(inode)->delalloc_meta_reserved = 1; | ||
4277 | nr_extents--; | ||
4278 | } | ||
4279 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
4280 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4281 | |||
4198 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4282 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4199 | 4283 | ||
4200 | return 0; | 4284 | return 0; |
@@ -5040,11 +5124,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5040 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 5124 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
5041 | struct btrfs_free_cluster *last_ptr = NULL; | 5125 | struct btrfs_free_cluster *last_ptr = NULL; |
5042 | struct btrfs_block_group_cache *block_group = NULL; | 5126 | struct btrfs_block_group_cache *block_group = NULL; |
5127 | struct btrfs_block_group_cache *used_block_group; | ||
5043 | int empty_cluster = 2 * 1024 * 1024; | 5128 | int empty_cluster = 2 * 1024 * 1024; |
5044 | int allowed_chunk_alloc = 0; | 5129 | int allowed_chunk_alloc = 0; |
5045 | int done_chunk_alloc = 0; | 5130 | int done_chunk_alloc = 0; |
5046 | struct btrfs_space_info *space_info; | 5131 | struct btrfs_space_info *space_info; |
5047 | int last_ptr_loop = 0; | ||
5048 | int loop = 0; | 5132 | int loop = 0; |
5049 | int index = 0; | 5133 | int index = 0; |
5050 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | 5134 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? |
@@ -5106,6 +5190,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5106 | ideal_cache: | 5190 | ideal_cache: |
5107 | block_group = btrfs_lookup_block_group(root->fs_info, | 5191 | block_group = btrfs_lookup_block_group(root->fs_info, |
5108 | search_start); | 5192 | search_start); |
5193 | used_block_group = block_group; | ||
5109 | /* | 5194 | /* |
5110 | * we don't want to use the block group if it doesn't match our | 5195 | * we don't want to use the block group if it doesn't match our |
5111 | * allocation bits, or if it's not cached. | 5196 | * allocation bits, or if it's not cached. |
@@ -5143,6 +5228,7 @@ search: | |||
5143 | u64 offset; | 5228 | u64 offset; |
5144 | int cached; | 5229 | int cached; |
5145 | 5230 | ||
5231 | used_block_group = block_group; | ||
5146 | btrfs_get_block_group(block_group); | 5232 | btrfs_get_block_group(block_group); |
5147 | search_start = block_group->key.objectid; | 5233 | search_start = block_group->key.objectid; |
5148 | 5234 | ||
@@ -5166,13 +5252,15 @@ search: | |||
5166 | } | 5252 | } |
5167 | 5253 | ||
5168 | have_block_group: | 5254 | have_block_group: |
5169 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 5255 | cached = block_group_cache_done(block_group); |
5256 | if (unlikely(!cached)) { | ||
5170 | u64 free_percent; | 5257 | u64 free_percent; |
5171 | 5258 | ||
5259 | found_uncached_bg = true; | ||
5172 | ret = cache_block_group(block_group, trans, | 5260 | ret = cache_block_group(block_group, trans, |
5173 | orig_root, 1); | 5261 | orig_root, 1); |
5174 | if (block_group->cached == BTRFS_CACHE_FINISHED) | 5262 | if (block_group->cached == BTRFS_CACHE_FINISHED) |
5175 | goto have_block_group; | 5263 | goto alloc; |
5176 | 5264 | ||
5177 | free_percent = btrfs_block_group_used(&block_group->item); | 5265 | free_percent = btrfs_block_group_used(&block_group->item); |
5178 | free_percent *= 100; | 5266 | free_percent *= 100; |
@@ -5194,7 +5282,6 @@ have_block_group: | |||
5194 | orig_root, 0); | 5282 | orig_root, 0); |
5195 | BUG_ON(ret); | 5283 | BUG_ON(ret); |
5196 | } | 5284 | } |
5197 | found_uncached_bg = true; | ||
5198 | 5285 | ||
5199 | /* | 5286 | /* |
5200 | * If loop is set for cached only, try the next block | 5287 | * If loop is set for cached only, try the next block |
@@ -5204,94 +5291,80 @@ have_block_group: | |||
5204 | goto loop; | 5291 | goto loop; |
5205 | } | 5292 | } |
5206 | 5293 | ||
5207 | cached = block_group_cache_done(block_group); | 5294 | alloc: |
5208 | if (unlikely(!cached)) | ||
5209 | found_uncached_bg = true; | ||
5210 | |||
5211 | if (unlikely(block_group->ro)) | 5295 | if (unlikely(block_group->ro)) |
5212 | goto loop; | 5296 | goto loop; |
5213 | 5297 | ||
5214 | spin_lock(&block_group->free_space_ctl->tree_lock); | 5298 | spin_lock(&block_group->free_space_ctl->tree_lock); |
5215 | if (cached && | 5299 | if (cached && |
5216 | block_group->free_space_ctl->free_space < | 5300 | block_group->free_space_ctl->free_space < |
5217 | num_bytes + empty_size) { | 5301 | num_bytes + empty_cluster + empty_size) { |
5218 | spin_unlock(&block_group->free_space_ctl->tree_lock); | 5302 | spin_unlock(&block_group->free_space_ctl->tree_lock); |
5219 | goto loop; | 5303 | goto loop; |
5220 | } | 5304 | } |
5221 | spin_unlock(&block_group->free_space_ctl->tree_lock); | 5305 | spin_unlock(&block_group->free_space_ctl->tree_lock); |
5222 | 5306 | ||
5223 | /* | 5307 | /* |
5224 | * Ok we want to try and use the cluster allocator, so lets look | 5308 | * Ok we want to try and use the cluster allocator, so |
5225 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | 5309 | * lets look there |
5226 | * have tried the cluster allocator plenty of times at this | ||
5227 | * point and not have found anything, so we are likely way too | ||
5228 | * fragmented for the clustering stuff to find anything, so lets | ||
5229 | * just skip it and let the allocator find whatever block it can | ||
5230 | * find | ||
5231 | */ | 5310 | */ |
5232 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | 5311 | if (last_ptr) { |
5233 | /* | 5312 | /* |
5234 | * the refill lock keeps out other | 5313 | * the refill lock keeps out other |
5235 | * people trying to start a new cluster | 5314 | * people trying to start a new cluster |
5236 | */ | 5315 | */ |
5237 | spin_lock(&last_ptr->refill_lock); | 5316 | spin_lock(&last_ptr->refill_lock); |
5238 | if (last_ptr->block_group && | 5317 | used_block_group = last_ptr->block_group; |
5239 | (last_ptr->block_group->ro || | 5318 | if (used_block_group != block_group && |
5240 | !block_group_bits(last_ptr->block_group, data))) { | 5319 | (!used_block_group || |
5241 | offset = 0; | 5320 | used_block_group->ro || |
5321 | !block_group_bits(used_block_group, data))) { | ||
5322 | used_block_group = block_group; | ||
5242 | goto refill_cluster; | 5323 | goto refill_cluster; |
5243 | } | 5324 | } |
5244 | 5325 | ||
5245 | offset = btrfs_alloc_from_cluster(block_group, last_ptr, | 5326 | if (used_block_group != block_group) |
5246 | num_bytes, search_start); | 5327 | btrfs_get_block_group(used_block_group); |
5328 | |||
5329 | offset = btrfs_alloc_from_cluster(used_block_group, | ||
5330 | last_ptr, num_bytes, used_block_group->key.objectid); | ||
5247 | if (offset) { | 5331 | if (offset) { |
5248 | /* we have a block, we're done */ | 5332 | /* we have a block, we're done */ |
5249 | spin_unlock(&last_ptr->refill_lock); | 5333 | spin_unlock(&last_ptr->refill_lock); |
5250 | goto checks; | 5334 | goto checks; |
5251 | } | 5335 | } |
5252 | 5336 | ||
5253 | spin_lock(&last_ptr->lock); | 5337 | WARN_ON(last_ptr->block_group != used_block_group); |
5254 | /* | 5338 | if (used_block_group != block_group) { |
5255 | * whoops, this cluster doesn't actually point to | 5339 | btrfs_put_block_group(used_block_group); |
5256 | * this block group. Get a ref on the block | 5340 | used_block_group = block_group; |
5257 | * group is does point to and try again | ||
5258 | */ | ||
5259 | if (!last_ptr_loop && last_ptr->block_group && | ||
5260 | last_ptr->block_group != block_group && | ||
5261 | index <= | ||
5262 | get_block_group_index(last_ptr->block_group)) { | ||
5263 | |||
5264 | btrfs_put_block_group(block_group); | ||
5265 | block_group = last_ptr->block_group; | ||
5266 | btrfs_get_block_group(block_group); | ||
5267 | spin_unlock(&last_ptr->lock); | ||
5268 | spin_unlock(&last_ptr->refill_lock); | ||
5269 | |||
5270 | last_ptr_loop = 1; | ||
5271 | search_start = block_group->key.objectid; | ||
5272 | /* | ||
5273 | * we know this block group is properly | ||
5274 | * in the list because | ||
5275 | * btrfs_remove_block_group, drops the | ||
5276 | * cluster before it removes the block | ||
5277 | * group from the list | ||
5278 | */ | ||
5279 | goto have_block_group; | ||
5280 | } | 5341 | } |
5281 | spin_unlock(&last_ptr->lock); | ||
5282 | refill_cluster: | 5342 | refill_cluster: |
5343 | BUG_ON(used_block_group != block_group); | ||
5344 | /* If we are on LOOP_NO_EMPTY_SIZE, we can't | ||
5345 | * set up a new cluster, so lets just skip it | ||
5346 | * and let the allocator find whatever block | ||
5347 | * it can find. If we reach this point, we | ||
5348 | * will have tried the cluster allocator | ||
5349 | * plenty of times and not have found | ||
5350 | * anything, so we are likely way too | ||
5351 | * fragmented for the clustering stuff to find | ||
5352 | * anything. */ | ||
5353 | if (loop >= LOOP_NO_EMPTY_SIZE) { | ||
5354 | spin_unlock(&last_ptr->refill_lock); | ||
5355 | goto unclustered_alloc; | ||
5356 | } | ||
5357 | |||
5283 | /* | 5358 | /* |
5284 | * this cluster didn't work out, free it and | 5359 | * this cluster didn't work out, free it and |
5285 | * start over | 5360 | * start over |
5286 | */ | 5361 | */ |
5287 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | 5362 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
5288 | 5363 | ||
5289 | last_ptr_loop = 0; | ||
5290 | |||
5291 | /* allocate a cluster in this block group */ | 5364 | /* allocate a cluster in this block group */ |
5292 | ret = btrfs_find_space_cluster(trans, root, | 5365 | ret = btrfs_find_space_cluster(trans, root, |
5293 | block_group, last_ptr, | 5366 | block_group, last_ptr, |
5294 | offset, num_bytes, | 5367 | search_start, num_bytes, |
5295 | empty_cluster + empty_size); | 5368 | empty_cluster + empty_size); |
5296 | if (ret == 0) { | 5369 | if (ret == 0) { |
5297 | /* | 5370 | /* |
@@ -5327,6 +5400,7 @@ refill_cluster: | |||
5327 | goto loop; | 5400 | goto loop; |
5328 | } | 5401 | } |
5329 | 5402 | ||
5403 | unclustered_alloc: | ||
5330 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 5404 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
5331 | num_bytes, empty_size); | 5405 | num_bytes, empty_size); |
5332 | /* | 5406 | /* |
@@ -5353,14 +5427,14 @@ checks: | |||
5353 | search_start = stripe_align(root, offset); | 5427 | search_start = stripe_align(root, offset); |
5354 | /* move on to the next group */ | 5428 | /* move on to the next group */ |
5355 | if (search_start + num_bytes >= search_end) { | 5429 | if (search_start + num_bytes >= search_end) { |
5356 | btrfs_add_free_space(block_group, offset, num_bytes); | 5430 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5357 | goto loop; | 5431 | goto loop; |
5358 | } | 5432 | } |
5359 | 5433 | ||
5360 | /* move on to the next group */ | 5434 | /* move on to the next group */ |
5361 | if (search_start + num_bytes > | 5435 | if (search_start + num_bytes > |
5362 | block_group->key.objectid + block_group->key.offset) { | 5436 | used_block_group->key.objectid + used_block_group->key.offset) { |
5363 | btrfs_add_free_space(block_group, offset, num_bytes); | 5437 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5364 | goto loop; | 5438 | goto loop; |
5365 | } | 5439 | } |
5366 | 5440 | ||
@@ -5368,14 +5442,14 @@ checks: | |||
5368 | ins->offset = num_bytes; | 5442 | ins->offset = num_bytes; |
5369 | 5443 | ||
5370 | if (offset < search_start) | 5444 | if (offset < search_start) |
5371 | btrfs_add_free_space(block_group, offset, | 5445 | btrfs_add_free_space(used_block_group, offset, |
5372 | search_start - offset); | 5446 | search_start - offset); |
5373 | BUG_ON(offset > search_start); | 5447 | BUG_ON(offset > search_start); |
5374 | 5448 | ||
5375 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, | 5449 | ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, |
5376 | alloc_type); | 5450 | alloc_type); |
5377 | if (ret == -EAGAIN) { | 5451 | if (ret == -EAGAIN) { |
5378 | btrfs_add_free_space(block_group, offset, num_bytes); | 5452 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5379 | goto loop; | 5453 | goto loop; |
5380 | } | 5454 | } |
5381 | 5455 | ||
@@ -5384,15 +5458,19 @@ checks: | |||
5384 | ins->offset = num_bytes; | 5458 | ins->offset = num_bytes; |
5385 | 5459 | ||
5386 | if (offset < search_start) | 5460 | if (offset < search_start) |
5387 | btrfs_add_free_space(block_group, offset, | 5461 | btrfs_add_free_space(used_block_group, offset, |
5388 | search_start - offset); | 5462 | search_start - offset); |
5389 | BUG_ON(offset > search_start); | 5463 | BUG_ON(offset > search_start); |
5464 | if (used_block_group != block_group) | ||
5465 | btrfs_put_block_group(used_block_group); | ||
5390 | btrfs_put_block_group(block_group); | 5466 | btrfs_put_block_group(block_group); |
5391 | break; | 5467 | break; |
5392 | loop: | 5468 | loop: |
5393 | failed_cluster_refill = false; | 5469 | failed_cluster_refill = false; |
5394 | failed_alloc = false; | 5470 | failed_alloc = false; |
5395 | BUG_ON(index != get_block_group_index(block_group)); | 5471 | BUG_ON(index != get_block_group_index(block_group)); |
5472 | if (used_block_group != block_group) | ||
5473 | btrfs_put_block_group(used_block_group); | ||
5396 | btrfs_put_block_group(block_group); | 5474 | btrfs_put_block_group(block_group); |
5397 | } | 5475 | } |
5398 | up_read(&space_info->groups_sem); | 5476 | up_read(&space_info->groups_sem); |