diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 340 |
1 files changed, 272 insertions, 68 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df472ab1b5ac..1204c8ef6f32 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
27 | #include <linux/percpu_counter.h> | ||
27 | #include "compat.h" | 28 | #include "compat.h" |
28 | #include "hash.h" | 29 | #include "hash.h" |
29 | #include "ctree.h" | 30 | #include "ctree.h" |
@@ -2526,6 +2527,51 @@ static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, | |||
2526 | return 0; | 2527 | return 0; |
2527 | } | 2528 | } |
2528 | 2529 | ||
2530 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) | ||
2531 | { | ||
2532 | u64 num_bytes; | ||
2533 | |||
2534 | num_bytes = heads * (sizeof(struct btrfs_extent_item) + | ||
2535 | sizeof(struct btrfs_extent_inline_ref)); | ||
2536 | if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) | ||
2537 | num_bytes += heads * sizeof(struct btrfs_tree_block_info); | ||
2538 | |||
2539 | /* | ||
2540 | * We don't ever fill up leaves all the way so multiply by 2 just to be | ||
2541 | * closer to what we're really going to want to use. | ||
2542 | */ | ||
2543 | return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); | ||
2544 | } | ||
2545 | |||
2546 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | ||
2547 | struct btrfs_root *root) | ||
2548 | { | ||
2549 | struct btrfs_block_rsv *global_rsv; | ||
2550 | u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; | ||
2551 | u64 num_bytes; | ||
2552 | int ret = 0; | ||
2553 | |||
2554 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
2555 | num_heads = heads_to_leaves(root, num_heads); | ||
2556 | if (num_heads > 1) | ||
2557 | num_bytes += (num_heads - 1) * root->leafsize; | ||
2558 | num_bytes <<= 1; | ||
2559 | global_rsv = &root->fs_info->global_block_rsv; | ||
2560 | |||
2561 | /* | ||
2562 | * If we can't allocate any more chunks lets make sure we have _lots_ of | ||
2563 | * wiggle room since running delayed refs can create more delayed refs. | ||
2564 | */ | ||
2565 | if (global_rsv->space_info->full) | ||
2566 | num_bytes <<= 1; | ||
2567 | |||
2568 | spin_lock(&global_rsv->lock); | ||
2569 | if (global_rsv->reserved <= num_bytes) | ||
2570 | ret = 1; | ||
2571 | spin_unlock(&global_rsv->lock); | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2529 | /* | 2575 | /* |
2530 | * this starts processing the delayed reference count updates and | 2576 | * this starts processing the delayed reference count updates and |
2531 | * extent insertions we have queued up so far. count can be | 2577 | * extent insertions we have queued up so far. count can be |
@@ -2573,7 +2619,8 @@ progress: | |||
2573 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | 2619 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); |
2574 | if (old) { | 2620 | if (old) { |
2575 | DEFINE_WAIT(__wait); | 2621 | DEFINE_WAIT(__wait); |
2576 | if (delayed_refs->num_entries < 16348) | 2622 | if (delayed_refs->flushing || |
2623 | !btrfs_should_throttle_delayed_refs(trans, root)) | ||
2577 | return 0; | 2624 | return 0; |
2578 | 2625 | ||
2579 | prepare_to_wait(&delayed_refs->wait, &__wait, | 2626 | prepare_to_wait(&delayed_refs->wait, &__wait, |
@@ -2608,7 +2655,7 @@ again: | |||
2608 | 2655 | ||
2609 | while (1) { | 2656 | while (1) { |
2610 | if (!(run_all || run_most) && | 2657 | if (!(run_all || run_most) && |
2611 | delayed_refs->num_heads_ready < 64) | 2658 | !btrfs_should_throttle_delayed_refs(trans, root)) |
2612 | break; | 2659 | break; |
2613 | 2660 | ||
2614 | /* | 2661 | /* |
@@ -2629,6 +2676,7 @@ again: | |||
2629 | spin_unlock(&delayed_refs->lock); | 2676 | spin_unlock(&delayed_refs->lock); |
2630 | btrfs_abort_transaction(trans, root, ret); | 2677 | btrfs_abort_transaction(trans, root, ret); |
2631 | atomic_dec(&delayed_refs->procs_running_refs); | 2678 | atomic_dec(&delayed_refs->procs_running_refs); |
2679 | wake_up(&delayed_refs->wait); | ||
2632 | return ret; | 2680 | return ret; |
2633 | } | 2681 | } |
2634 | 2682 | ||
@@ -3310,6 +3358,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3310 | struct btrfs_space_info *found; | 3358 | struct btrfs_space_info *found; |
3311 | int i; | 3359 | int i; |
3312 | int factor; | 3360 | int factor; |
3361 | int ret; | ||
3313 | 3362 | ||
3314 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | 3363 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
3315 | BTRFS_BLOCK_GROUP_RAID10)) | 3364 | BTRFS_BLOCK_GROUP_RAID10)) |
@@ -3333,6 +3382,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3333 | if (!found) | 3382 | if (!found) |
3334 | return -ENOMEM; | 3383 | return -ENOMEM; |
3335 | 3384 | ||
3385 | ret = percpu_counter_init(&found->total_bytes_pinned, 0); | ||
3386 | if (ret) { | ||
3387 | kfree(found); | ||
3388 | return ret; | ||
3389 | } | ||
3390 | |||
3336 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 3391 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
3337 | INIT_LIST_HEAD(&found->block_groups[i]); | 3392 | INIT_LIST_HEAD(&found->block_groups[i]); |
3338 | init_rwsem(&found->groups_sem); | 3393 | init_rwsem(&found->groups_sem); |
@@ -3565,10 +3620,11 @@ alloc: | |||
3565 | } | 3620 | } |
3566 | 3621 | ||
3567 | /* | 3622 | /* |
3568 | * If we have less pinned bytes than we want to allocate then | 3623 | * If we don't have enough pinned space to deal with this |
3569 | * don't bother committing the transaction, it won't help us. | 3624 | * allocation don't bother committing the transaction. |
3570 | */ | 3625 | */ |
3571 | if (data_sinfo->bytes_pinned < bytes) | 3626 | if (percpu_counter_compare(&data_sinfo->total_bytes_pinned, |
3627 | bytes) < 0) | ||
3572 | committed = 1; | 3628 | committed = 1; |
3573 | spin_unlock(&data_sinfo->lock); | 3629 | spin_unlock(&data_sinfo->lock); |
3574 | 3630 | ||
@@ -3577,6 +3633,7 @@ commit_trans: | |||
3577 | if (!committed && | 3633 | if (!committed && |
3578 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | 3634 | !atomic_read(&root->fs_info->open_ioctl_trans)) { |
3579 | committed = 1; | 3635 | committed = 1; |
3636 | |||
3580 | trans = btrfs_join_transaction(root); | 3637 | trans = btrfs_join_transaction(root); |
3581 | if (IS_ERR(trans)) | 3638 | if (IS_ERR(trans)) |
3582 | return PTR_ERR(trans); | 3639 | return PTR_ERR(trans); |
@@ -3609,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
3609 | 3666 | ||
3610 | data_sinfo = root->fs_info->data_sinfo; | 3667 | data_sinfo = root->fs_info->data_sinfo; |
3611 | spin_lock(&data_sinfo->lock); | 3668 | spin_lock(&data_sinfo->lock); |
3669 | WARN_ON(data_sinfo->bytes_may_use < bytes); | ||
3612 | data_sinfo->bytes_may_use -= bytes; | 3670 | data_sinfo->bytes_may_use -= bytes; |
3613 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 3671 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
3614 | data_sinfo->flags, bytes, 0); | 3672 | data_sinfo->flags, bytes, 0); |
@@ -3886,12 +3944,11 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
3886 | unsigned long nr_pages) | 3944 | unsigned long nr_pages) |
3887 | { | 3945 | { |
3888 | struct super_block *sb = root->fs_info->sb; | 3946 | struct super_block *sb = root->fs_info->sb; |
3889 | int started; | ||
3890 | 3947 | ||
3891 | /* If we can not start writeback, just sync all the delalloc file. */ | 3948 | if (down_read_trylock(&sb->s_umount)) { |
3892 | started = try_to_writeback_inodes_sb_nr(sb, nr_pages, | 3949 | writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); |
3893 | WB_REASON_FS_FREE_SPACE); | 3950 | up_read(&sb->s_umount); |
3894 | if (!started) { | 3951 | } else { |
3895 | /* | 3952 | /* |
3896 | * We needn't worry the filesystem going from r/w to r/o though | 3953 | * We needn't worry the filesystem going from r/w to r/o though |
3897 | * we don't acquire ->s_umount mutex, because the filesystem | 3954 | * we don't acquire ->s_umount mutex, because the filesystem |
@@ -3899,9 +3956,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
3899 | * the filesystem is readonly(all dirty pages are written to | 3956 | * the filesystem is readonly(all dirty pages are written to |
3900 | * the disk). | 3957 | * the disk). |
3901 | */ | 3958 | */ |
3902 | btrfs_start_delalloc_inodes(root, 0); | 3959 | btrfs_start_all_delalloc_inodes(root->fs_info, 0); |
3903 | if (!current->journal_info) | 3960 | if (!current->journal_info) |
3904 | btrfs_wait_ordered_extents(root, 0); | 3961 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
3905 | } | 3962 | } |
3906 | } | 3963 | } |
3907 | 3964 | ||
@@ -3931,7 +3988,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3931 | if (delalloc_bytes == 0) { | 3988 | if (delalloc_bytes == 0) { |
3932 | if (trans) | 3989 | if (trans) |
3933 | return; | 3990 | return; |
3934 | btrfs_wait_ordered_extents(root, 0); | 3991 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
3935 | return; | 3992 | return; |
3936 | } | 3993 | } |
3937 | 3994 | ||
@@ -3959,7 +4016,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3959 | 4016 | ||
3960 | loops++; | 4017 | loops++; |
3961 | if (wait_ordered && !trans) { | 4018 | if (wait_ordered && !trans) { |
3962 | btrfs_wait_ordered_extents(root, 0); | 4019 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
3963 | } else { | 4020 | } else { |
3964 | time_left = schedule_timeout_killable(1); | 4021 | time_left = schedule_timeout_killable(1); |
3965 | if (time_left) | 4022 | if (time_left) |
@@ -3997,7 +4054,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
3997 | 4054 | ||
3998 | /* See if there is enough pinned space to make this reservation */ | 4055 | /* See if there is enough pinned space to make this reservation */ |
3999 | spin_lock(&space_info->lock); | 4056 | spin_lock(&space_info->lock); |
4000 | if (space_info->bytes_pinned >= bytes) { | 4057 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4058 | bytes) >= 0) { | ||
4001 | spin_unlock(&space_info->lock); | 4059 | spin_unlock(&space_info->lock); |
4002 | goto commit; | 4060 | goto commit; |
4003 | } | 4061 | } |
@@ -4012,7 +4070,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4012 | 4070 | ||
4013 | spin_lock(&space_info->lock); | 4071 | spin_lock(&space_info->lock); |
4014 | spin_lock(&delayed_rsv->lock); | 4072 | spin_lock(&delayed_rsv->lock); |
4015 | if (space_info->bytes_pinned + delayed_rsv->size < bytes) { | 4073 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4074 | bytes - delayed_rsv->size) >= 0) { | ||
4016 | spin_unlock(&delayed_rsv->lock); | 4075 | spin_unlock(&delayed_rsv->lock); |
4017 | spin_unlock(&space_info->lock); | 4076 | spin_unlock(&space_info->lock); |
4018 | return -ENOSPC; | 4077 | return -ENOSPC; |
@@ -4297,6 +4356,31 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
4297 | spin_unlock(&block_rsv->lock); | 4356 | spin_unlock(&block_rsv->lock); |
4298 | } | 4357 | } |
4299 | 4358 | ||
4359 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||
4360 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
4361 | int min_factor) | ||
4362 | { | ||
4363 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
4364 | u64 min_bytes; | ||
4365 | |||
4366 | if (global_rsv->space_info != dest->space_info) | ||
4367 | return -ENOSPC; | ||
4368 | |||
4369 | spin_lock(&global_rsv->lock); | ||
4370 | min_bytes = div_factor(global_rsv->size, min_factor); | ||
4371 | if (global_rsv->reserved < min_bytes + num_bytes) { | ||
4372 | spin_unlock(&global_rsv->lock); | ||
4373 | return -ENOSPC; | ||
4374 | } | ||
4375 | global_rsv->reserved -= num_bytes; | ||
4376 | if (global_rsv->reserved < global_rsv->size) | ||
4377 | global_rsv->full = 0; | ||
4378 | spin_unlock(&global_rsv->lock); | ||
4379 | |||
4380 | block_rsv_add_bytes(dest, num_bytes, 1); | ||
4381 | return 0; | ||
4382 | } | ||
4383 | |||
4300 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | 4384 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
4301 | struct btrfs_block_rsv *block_rsv, | 4385 | struct btrfs_block_rsv *block_rsv, |
4302 | struct btrfs_block_rsv *dest, u64 num_bytes) | 4386 | struct btrfs_block_rsv *dest, u64 num_bytes) |
@@ -5030,14 +5114,14 @@ static int update_block_group(struct btrfs_root *root, | |||
5030 | int factor; | 5114 | int factor; |
5031 | 5115 | ||
5032 | /* block accounting for super block */ | 5116 | /* block accounting for super block */ |
5033 | spin_lock(&info->delalloc_lock); | 5117 | spin_lock(&info->delalloc_root_lock); |
5034 | old_val = btrfs_super_bytes_used(info->super_copy); | 5118 | old_val = btrfs_super_bytes_used(info->super_copy); |
5035 | if (alloc) | 5119 | if (alloc) |
5036 | old_val += num_bytes; | 5120 | old_val += num_bytes; |
5037 | else | 5121 | else |
5038 | old_val -= num_bytes; | 5122 | old_val -= num_bytes; |
5039 | btrfs_set_super_bytes_used(info->super_copy, old_val); | 5123 | btrfs_set_super_bytes_used(info->super_copy, old_val); |
5040 | spin_unlock(&info->delalloc_lock); | 5124 | spin_unlock(&info->delalloc_root_lock); |
5041 | 5125 | ||
5042 | while (total) { | 5126 | while (total) { |
5043 | cache = btrfs_lookup_block_group(info, bytenr); | 5127 | cache = btrfs_lookup_block_group(info, bytenr); |
@@ -5189,6 +5273,80 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, | |||
5189 | return ret; | 5273 | return ret; |
5190 | } | 5274 | } |
5191 | 5275 | ||
5276 | static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes) | ||
5277 | { | ||
5278 | int ret; | ||
5279 | struct btrfs_block_group_cache *block_group; | ||
5280 | struct btrfs_caching_control *caching_ctl; | ||
5281 | |||
5282 | block_group = btrfs_lookup_block_group(root->fs_info, start); | ||
5283 | if (!block_group) | ||
5284 | return -EINVAL; | ||
5285 | |||
5286 | cache_block_group(block_group, 0); | ||
5287 | caching_ctl = get_caching_control(block_group); | ||
5288 | |||
5289 | if (!caching_ctl) { | ||
5290 | /* Logic error */ | ||
5291 | BUG_ON(!block_group_cache_done(block_group)); | ||
5292 | ret = btrfs_remove_free_space(block_group, start, num_bytes); | ||
5293 | } else { | ||
5294 | mutex_lock(&caching_ctl->mutex); | ||
5295 | |||
5296 | if (start >= caching_ctl->progress) { | ||
5297 | ret = add_excluded_extent(root, start, num_bytes); | ||
5298 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
5299 | ret = btrfs_remove_free_space(block_group, | ||
5300 | start, num_bytes); | ||
5301 | } else { | ||
5302 | num_bytes = caching_ctl->progress - start; | ||
5303 | ret = btrfs_remove_free_space(block_group, | ||
5304 | start, num_bytes); | ||
5305 | if (ret) | ||
5306 | goto out_lock; | ||
5307 | |||
5308 | num_bytes = (start + num_bytes) - | ||
5309 | caching_ctl->progress; | ||
5310 | start = caching_ctl->progress; | ||
5311 | ret = add_excluded_extent(root, start, num_bytes); | ||
5312 | } | ||
5313 | out_lock: | ||
5314 | mutex_unlock(&caching_ctl->mutex); | ||
5315 | put_caching_control(caching_ctl); | ||
5316 | } | ||
5317 | btrfs_put_block_group(block_group); | ||
5318 | return ret; | ||
5319 | } | ||
5320 | |||
5321 | int btrfs_exclude_logged_extents(struct btrfs_root *log, | ||
5322 | struct extent_buffer *eb) | ||
5323 | { | ||
5324 | struct btrfs_file_extent_item *item; | ||
5325 | struct btrfs_key key; | ||
5326 | int found_type; | ||
5327 | int i; | ||
5328 | |||
5329 | if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) | ||
5330 | return 0; | ||
5331 | |||
5332 | for (i = 0; i < btrfs_header_nritems(eb); i++) { | ||
5333 | btrfs_item_key_to_cpu(eb, &key, i); | ||
5334 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
5335 | continue; | ||
5336 | item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); | ||
5337 | found_type = btrfs_file_extent_type(eb, item); | ||
5338 | if (found_type == BTRFS_FILE_EXTENT_INLINE) | ||
5339 | continue; | ||
5340 | if (btrfs_file_extent_disk_bytenr(eb, item) == 0) | ||
5341 | continue; | ||
5342 | key.objectid = btrfs_file_extent_disk_bytenr(eb, item); | ||
5343 | key.offset = btrfs_file_extent_disk_num_bytes(eb, item); | ||
5344 | __exclude_logged_extent(log, key.objectid, key.offset); | ||
5345 | } | ||
5346 | |||
5347 | return 0; | ||
5348 | } | ||
5349 | |||
5192 | /** | 5350 | /** |
5193 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 5351 | * btrfs_update_reserved_bytes - update the block_group and space info counters |
5194 | * @cache: The cache we are manipulating | 5352 | * @cache: The cache we are manipulating |
@@ -5251,6 +5409,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
5251 | struct btrfs_caching_control *next; | 5409 | struct btrfs_caching_control *next; |
5252 | struct btrfs_caching_control *caching_ctl; | 5410 | struct btrfs_caching_control *caching_ctl; |
5253 | struct btrfs_block_group_cache *cache; | 5411 | struct btrfs_block_group_cache *cache; |
5412 | struct btrfs_space_info *space_info; | ||
5254 | 5413 | ||
5255 | down_write(&fs_info->extent_commit_sem); | 5414 | down_write(&fs_info->extent_commit_sem); |
5256 | 5415 | ||
@@ -5273,6 +5432,9 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
5273 | 5432 | ||
5274 | up_write(&fs_info->extent_commit_sem); | 5433 | up_write(&fs_info->extent_commit_sem); |
5275 | 5434 | ||
5435 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) | ||
5436 | percpu_counter_set(&space_info->total_bytes_pinned, 0); | ||
5437 | |||
5276 | update_global_block_rsv(fs_info); | 5438 | update_global_block_rsv(fs_info); |
5277 | } | 5439 | } |
5278 | 5440 | ||
@@ -5370,6 +5532,27 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
5370 | return 0; | 5532 | return 0; |
5371 | } | 5533 | } |
5372 | 5534 | ||
5535 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes, | ||
5536 | u64 owner, u64 root_objectid) | ||
5537 | { | ||
5538 | struct btrfs_space_info *space_info; | ||
5539 | u64 flags; | ||
5540 | |||
5541 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
5542 | if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
5543 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
5544 | else | ||
5545 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
5546 | } else { | ||
5547 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
5548 | } | ||
5549 | |||
5550 | space_info = __find_space_info(fs_info, flags); | ||
5551 | BUG_ON(!space_info); /* Logic bug */ | ||
5552 | percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); | ||
5553 | } | ||
5554 | |||
5555 | |||
5373 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 5556 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
5374 | struct btrfs_root *root, | 5557 | struct btrfs_root *root, |
5375 | u64 bytenr, u64 num_bytes, u64 parent, | 5558 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -5590,6 +5773,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5590 | goto out; | 5773 | goto out; |
5591 | } | 5774 | } |
5592 | } | 5775 | } |
5776 | add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid, | ||
5777 | root_objectid); | ||
5593 | } else { | 5778 | } else { |
5594 | if (found_extent) { | 5779 | if (found_extent) { |
5595 | BUG_ON(is_data && refs_to_drop != | 5780 | BUG_ON(is_data && refs_to_drop != |
@@ -5713,6 +5898,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5713 | u64 parent, int last_ref) | 5898 | u64 parent, int last_ref) |
5714 | { | 5899 | { |
5715 | struct btrfs_block_group_cache *cache = NULL; | 5900 | struct btrfs_block_group_cache *cache = NULL; |
5901 | int pin = 1; | ||
5716 | int ret; | 5902 | int ret; |
5717 | 5903 | ||
5718 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 5904 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
@@ -5745,8 +5931,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5745 | 5931 | ||
5746 | btrfs_add_free_space(cache, buf->start, buf->len); | 5932 | btrfs_add_free_space(cache, buf->start, buf->len); |
5747 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); | 5933 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
5934 | pin = 0; | ||
5748 | } | 5935 | } |
5749 | out: | 5936 | out: |
5937 | if (pin) | ||
5938 | add_pinned_bytes(root->fs_info, buf->len, | ||
5939 | btrfs_header_level(buf), | ||
5940 | root->root_key.objectid); | ||
5941 | |||
5750 | /* | 5942 | /* |
5751 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | 5943 | * Deleting the buffer, clear the corrupt flag since it doesn't matter |
5752 | * anymore. | 5944 | * anymore. |
@@ -5763,6 +5955,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
5763 | int ret; | 5955 | int ret; |
5764 | struct btrfs_fs_info *fs_info = root->fs_info; | 5956 | struct btrfs_fs_info *fs_info = root->fs_info; |
5765 | 5957 | ||
5958 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | ||
5959 | |||
5766 | /* | 5960 | /* |
5767 | * tree log blocks never actually go into the extent allocation | 5961 | * tree log blocks never actually go into the extent allocation |
5768 | * tree, just update pinning info and exit early. | 5962 | * tree, just update pinning info and exit early. |
@@ -6560,52 +6754,26 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
6560 | { | 6754 | { |
6561 | int ret; | 6755 | int ret; |
6562 | struct btrfs_block_group_cache *block_group; | 6756 | struct btrfs_block_group_cache *block_group; |
6563 | struct btrfs_caching_control *caching_ctl; | ||
6564 | u64 start = ins->objectid; | ||
6565 | u64 num_bytes = ins->offset; | ||
6566 | |||
6567 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
6568 | cache_block_group(block_group, 0); | ||
6569 | caching_ctl = get_caching_control(block_group); | ||
6570 | |||
6571 | if (!caching_ctl) { | ||
6572 | BUG_ON(!block_group_cache_done(block_group)); | ||
6573 | ret = btrfs_remove_free_space(block_group, start, num_bytes); | ||
6574 | if (ret) | ||
6575 | goto out; | ||
6576 | } else { | ||
6577 | mutex_lock(&caching_ctl->mutex); | ||
6578 | |||
6579 | if (start >= caching_ctl->progress) { | ||
6580 | ret = add_excluded_extent(root, start, num_bytes); | ||
6581 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
6582 | ret = btrfs_remove_free_space(block_group, | ||
6583 | start, num_bytes); | ||
6584 | } else { | ||
6585 | num_bytes = caching_ctl->progress - start; | ||
6586 | ret = btrfs_remove_free_space(block_group, | ||
6587 | start, num_bytes); | ||
6588 | if (ret) | ||
6589 | goto out_lock; | ||
6590 | 6757 | ||
6591 | start = caching_ctl->progress; | 6758 | /* |
6592 | num_bytes = ins->objectid + ins->offset - | 6759 | * Mixed block groups will exclude before processing the log so we only |
6593 | caching_ctl->progress; | 6760 | * need to do the exlude dance if this fs isn't mixed. |
6594 | ret = add_excluded_extent(root, start, num_bytes); | 6761 | */ |
6595 | } | 6762 | if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) { |
6596 | out_lock: | 6763 | ret = __exclude_logged_extent(root, ins->objectid, ins->offset); |
6597 | mutex_unlock(&caching_ctl->mutex); | ||
6598 | put_caching_control(caching_ctl); | ||
6599 | if (ret) | 6764 | if (ret) |
6600 | goto out; | 6765 | return ret; |
6601 | } | 6766 | } |
6602 | 6767 | ||
6768 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
6769 | if (!block_group) | ||
6770 | return -EINVAL; | ||
6771 | |||
6603 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, | 6772 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
6604 | RESERVE_ALLOC_NO_ACCOUNT); | 6773 | RESERVE_ALLOC_NO_ACCOUNT); |
6605 | BUG_ON(ret); /* logic error */ | 6774 | BUG_ON(ret); /* logic error */ |
6606 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 6775 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
6607 | 0, owner, offset, ins, 1); | 6776 | 0, owner, offset, ins, 1); |
6608 | out: | ||
6609 | btrfs_put_block_group(block_group); | 6777 | btrfs_put_block_group(block_group); |
6610 | return ret; | 6778 | return ret; |
6611 | } | 6779 | } |
@@ -7298,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7298 | int err = 0; | 7466 | int err = 0; |
7299 | int ret; | 7467 | int ret; |
7300 | int level; | 7468 | int level; |
7469 | bool root_dropped = false; | ||
7301 | 7470 | ||
7302 | path = btrfs_alloc_path(); | 7471 | path = btrfs_alloc_path(); |
7303 | if (!path) { | 7472 | if (!path) { |
@@ -7355,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7355 | while (1) { | 7524 | while (1) { |
7356 | btrfs_tree_lock(path->nodes[level]); | 7525 | btrfs_tree_lock(path->nodes[level]); |
7357 | btrfs_set_lock_blocking(path->nodes[level]); | 7526 | btrfs_set_lock_blocking(path->nodes[level]); |
7527 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; | ||
7358 | 7528 | ||
7359 | ret = btrfs_lookup_extent_info(trans, root, | 7529 | ret = btrfs_lookup_extent_info(trans, root, |
7360 | path->nodes[level]->start, | 7530 | path->nodes[level]->start, |
@@ -7370,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7370 | break; | 7540 | break; |
7371 | 7541 | ||
7372 | btrfs_tree_unlock(path->nodes[level]); | 7542 | btrfs_tree_unlock(path->nodes[level]); |
7543 | path->locks[level] = 0; | ||
7373 | WARN_ON(wc->refs[level] != 1); | 7544 | WARN_ON(wc->refs[level] != 1); |
7374 | level--; | 7545 | level--; |
7375 | } | 7546 | } |
@@ -7384,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7384 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | 7555 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); |
7385 | 7556 | ||
7386 | while (1) { | 7557 | while (1) { |
7387 | if (!for_reloc && btrfs_fs_closing(root->fs_info)) { | ||
7388 | pr_debug("btrfs: drop snapshot early exit\n"); | ||
7389 | err = -EAGAIN; | ||
7390 | goto out_end_trans; | ||
7391 | } | ||
7392 | 7558 | ||
7393 | ret = walk_down_tree(trans, root, path, wc); | 7559 | ret = walk_down_tree(trans, root, path, wc); |
7394 | if (ret < 0) { | 7560 | if (ret < 0) { |
@@ -7416,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7416 | } | 7582 | } |
7417 | 7583 | ||
7418 | BUG_ON(wc->level == 0); | 7584 | BUG_ON(wc->level == 0); |
7419 | if (btrfs_should_end_transaction(trans, tree_root)) { | 7585 | if (btrfs_should_end_transaction(trans, tree_root) || |
7586 | (!for_reloc && btrfs_need_cleaner_sleep(root))) { | ||
7420 | ret = btrfs_update_root(trans, tree_root, | 7587 | ret = btrfs_update_root(trans, tree_root, |
7421 | &root->root_key, | 7588 | &root->root_key, |
7422 | root_item); | 7589 | root_item); |
@@ -7427,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7427 | } | 7594 | } |
7428 | 7595 | ||
7429 | btrfs_end_transaction_throttle(trans, tree_root); | 7596 | btrfs_end_transaction_throttle(trans, tree_root); |
7597 | if (!for_reloc && btrfs_need_cleaner_sleep(root)) { | ||
7598 | pr_debug("btrfs: drop snapshot early exit\n"); | ||
7599 | err = -EAGAIN; | ||
7600 | goto out_free; | ||
7601 | } | ||
7602 | |||
7430 | trans = btrfs_start_transaction(tree_root, 0); | 7603 | trans = btrfs_start_transaction(tree_root, 0); |
7431 | if (IS_ERR(trans)) { | 7604 | if (IS_ERR(trans)) { |
7432 | err = PTR_ERR(trans); | 7605 | err = PTR_ERR(trans); |
@@ -7447,8 +7620,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7447 | } | 7620 | } |
7448 | 7621 | ||
7449 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 7622 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
7450 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, | 7623 | ret = btrfs_find_root(tree_root, &root->root_key, path, |
7451 | NULL, NULL); | 7624 | NULL, NULL); |
7452 | if (ret < 0) { | 7625 | if (ret < 0) { |
7453 | btrfs_abort_transaction(trans, tree_root, ret); | 7626 | btrfs_abort_transaction(trans, tree_root, ret); |
7454 | err = ret; | 7627 | err = ret; |
@@ -7465,18 +7638,28 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7465 | } | 7638 | } |
7466 | 7639 | ||
7467 | if (root->in_radix) { | 7640 | if (root->in_radix) { |
7468 | btrfs_free_fs_root(tree_root->fs_info, root); | 7641 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); |
7469 | } else { | 7642 | } else { |
7470 | free_extent_buffer(root->node); | 7643 | free_extent_buffer(root->node); |
7471 | free_extent_buffer(root->commit_root); | 7644 | free_extent_buffer(root->commit_root); |
7472 | kfree(root); | 7645 | btrfs_put_fs_root(root); |
7473 | } | 7646 | } |
7647 | root_dropped = true; | ||
7474 | out_end_trans: | 7648 | out_end_trans: |
7475 | btrfs_end_transaction_throttle(trans, tree_root); | 7649 | btrfs_end_transaction_throttle(trans, tree_root); |
7476 | out_free: | 7650 | out_free: |
7477 | kfree(wc); | 7651 | kfree(wc); |
7478 | btrfs_free_path(path); | 7652 | btrfs_free_path(path); |
7479 | out: | 7653 | out: |
7654 | /* | ||
7655 | * So if we need to stop dropping the snapshot for whatever reason we | ||
7656 | * need to make sure to add it back to the dead root list so that we | ||
7657 | * keep trying to do the work later. This also cleans up roots if we | ||
7658 | * don't have it in the radix (like when we recover after a power fail | ||
7659 | * or unmount) so we don't leak memory. | ||
7660 | */ | ||
7661 | if (root_dropped == false) | ||
7662 | btrfs_add_dead_root(root); | ||
7480 | if (err) | 7663 | if (err) |
7481 | btrfs_std_error(root->fs_info, err); | 7664 | btrfs_std_error(root->fs_info, err); |
7482 | return err; | 7665 | return err; |
@@ -7782,6 +7965,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7782 | struct btrfs_space_info *space_info; | 7965 | struct btrfs_space_info *space_info; |
7783 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 7966 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
7784 | struct btrfs_device *device; | 7967 | struct btrfs_device *device; |
7968 | struct btrfs_trans_handle *trans; | ||
7785 | u64 min_free; | 7969 | u64 min_free; |
7786 | u64 dev_min = 1; | 7970 | u64 dev_min = 1; |
7787 | u64 dev_nr = 0; | 7971 | u64 dev_nr = 0; |
@@ -7868,6 +8052,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7868 | do_div(min_free, dev_min); | 8052 | do_div(min_free, dev_min); |
7869 | } | 8053 | } |
7870 | 8054 | ||
8055 | /* We need to do this so that we can look at pending chunks */ | ||
8056 | trans = btrfs_join_transaction(root); | ||
8057 | if (IS_ERR(trans)) { | ||
8058 | ret = PTR_ERR(trans); | ||
8059 | goto out; | ||
8060 | } | ||
8061 | |||
7871 | mutex_lock(&root->fs_info->chunk_mutex); | 8062 | mutex_lock(&root->fs_info->chunk_mutex); |
7872 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8063 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7873 | u64 dev_offset; | 8064 | u64 dev_offset; |
@@ -7878,7 +8069,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7878 | */ | 8069 | */ |
7879 | if (device->total_bytes > device->bytes_used + min_free && | 8070 | if (device->total_bytes > device->bytes_used + min_free && |
7880 | !device->is_tgtdev_for_dev_replace) { | 8071 | !device->is_tgtdev_for_dev_replace) { |
7881 | ret = find_free_dev_extent(device, min_free, | 8072 | ret = find_free_dev_extent(trans, device, min_free, |
7882 | &dev_offset, NULL); | 8073 | &dev_offset, NULL); |
7883 | if (!ret) | 8074 | if (!ret) |
7884 | dev_nr++; | 8075 | dev_nr++; |
@@ -7890,6 +8081,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7890 | } | 8081 | } |
7891 | } | 8082 | } |
7892 | mutex_unlock(&root->fs_info->chunk_mutex); | 8083 | mutex_unlock(&root->fs_info->chunk_mutex); |
8084 | btrfs_end_transaction(trans, root); | ||
7893 | out: | 8085 | out: |
7894 | btrfs_put_block_group(block_group); | 8086 | btrfs_put_block_group(block_group); |
7895 | return ret; | 8087 | return ret; |
@@ -8032,6 +8224,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8032 | dump_space_info(space_info, 0, 0); | 8224 | dump_space_info(space_info, 0, 0); |
8033 | } | 8225 | } |
8034 | } | 8226 | } |
8227 | percpu_counter_destroy(&space_info->total_bytes_pinned); | ||
8035 | list_del(&space_info->list); | 8228 | list_del(&space_info->list); |
8036 | kfree(space_info); | 8229 | kfree(space_info); |
8037 | } | 8230 | } |
@@ -8254,6 +8447,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
8254 | sizeof(item)); | 8447 | sizeof(item)); |
8255 | if (ret) | 8448 | if (ret) |
8256 | btrfs_abort_transaction(trans, extent_root, ret); | 8449 | btrfs_abort_transaction(trans, extent_root, ret); |
8450 | ret = btrfs_finish_chunk_alloc(trans, extent_root, | ||
8451 | key.objectid, key.offset); | ||
8452 | if (ret) | ||
8453 | btrfs_abort_transaction(trans, extent_root, ret); | ||
8257 | } | 8454 | } |
8258 | } | 8455 | } |
8259 | 8456 | ||
@@ -8591,8 +8788,15 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
8591 | if (end - start >= range->minlen) { | 8788 | if (end - start >= range->minlen) { |
8592 | if (!block_group_cache_done(cache)) { | 8789 | if (!block_group_cache_done(cache)) { |
8593 | ret = cache_block_group(cache, 0); | 8790 | ret = cache_block_group(cache, 0); |
8594 | if (!ret) | 8791 | if (ret) { |
8595 | wait_block_group_cache_done(cache); | 8792 | btrfs_put_block_group(cache); |
8793 | break; | ||
8794 | } | ||
8795 | ret = wait_block_group_cache_done(cache); | ||
8796 | if (ret) { | ||
8797 | btrfs_put_block_group(cache); | ||
8798 | break; | ||
8799 | } | ||
8596 | } | 8800 | } |
8597 | ret = btrfs_trim_block_group(cache, | 8801 | ret = btrfs_trim_block_group(cache, |
8598 | &group_trimmed, | 8802 | &group_trimmed, |