diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-31 21:27:32 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-31 21:27:32 -0400 |
commit | ba929b6646c5b87c7bb15cd8d3e51617725c983b (patch) | |
tree | 153731dffbd8353369fd37a846f2de91f6662717 /fs/btrfs | |
parent | c9b95e5961c0294e0efffeaa847c1a1e6369204c (diff) | |
parent | 8b8b08cbfb9021af4b54b4175fc4c51d655aac8c (diff) |
Merge branch 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This pull is dedicated to Josef's enospc rework, which we've been
testing for a few releases now. It fixes some early enospc problems
and is dramatically faster.
This also includes an updated fix for the delalloc accounting that
happens after a fault in copy_from_user. My patch in v4.7 was almost
but not quite enough"
* 'for-linus-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: fix delalloc accounting after copy_from_user faults
Btrfs: avoid deadlocks during reservations in btrfs_truncate_block
Btrfs: use FLUSH_LIMIT for relocation in reserve_metadata_bytes
Btrfs: fill relocation block rsv after allocation
Btrfs: always use trans->block_rsv for orphans
Btrfs: change how we calculate the global block rsv
Btrfs: use root when checking need_async_flush
Btrfs: don't bother kicking async if there's nothing to reclaim
Btrfs: fix release reserved extents trace points
Btrfs: add fsid to some tracepoints
Btrfs: add tracepoints for flush events
Btrfs: fix delalloc reservation amount tracepoint
Btrfs: trace pinned extents
Btrfs: introduce ticketed enospc infrastructure
Btrfs: add tracepoint for adding block groups
Btrfs: warn_on for unaccounted spaces
Btrfs: change delayed reservation fallback behavior
Btrfs: always reserve metadata for delalloc extents
Btrfs: fix callers of btrfs_block_rsv_migrate
Btrfs: add bytes_readonly to the spaceinfo at once
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/ctree.h | 15 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 68 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 731 | ||||
-rw-r--r-- | fs/btrfs/file.c | 16 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 7 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 45 |
6 files changed, 544 insertions, 338 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b2620d1f883f..443fcc402114 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -439,6 +439,8 @@ struct btrfs_space_info { | |||
439 | struct list_head list; | 439 | struct list_head list; |
440 | /* Protected by the spinlock 'lock'. */ | 440 | /* Protected by the spinlock 'lock'. */ |
441 | struct list_head ro_bgs; | 441 | struct list_head ro_bgs; |
442 | struct list_head priority_tickets; | ||
443 | struct list_head tickets; | ||
442 | 444 | ||
443 | struct rw_semaphore groups_sem; | 445 | struct rw_semaphore groups_sem; |
444 | /* for block groups in our same type */ | 446 | /* for block groups in our same type */ |
@@ -2624,6 +2626,15 @@ enum btrfs_reserve_flush_enum { | |||
2624 | BTRFS_RESERVE_FLUSH_ALL, | 2626 | BTRFS_RESERVE_FLUSH_ALL, |
2625 | }; | 2627 | }; |
2626 | 2628 | ||
2629 | enum btrfs_flush_state { | ||
2630 | FLUSH_DELAYED_ITEMS_NR = 1, | ||
2631 | FLUSH_DELAYED_ITEMS = 2, | ||
2632 | FLUSH_DELALLOC = 3, | ||
2633 | FLUSH_DELALLOC_WAIT = 4, | ||
2634 | ALLOC_CHUNK = 5, | ||
2635 | COMMIT_TRANS = 6, | ||
2636 | }; | ||
2637 | |||
2627 | int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len); | 2638 | int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len); |
2628 | int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes); | 2639 | int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes); |
2629 | void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len); | 2640 | void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len); |
@@ -2661,8 +2672,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
2661 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, | 2672 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
2662 | enum btrfs_reserve_flush_enum flush); | 2673 | enum btrfs_reserve_flush_enum flush); |
2663 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 2674 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
2664 | struct btrfs_block_rsv *dst_rsv, | 2675 | struct btrfs_block_rsv *dst_rsv, u64 num_bytes, |
2665 | u64 num_bytes); | 2676 | int update_size); |
2666 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | 2677 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, |
2667 | struct btrfs_block_rsv *dest, u64 num_bytes, | 2678 | struct btrfs_block_rsv *dest, u64 num_bytes, |
2668 | int min_factor); | 2679 | int min_factor); |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index d3aaabbfada0..dd3c040139a2 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -553,7 +553,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | |||
553 | dst_rsv = &root->fs_info->delayed_block_rsv; | 553 | dst_rsv = &root->fs_info->delayed_block_rsv; |
554 | 554 | ||
555 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 555 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
556 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | 556 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); |
557 | if (!ret) { | 557 | if (!ret) { |
558 | trace_btrfs_space_reservation(root->fs_info, "delayed_item", | 558 | trace_btrfs_space_reservation(root->fs_info, "delayed_item", |
559 | item->key.objectid, | 559 | item->key.objectid, |
@@ -598,6 +598,29 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
598 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 598 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
599 | 599 | ||
600 | /* | 600 | /* |
601 | * If our block_rsv is the delalloc block reserve then check and see if | ||
602 | * we have our extra reservation for updating the inode. If not fall | ||
603 | * through and try to reserve space quickly. | ||
604 | * | ||
605 | * We used to try and steal from the delalloc block rsv or the global | ||
606 | * reserve, but we'd steal a full reservation, which isn't kind. We are | ||
607 | * here through delalloc which means we've likely just cowed down close | ||
608 | * to the leaf that contains the inode, so we would steal less just | ||
609 | * doing the fallback inode update, so if we do end up having to steal | ||
610 | * from the global block rsv we hopefully only steal one or two blocks | ||
611 | * worth which is less likely to hurt us. | ||
612 | */ | ||
613 | if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { | ||
614 | spin_lock(&BTRFS_I(inode)->lock); | ||
615 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | ||
616 | &BTRFS_I(inode)->runtime_flags)) | ||
617 | release = true; | ||
618 | else | ||
619 | src_rsv = NULL; | ||
620 | spin_unlock(&BTRFS_I(inode)->lock); | ||
621 | } | ||
622 | |||
623 | /* | ||
601 | * btrfs_dirty_inode will update the inode under btrfs_join_transaction | 624 | * btrfs_dirty_inode will update the inode under btrfs_join_transaction |
602 | * which doesn't reserve space for speed. This is a problem since we | 625 | * which doesn't reserve space for speed. This is a problem since we |
603 | * still need to reserve space for this update, so try to reserve the | 626 | * still need to reserve space for this update, so try to reserve the |
@@ -626,51 +649,10 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
626 | num_bytes, 1); | 649 | num_bytes, 1); |
627 | } | 650 | } |
628 | return ret; | 651 | return ret; |
629 | } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { | ||
630 | spin_lock(&BTRFS_I(inode)->lock); | ||
631 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | ||
632 | &BTRFS_I(inode)->runtime_flags)) { | ||
633 | spin_unlock(&BTRFS_I(inode)->lock); | ||
634 | release = true; | ||
635 | goto migrate; | ||
636 | } | ||
637 | spin_unlock(&BTRFS_I(inode)->lock); | ||
638 | |||
639 | /* Ok we didn't have space pre-reserved. This shouldn't happen | ||
640 | * too often but it can happen if we do delalloc to an existing | ||
641 | * inode which gets dirtied because of the time update, and then | ||
642 | * isn't touched again until after the transaction commits and | ||
643 | * then we try to write out the data. First try to be nice and | ||
644 | * reserve something strictly for us. If not be a pain and try | ||
645 | * to steal from the delalloc block rsv. | ||
646 | */ | ||
647 | ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, | ||
648 | BTRFS_RESERVE_NO_FLUSH); | ||
649 | if (!ret) | ||
650 | goto out; | ||
651 | |||
652 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | ||
653 | if (!ret) | ||
654 | goto out; | ||
655 | |||
656 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) { | ||
657 | btrfs_debug(root->fs_info, | ||
658 | "block rsv migrate returned %d", ret); | ||
659 | WARN_ON(1); | ||
660 | } | ||
661 | /* | ||
662 | * Ok this is a problem, let's just steal from the global rsv | ||
663 | * since this really shouldn't happen that often. | ||
664 | */ | ||
665 | ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv, | ||
666 | dst_rsv, num_bytes); | ||
667 | goto out; | ||
668 | } | 652 | } |
669 | 653 | ||
670 | migrate: | 654 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); |
671 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | ||
672 | 655 | ||
673 | out: | ||
674 | /* | 656 | /* |
675 | * Migrate only takes a reservation, it doesn't touch the size of the | 657 | * Migrate only takes a reservation, it doesn't touch the size of the |
676 | * block_rsv. This is to simplify people who don't normally have things | 658 | * block_rsv. This is to simplify people who don't normally have things |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b480fd555774..e9376b1657e2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | |||
111 | u64 num_bytes); | 111 | u64 num_bytes); |
112 | int btrfs_pin_extent(struct btrfs_root *root, | 112 | int btrfs_pin_extent(struct btrfs_root *root, |
113 | u64 bytenr, u64 num_bytes, int reserved); | 113 | u64 bytenr, u64 num_bytes, int reserved); |
114 | static int __reserve_metadata_bytes(struct btrfs_root *root, | ||
115 | struct btrfs_space_info *space_info, | ||
116 | u64 orig_bytes, | ||
117 | enum btrfs_reserve_flush_enum flush); | ||
118 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | ||
119 | struct btrfs_space_info *space_info, | ||
120 | u64 num_bytes); | ||
121 | static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | ||
122 | struct btrfs_space_info *space_info, | ||
123 | u64 num_bytes); | ||
114 | 124 | ||
115 | static noinline int | 125 | static noinline int |
116 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 126 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags) | |||
3913 | 3923 | ||
3914 | static int update_space_info(struct btrfs_fs_info *info, u64 flags, | 3924 | static int update_space_info(struct btrfs_fs_info *info, u64 flags, |
3915 | u64 total_bytes, u64 bytes_used, | 3925 | u64 total_bytes, u64 bytes_used, |
3926 | u64 bytes_readonly, | ||
3916 | struct btrfs_space_info **space_info) | 3927 | struct btrfs_space_info **space_info) |
3917 | { | 3928 | { |
3918 | struct btrfs_space_info *found; | 3929 | struct btrfs_space_info *found; |
@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3933 | found->disk_total += total_bytes * factor; | 3944 | found->disk_total += total_bytes * factor; |
3934 | found->bytes_used += bytes_used; | 3945 | found->bytes_used += bytes_used; |
3935 | found->disk_used += bytes_used * factor; | 3946 | found->disk_used += bytes_used * factor; |
3947 | found->bytes_readonly += bytes_readonly; | ||
3936 | if (total_bytes > 0) | 3948 | if (total_bytes > 0) |
3937 | found->full = 0; | 3949 | found->full = 0; |
3950 | space_info_add_new_bytes(info, found, total_bytes - | ||
3951 | bytes_used - bytes_readonly); | ||
3938 | spin_unlock(&found->lock); | 3952 | spin_unlock(&found->lock); |
3939 | *space_info = found; | 3953 | *space_info = found; |
3940 | return 0; | 3954 | return 0; |
@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3960 | found->disk_used = bytes_used * factor; | 3974 | found->disk_used = bytes_used * factor; |
3961 | found->bytes_pinned = 0; | 3975 | found->bytes_pinned = 0; |
3962 | found->bytes_reserved = 0; | 3976 | found->bytes_reserved = 0; |
3963 | found->bytes_readonly = 0; | 3977 | found->bytes_readonly = bytes_readonly; |
3964 | found->bytes_may_use = 0; | 3978 | found->bytes_may_use = 0; |
3965 | found->full = 0; | 3979 | found->full = 0; |
3966 | found->max_extent_size = 0; | 3980 | found->max_extent_size = 0; |
@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3969 | found->flush = 0; | 3983 | found->flush = 0; |
3970 | init_waitqueue_head(&found->wait); | 3984 | init_waitqueue_head(&found->wait); |
3971 | INIT_LIST_HEAD(&found->ro_bgs); | 3985 | INIT_LIST_HEAD(&found->ro_bgs); |
3986 | INIT_LIST_HEAD(&found->tickets); | ||
3987 | INIT_LIST_HEAD(&found->priority_tickets); | ||
3972 | 3988 | ||
3973 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, | 3989 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, |
3974 | info->space_info_kobj, "%s", | 3990 | info->space_info_kobj, "%s", |
@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4470 | space_info = __find_space_info(extent_root->fs_info, flags); | 4486 | space_info = __find_space_info(extent_root->fs_info, flags); |
4471 | if (!space_info) { | 4487 | if (!space_info) { |
4472 | ret = update_space_info(extent_root->fs_info, flags, | 4488 | ret = update_space_info(extent_root->fs_info, flags, |
4473 | 0, 0, &space_info); | 4489 | 0, 0, 0, &space_info); |
4474 | BUG_ON(ret); /* -ENOMEM */ | 4490 | BUG_ON(ret); /* -ENOMEM */ |
4475 | } | 4491 | } |
4476 | BUG_ON(!space_info); /* Logic error */ | 4492 | BUG_ON(!space_info); /* Logic error */ |
@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root, | |||
4582 | struct btrfs_space_info *space_info, u64 bytes, | 4598 | struct btrfs_space_info *space_info, u64 bytes, |
4583 | enum btrfs_reserve_flush_enum flush) | 4599 | enum btrfs_reserve_flush_enum flush) |
4584 | { | 4600 | { |
4585 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | 4601 | struct btrfs_block_rsv *global_rsv; |
4586 | u64 profile = btrfs_get_alloc_profile(root, 0); | 4602 | u64 profile; |
4587 | u64 space_size; | 4603 | u64 space_size; |
4588 | u64 avail; | 4604 | u64 avail; |
4589 | u64 used; | 4605 | u64 used; |
4590 | 4606 | ||
4607 | /* Don't overcommit when in mixed mode. */ | ||
4608 | if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) | ||
4609 | return 0; | ||
4610 | |||
4611 | BUG_ON(root->fs_info == NULL); | ||
4612 | global_rsv = &root->fs_info->global_block_rsv; | ||
4613 | profile = btrfs_get_alloc_profile(root, 0); | ||
4591 | used = space_info->bytes_used + space_info->bytes_reserved + | 4614 | used = space_info->bytes_used + space_info->bytes_reserved + |
4592 | space_info->bytes_pinned + space_info->bytes_readonly; | 4615 | space_info->bytes_pinned + space_info->bytes_readonly; |
4593 | 4616 | ||
@@ -4739,6 +4762,11 @@ skip_async: | |||
4739 | spin_unlock(&space_info->lock); | 4762 | spin_unlock(&space_info->lock); |
4740 | break; | 4763 | break; |
4741 | } | 4764 | } |
4765 | if (list_empty(&space_info->tickets) && | ||
4766 | list_empty(&space_info->priority_tickets)) { | ||
4767 | spin_unlock(&space_info->lock); | ||
4768 | break; | ||
4769 | } | ||
4742 | spin_unlock(&space_info->lock); | 4770 | spin_unlock(&space_info->lock); |
4743 | 4771 | ||
4744 | loops++; | 4772 | loops++; |
@@ -4807,13 +4835,11 @@ commit: | |||
4807 | return btrfs_commit_transaction(trans, root); | 4835 | return btrfs_commit_transaction(trans, root); |
4808 | } | 4836 | } |
4809 | 4837 | ||
4810 | enum flush_state { | 4838 | struct reserve_ticket { |
4811 | FLUSH_DELAYED_ITEMS_NR = 1, | 4839 | u64 bytes; |
4812 | FLUSH_DELAYED_ITEMS = 2, | 4840 | int error; |
4813 | FLUSH_DELALLOC = 3, | 4841 | struct list_head list; |
4814 | FLUSH_DELALLOC_WAIT = 4, | 4842 | wait_queue_head_t wait; |
4815 | ALLOC_CHUNK = 5, | ||
4816 | COMMIT_TRANS = 6, | ||
4817 | }; | 4843 | }; |
4818 | 4844 | ||
4819 | static int flush_space(struct btrfs_root *root, | 4845 | static int flush_space(struct btrfs_root *root, |
@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root, | |||
4866 | break; | 4892 | break; |
4867 | } | 4893 | } |
4868 | 4894 | ||
4895 | trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes, | ||
4896 | orig_bytes, state, ret); | ||
4869 | return ret; | 4897 | return ret; |
4870 | } | 4898 | } |
4871 | 4899 | ||
@@ -4873,17 +4901,22 @@ static inline u64 | |||
4873 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | 4901 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, |
4874 | struct btrfs_space_info *space_info) | 4902 | struct btrfs_space_info *space_info) |
4875 | { | 4903 | { |
4904 | struct reserve_ticket *ticket; | ||
4876 | u64 used; | 4905 | u64 used; |
4877 | u64 expected; | 4906 | u64 expected; |
4878 | u64 to_reclaim; | 4907 | u64 to_reclaim = 0; |
4879 | 4908 | ||
4880 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | 4909 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); |
4881 | spin_lock(&space_info->lock); | ||
4882 | if (can_overcommit(root, space_info, to_reclaim, | 4910 | if (can_overcommit(root, space_info, to_reclaim, |
4883 | BTRFS_RESERVE_FLUSH_ALL)) { | 4911 | BTRFS_RESERVE_FLUSH_ALL)) |
4884 | to_reclaim = 0; | 4912 | return 0; |
4885 | goto out; | 4913 | |
4886 | } | 4914 | list_for_each_entry(ticket, &space_info->tickets, list) |
4915 | to_reclaim += ticket->bytes; | ||
4916 | list_for_each_entry(ticket, &space_info->priority_tickets, list) | ||
4917 | to_reclaim += ticket->bytes; | ||
4918 | if (to_reclaim) | ||
4919 | return to_reclaim; | ||
4887 | 4920 | ||
4888 | used = space_info->bytes_used + space_info->bytes_reserved + | 4921 | used = space_info->bytes_used + space_info->bytes_reserved + |
4889 | space_info->bytes_pinned + space_info->bytes_readonly + | 4922 | space_info->bytes_pinned + space_info->bytes_readonly + |
@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | |||
4899 | to_reclaim = 0; | 4932 | to_reclaim = 0; |
4900 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + | 4933 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + |
4901 | space_info->bytes_reserved); | 4934 | space_info->bytes_reserved); |
4902 | out: | ||
4903 | spin_unlock(&space_info->lock); | ||
4904 | |||
4905 | return to_reclaim; | 4935 | return to_reclaim; |
4906 | } | 4936 | } |
4907 | 4937 | ||
4908 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | 4938 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, |
4909 | struct btrfs_fs_info *fs_info, u64 used) | 4939 | struct btrfs_root *root, u64 used) |
4910 | { | 4940 | { |
4911 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); | 4941 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); |
4912 | 4942 | ||
@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | |||
4914 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) | 4944 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) |
4915 | return 0; | 4945 | return 0; |
4916 | 4946 | ||
4917 | return (used >= thresh && !btrfs_fs_closing(fs_info) && | 4947 | if (!btrfs_calc_reclaim_metadata_size(root, space_info)) |
4918 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | 4948 | return 0; |
4949 | |||
4950 | return (used >= thresh && !btrfs_fs_closing(root->fs_info) && | ||
4951 | !test_bit(BTRFS_FS_STATE_REMOUNTING, | ||
4952 | &root->fs_info->fs_state)); | ||
4919 | } | 4953 | } |
4920 | 4954 | ||
4921 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, | 4955 | static void wake_all_tickets(struct list_head *head) |
4922 | struct btrfs_fs_info *fs_info, | ||
4923 | int flush_state) | ||
4924 | { | 4956 | { |
4925 | u64 used; | 4957 | struct reserve_ticket *ticket; |
4926 | |||
4927 | spin_lock(&space_info->lock); | ||
4928 | /* | ||
4929 | * We run out of space and have not got any free space via flush_space, | ||
4930 | * so don't bother doing async reclaim. | ||
4931 | */ | ||
4932 | if (flush_state > COMMIT_TRANS && space_info->full) { | ||
4933 | spin_unlock(&space_info->lock); | ||
4934 | return 0; | ||
4935 | } | ||
4936 | 4958 | ||
4937 | used = space_info->bytes_used + space_info->bytes_reserved + | 4959 | while (!list_empty(head)) { |
4938 | space_info->bytes_pinned + space_info->bytes_readonly + | 4960 | ticket = list_first_entry(head, struct reserve_ticket, list); |
4939 | space_info->bytes_may_use; | 4961 | list_del_init(&ticket->list); |
4940 | if (need_do_async_reclaim(space_info, fs_info, used)) { | 4962 | ticket->error = -ENOSPC; |
4941 | spin_unlock(&space_info->lock); | 4963 | wake_up(&ticket->wait); |
4942 | return 1; | ||
4943 | } | 4964 | } |
4944 | spin_unlock(&space_info->lock); | ||
4945 | |||
4946 | return 0; | ||
4947 | } | 4965 | } |
4948 | 4966 | ||
4967 | /* | ||
4968 | * This is for normal flushers, we can wait all goddamned day if we want to. We | ||
4969 | * will loop and continuously try to flush as long as we are making progress. | ||
4970 | * We count progress as clearing off tickets each time we have to loop. | ||
4971 | */ | ||
4949 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | 4972 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) |
4950 | { | 4973 | { |
4974 | struct reserve_ticket *last_ticket = NULL; | ||
4951 | struct btrfs_fs_info *fs_info; | 4975 | struct btrfs_fs_info *fs_info; |
4952 | struct btrfs_space_info *space_info; | 4976 | struct btrfs_space_info *space_info; |
4953 | u64 to_reclaim; | 4977 | u64 to_reclaim; |
4954 | int flush_state; | 4978 | int flush_state; |
4979 | int commit_cycles = 0; | ||
4955 | 4980 | ||
4956 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | 4981 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); |
4957 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 4982 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
4958 | 4983 | ||
4984 | spin_lock(&space_info->lock); | ||
4959 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | 4985 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, |
4960 | space_info); | 4986 | space_info); |
4961 | if (!to_reclaim) | 4987 | if (!to_reclaim) { |
4988 | space_info->flush = 0; | ||
4989 | spin_unlock(&space_info->lock); | ||
4962 | return; | 4990 | return; |
4991 | } | ||
4992 | last_ticket = list_first_entry(&space_info->tickets, | ||
4993 | struct reserve_ticket, list); | ||
4994 | spin_unlock(&space_info->lock); | ||
4963 | 4995 | ||
4964 | flush_state = FLUSH_DELAYED_ITEMS_NR; | 4996 | flush_state = FLUSH_DELAYED_ITEMS_NR; |
4965 | do { | 4997 | do { |
4998 | struct reserve_ticket *ticket; | ||
4999 | int ret; | ||
5000 | |||
5001 | ret = flush_space(fs_info->fs_root, space_info, to_reclaim, | ||
5002 | to_reclaim, flush_state); | ||
5003 | spin_lock(&space_info->lock); | ||
5004 | if (list_empty(&space_info->tickets)) { | ||
5005 | space_info->flush = 0; | ||
5006 | spin_unlock(&space_info->lock); | ||
5007 | return; | ||
5008 | } | ||
5009 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | ||
5010 | space_info); | ||
5011 | ticket = list_first_entry(&space_info->tickets, | ||
5012 | struct reserve_ticket, list); | ||
5013 | if (last_ticket == ticket) { | ||
5014 | flush_state++; | ||
5015 | } else { | ||
5016 | last_ticket = ticket; | ||
5017 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5018 | if (commit_cycles) | ||
5019 | commit_cycles--; | ||
5020 | } | ||
5021 | |||
5022 | if (flush_state > COMMIT_TRANS) { | ||
5023 | commit_cycles++; | ||
5024 | if (commit_cycles > 2) { | ||
5025 | wake_all_tickets(&space_info->tickets); | ||
5026 | space_info->flush = 0; | ||
5027 | } else { | ||
5028 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5029 | } | ||
5030 | } | ||
5031 | spin_unlock(&space_info->lock); | ||
5032 | } while (flush_state <= COMMIT_TRANS); | ||
5033 | } | ||
5034 | |||
5035 | void btrfs_init_async_reclaim_work(struct work_struct *work) | ||
5036 | { | ||
5037 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | ||
5038 | } | ||
5039 | |||
5040 | static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, | ||
5041 | struct btrfs_space_info *space_info, | ||
5042 | struct reserve_ticket *ticket) | ||
5043 | { | ||
5044 | u64 to_reclaim; | ||
5045 | int flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5046 | |||
5047 | spin_lock(&space_info->lock); | ||
5048 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | ||
5049 | space_info); | ||
5050 | if (!to_reclaim) { | ||
5051 | spin_unlock(&space_info->lock); | ||
5052 | return; | ||
5053 | } | ||
5054 | spin_unlock(&space_info->lock); | ||
5055 | |||
5056 | do { | ||
4966 | flush_space(fs_info->fs_root, space_info, to_reclaim, | 5057 | flush_space(fs_info->fs_root, space_info, to_reclaim, |
4967 | to_reclaim, flush_state); | 5058 | to_reclaim, flush_state); |
4968 | flush_state++; | 5059 | flush_state++; |
4969 | if (!btrfs_need_do_async_reclaim(space_info, fs_info, | 5060 | spin_lock(&space_info->lock); |
4970 | flush_state)) | 5061 | if (ticket->bytes == 0) { |
5062 | spin_unlock(&space_info->lock); | ||
4971 | return; | 5063 | return; |
5064 | } | ||
5065 | spin_unlock(&space_info->lock); | ||
5066 | |||
5067 | /* | ||
5068 | * Priority flushers can't wait on delalloc without | ||
5069 | * deadlocking. | ||
5070 | */ | ||
5071 | if (flush_state == FLUSH_DELALLOC || | ||
5072 | flush_state == FLUSH_DELALLOC_WAIT) | ||
5073 | flush_state = ALLOC_CHUNK; | ||
4972 | } while (flush_state < COMMIT_TRANS); | 5074 | } while (flush_state < COMMIT_TRANS); |
4973 | } | 5075 | } |
4974 | 5076 | ||
4975 | void btrfs_init_async_reclaim_work(struct work_struct *work) | 5077 | static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, |
5078 | struct btrfs_space_info *space_info, | ||
5079 | struct reserve_ticket *ticket, u64 orig_bytes) | ||
5080 | |||
4976 | { | 5081 | { |
4977 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | 5082 | DEFINE_WAIT(wait); |
5083 | int ret = 0; | ||
5084 | |||
5085 | spin_lock(&space_info->lock); | ||
5086 | while (ticket->bytes > 0 && ticket->error == 0) { | ||
5087 | ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); | ||
5088 | if (ret) { | ||
5089 | ret = -EINTR; | ||
5090 | break; | ||
5091 | } | ||
5092 | spin_unlock(&space_info->lock); | ||
5093 | |||
5094 | schedule(); | ||
5095 | |||
5096 | finish_wait(&ticket->wait, &wait); | ||
5097 | spin_lock(&space_info->lock); | ||
5098 | } | ||
5099 | if (!ret) | ||
5100 | ret = ticket->error; | ||
5101 | if (!list_empty(&ticket->list)) | ||
5102 | list_del_init(&ticket->list); | ||
5103 | if (ticket->bytes && ticket->bytes < orig_bytes) { | ||
5104 | u64 num_bytes = orig_bytes - ticket->bytes; | ||
5105 | space_info->bytes_may_use -= num_bytes; | ||
5106 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5107 | space_info->flags, num_bytes, 0); | ||
5108 | } | ||
5109 | spin_unlock(&space_info->lock); | ||
5110 | |||
5111 | return ret; | ||
4978 | } | 5112 | } |
4979 | 5113 | ||
4980 | /** | 5114 | /** |
4981 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 5115 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space |
4982 | * @root - the root we're allocating for | 5116 | * @root - the root we're allocating for |
4983 | * @block_rsv - the block_rsv we're allocating for | 5117 | * @space_info - the space info we want to allocate from |
4984 | * @orig_bytes - the number of bytes we want | 5118 | * @orig_bytes - the number of bytes we want |
4985 | * @flush - whether or not we can flush to make our reservation | 5119 | * @flush - whether or not we can flush to make our reservation |
4986 | * | 5120 | * |
@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work) | |||
4991 | * regain reservations will be made and this will fail if there is not enough | 5125 | * regain reservations will be made and this will fail if there is not enough |
4992 | * space already. | 5126 | * space already. |
4993 | */ | 5127 | */ |
4994 | static int reserve_metadata_bytes(struct btrfs_root *root, | 5128 | static int __reserve_metadata_bytes(struct btrfs_root *root, |
4995 | struct btrfs_block_rsv *block_rsv, | 5129 | struct btrfs_space_info *space_info, |
4996 | u64 orig_bytes, | 5130 | u64 orig_bytes, |
4997 | enum btrfs_reserve_flush_enum flush) | 5131 | enum btrfs_reserve_flush_enum flush) |
4998 | { | 5132 | { |
4999 | struct btrfs_space_info *space_info = block_rsv->space_info; | 5133 | struct reserve_ticket ticket; |
5000 | u64 used; | 5134 | u64 used; |
5001 | u64 num_bytes = orig_bytes; | ||
5002 | int flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5003 | int ret = 0; | 5135 | int ret = 0; |
5004 | bool flushing = false; | ||
5005 | 5136 | ||
5006 | again: | 5137 | ASSERT(orig_bytes); |
5007 | ret = 0; | 5138 | ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); |
5008 | spin_lock(&space_info->lock); | ||
5009 | /* | ||
5010 | * We only want to wait if somebody other than us is flushing and we | ||
5011 | * are actually allowed to flush all things. | ||
5012 | */ | ||
5013 | while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && | ||
5014 | space_info->flush) { | ||
5015 | spin_unlock(&space_info->lock); | ||
5016 | /* | ||
5017 | * If we have a trans handle we can't wait because the flusher | ||
5018 | * may have to commit the transaction, which would mean we would | ||
5019 | * deadlock since we are waiting for the flusher to finish, but | ||
5020 | * hold the current transaction open. | ||
5021 | */ | ||
5022 | if (current->journal_info) | ||
5023 | return -EAGAIN; | ||
5024 | ret = wait_event_killable(space_info->wait, !space_info->flush); | ||
5025 | /* Must have been killed, return */ | ||
5026 | if (ret) | ||
5027 | return -EINTR; | ||
5028 | |||
5029 | spin_lock(&space_info->lock); | ||
5030 | } | ||
5031 | 5139 | ||
5140 | spin_lock(&space_info->lock); | ||
5032 | ret = -ENOSPC; | 5141 | ret = -ENOSPC; |
5033 | used = space_info->bytes_used + space_info->bytes_reserved + | 5142 | used = space_info->bytes_used + space_info->bytes_reserved + |
5034 | space_info->bytes_pinned + space_info->bytes_readonly + | 5143 | space_info->bytes_pinned + space_info->bytes_readonly + |
5035 | space_info->bytes_may_use; | 5144 | space_info->bytes_may_use; |
5036 | 5145 | ||
5037 | /* | 5146 | /* |
5038 | * The idea here is that we've not already over-reserved the block group | 5147 | * If we have enough space then hooray, make our reservation and carry |
5039 | * then we can go ahead and save our reservation first and then start | 5148 | * on. If not see if we can overcommit, and if we can, hooray carry on. |
5040 | * flushing if we need to. Otherwise if we've already overcommitted | 5149 | * If not things get more complicated. |
5041 | * lets start flushing stuff first and then come back and try to make | ||
5042 | * our reservation. | ||
5043 | */ | 5150 | */ |
5044 | if (used <= space_info->total_bytes) { | 5151 | if (used + orig_bytes <= space_info->total_bytes) { |
5045 | if (used + orig_bytes <= space_info->total_bytes) { | 5152 | space_info->bytes_may_use += orig_bytes; |
5046 | space_info->bytes_may_use += orig_bytes; | 5153 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
5047 | trace_btrfs_space_reservation(root->fs_info, | 5154 | space_info->flags, orig_bytes, |
5048 | "space_info", space_info->flags, orig_bytes, 1); | 5155 | 1); |
5049 | ret = 0; | 5156 | ret = 0; |
5050 | } else { | 5157 | } else if (can_overcommit(root, space_info, orig_bytes, flush)) { |
5051 | /* | ||
5052 | * Ok set num_bytes to orig_bytes since we aren't | ||
5053 | * overocmmitted, this way we only try and reclaim what | ||
5054 | * we need. | ||
5055 | */ | ||
5056 | num_bytes = orig_bytes; | ||
5057 | } | ||
5058 | } else { | ||
5059 | /* | ||
5060 | * Ok we're over committed, set num_bytes to the overcommitted | ||
5061 | * amount plus the amount of bytes that we need for this | ||
5062 | * reservation. | ||
5063 | */ | ||
5064 | num_bytes = used - space_info->total_bytes + | ||
5065 | (orig_bytes * 2); | ||
5066 | } | ||
5067 | |||
5068 | if (ret && can_overcommit(root, space_info, orig_bytes, flush)) { | ||
5069 | space_info->bytes_may_use += orig_bytes; | 5158 | space_info->bytes_may_use += orig_bytes; |
5070 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 5159 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
5071 | space_info->flags, orig_bytes, | 5160 | space_info->flags, orig_bytes, |
@@ -5074,16 +5163,31 @@ again: | |||
5074 | } | 5163 | } |
5075 | 5164 | ||
5076 | /* | 5165 | /* |
5077 | * Couldn't make our reservation, save our place so while we're trying | 5166 | * If we couldn't make a reservation then set up our reservation ticket |
5078 | * to reclaim space we can actually use it instead of somebody else | 5167 | * and kick the async worker if it's not already running. |
5079 | * stealing it from us. | ||
5080 | * | 5168 | * |
5081 | * We make the other tasks wait for the flush only when we can flush | 5169 | * If we are a priority flusher then we just need to add our ticket to |
5082 | * all things. | 5170 | * the list and we will do our own flushing further down. |
5083 | */ | 5171 | */ |
5084 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { | 5172 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { |
5085 | flushing = true; | 5173 | ticket.bytes = orig_bytes; |
5086 | space_info->flush = 1; | 5174 | ticket.error = 0; |
5175 | init_waitqueue_head(&ticket.wait); | ||
5176 | if (flush == BTRFS_RESERVE_FLUSH_ALL) { | ||
5177 | list_add_tail(&ticket.list, &space_info->tickets); | ||
5178 | if (!space_info->flush) { | ||
5179 | space_info->flush = 1; | ||
5180 | trace_btrfs_trigger_flush(root->fs_info, | ||
5181 | space_info->flags, | ||
5182 | orig_bytes, flush, | ||
5183 | "enospc"); | ||
5184 | queue_work(system_unbound_wq, | ||
5185 | &root->fs_info->async_reclaim_work); | ||
5186 | } | ||
5187 | } else { | ||
5188 | list_add_tail(&ticket.list, | ||
5189 | &space_info->priority_tickets); | ||
5190 | } | ||
5087 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | 5191 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { |
5088 | used += orig_bytes; | 5192 | used += orig_bytes; |
5089 | /* | 5193 | /* |
@@ -5092,39 +5196,67 @@ again: | |||
5092 | * the async reclaim as we will panic. | 5196 | * the async reclaim as we will panic. |
5093 | */ | 5197 | */ |
5094 | if (!root->fs_info->log_root_recovering && | 5198 | if (!root->fs_info->log_root_recovering && |
5095 | need_do_async_reclaim(space_info, root->fs_info, used) && | 5199 | need_do_async_reclaim(space_info, root, used) && |
5096 | !work_busy(&root->fs_info->async_reclaim_work)) | 5200 | !work_busy(&root->fs_info->async_reclaim_work)) { |
5201 | trace_btrfs_trigger_flush(root->fs_info, | ||
5202 | space_info->flags, | ||
5203 | orig_bytes, flush, | ||
5204 | "preempt"); | ||
5097 | queue_work(system_unbound_wq, | 5205 | queue_work(system_unbound_wq, |
5098 | &root->fs_info->async_reclaim_work); | 5206 | &root->fs_info->async_reclaim_work); |
5207 | } | ||
5099 | } | 5208 | } |
5100 | spin_unlock(&space_info->lock); | 5209 | spin_unlock(&space_info->lock); |
5101 | |||
5102 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) | 5210 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
5103 | goto out; | 5211 | return ret; |
5104 | 5212 | ||
5105 | ret = flush_space(root, space_info, num_bytes, orig_bytes, | 5213 | if (flush == BTRFS_RESERVE_FLUSH_ALL) |
5106 | flush_state); | 5214 | return wait_reserve_ticket(root->fs_info, space_info, &ticket, |
5107 | flush_state++; | 5215 | orig_bytes); |
5108 | 5216 | ||
5109 | /* | 5217 | ret = 0; |
5110 | * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock | 5218 | priority_reclaim_metadata_space(root->fs_info, space_info, &ticket); |
5111 | * would happen. So skip delalloc flush. | 5219 | spin_lock(&space_info->lock); |
5112 | */ | 5220 | if (ticket.bytes) { |
5113 | if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | 5221 | if (ticket.bytes < orig_bytes) { |
5114 | (flush_state == FLUSH_DELALLOC || | 5222 | u64 num_bytes = orig_bytes - ticket.bytes; |
5115 | flush_state == FLUSH_DELALLOC_WAIT)) | 5223 | space_info->bytes_may_use -= num_bytes; |
5116 | flush_state = ALLOC_CHUNK; | 5224 | trace_btrfs_space_reservation(root->fs_info, |
5225 | "space_info", space_info->flags, | ||
5226 | num_bytes, 0); | ||
5117 | 5227 | ||
5118 | if (!ret) | 5228 | } |
5119 | goto again; | 5229 | list_del_init(&ticket.list); |
5120 | else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | 5230 | ret = -ENOSPC; |
5121 | flush_state < COMMIT_TRANS) | 5231 | } |
5122 | goto again; | 5232 | spin_unlock(&space_info->lock); |
5123 | else if (flush == BTRFS_RESERVE_FLUSH_ALL && | 5233 | ASSERT(list_empty(&ticket.list)); |
5124 | flush_state <= COMMIT_TRANS) | 5234 | return ret; |
5125 | goto again; | 5235 | } |
5126 | 5236 | ||
5127 | out: | 5237 | /** |
5238 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
5239 | * @root - the root we're allocating for | ||
5240 | * @block_rsv - the block_rsv we're allocating for | ||
5241 | * @orig_bytes - the number of bytes we want | ||
5242 | * @flush - whether or not we can flush to make our reservation | ||
5243 | * | ||
5244 | * This will reserve orig_bytes number of bytes from the space info associated | |
5245 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
5246 | * flush out space to make room. It will do this by flushing delalloc if | ||
5247 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
5248 | * regain reservations will be made and this will fail if there is not enough | ||
5249 | * space already. | ||
5250 | */ | ||
5251 | static int reserve_metadata_bytes(struct btrfs_root *root, | ||
5252 | struct btrfs_block_rsv *block_rsv, | ||
5253 | u64 orig_bytes, | ||
5254 | enum btrfs_reserve_flush_enum flush) | ||
5255 | { | ||
5256 | int ret; | ||
5257 | |||
5258 | ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes, | ||
5259 | flush); | ||
5128 | if (ret == -ENOSPC && | 5260 | if (ret == -ENOSPC && |
5129 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | 5261 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { |
5130 | struct btrfs_block_rsv *global_rsv = | 5262 | struct btrfs_block_rsv *global_rsv = |
@@ -5137,13 +5269,8 @@ out: | |||
5137 | if (ret == -ENOSPC) | 5269 | if (ret == -ENOSPC) |
5138 | trace_btrfs_space_reservation(root->fs_info, | 5270 | trace_btrfs_space_reservation(root->fs_info, |
5139 | "space_info:enospc", | 5271 | "space_info:enospc", |
5140 | space_info->flags, orig_bytes, 1); | 5272 | block_rsv->space_info->flags, |
5141 | if (flushing) { | 5273 | orig_bytes, 1); |
5142 | spin_lock(&space_info->lock); | ||
5143 | space_info->flush = 0; | ||
5144 | wake_up_all(&space_info->wait); | ||
5145 | spin_unlock(&space_info->lock); | ||
5146 | } | ||
5147 | return ret; | 5274 | return ret; |
5148 | } | 5275 | } |
5149 | 5276 | ||
@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | |||
5219 | return 0; | 5346 | return 0; |
5220 | } | 5347 | } |
5221 | 5348 | ||
5349 | /* | ||
5350 | * This is for space we already have accounted in space_info->bytes_may_use, so | ||
5351 | * basically when we're returning space from block_rsv's. | ||
5352 | */ | ||
5353 | static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | ||
5354 | struct btrfs_space_info *space_info, | ||
5355 | u64 num_bytes) | ||
5356 | { | ||
5357 | struct reserve_ticket *ticket; | ||
5358 | struct list_head *head; | ||
5359 | u64 used; | ||
5360 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; | ||
5361 | bool check_overcommit = false; | ||
5362 | |||
5363 | spin_lock(&space_info->lock); | ||
5364 | head = &space_info->priority_tickets; | ||
5365 | |||
5366 | /* | ||
5367 | * If we are over our limit then we need to check and see if we can | ||
5368 | * overcommit, and if we can't then we just need to free up our space | ||
5369 | * and not satisfy any requests. | ||
5370 | */ | ||
5371 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
5372 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
5373 | space_info->bytes_may_use; | ||
5374 | if (used - num_bytes >= space_info->total_bytes) | ||
5375 | check_overcommit = true; | ||
5376 | again: | ||
5377 | while (!list_empty(head) && num_bytes) { | ||
5378 | ticket = list_first_entry(head, struct reserve_ticket, | ||
5379 | list); | ||
5380 | /* | ||
5381 | * We use 0 bytes because this space is already reserved, so | ||
5382 | * adding the ticket space would be a double count. | ||
5383 | */ | ||
5384 | if (check_overcommit && | ||
5385 | !can_overcommit(fs_info->extent_root, space_info, 0, | ||
5386 | flush)) | ||
5387 | break; | ||
5388 | if (num_bytes >= ticket->bytes) { | ||
5389 | list_del_init(&ticket->list); | ||
5390 | num_bytes -= ticket->bytes; | ||
5391 | ticket->bytes = 0; | ||
5392 | wake_up(&ticket->wait); | ||
5393 | } else { | ||
5394 | ticket->bytes -= num_bytes; | ||
5395 | num_bytes = 0; | ||
5396 | } | ||
5397 | } | ||
5398 | |||
5399 | if (num_bytes && head == &space_info->priority_tickets) { | ||
5400 | head = &space_info->tickets; | ||
5401 | flush = BTRFS_RESERVE_FLUSH_ALL; | ||
5402 | goto again; | ||
5403 | } | ||
5404 | space_info->bytes_may_use -= num_bytes; | ||
5405 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5406 | space_info->flags, num_bytes, 0); | ||
5407 | spin_unlock(&space_info->lock); | ||
5408 | } | ||
5409 | |||
5410 | /* | ||
5411 | * This is for newly allocated space that isn't accounted in | ||
5412 | * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent | ||
5413 | * we use this helper. | ||
5414 | */ | ||
5415 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | ||
5416 | struct btrfs_space_info *space_info, | ||
5417 | u64 num_bytes) | ||
5418 | { | ||
5419 | struct reserve_ticket *ticket; | ||
5420 | struct list_head *head = &space_info->priority_tickets; | ||
5421 | |||
5422 | again: | ||
5423 | while (!list_empty(head) && num_bytes) { | ||
5424 | ticket = list_first_entry(head, struct reserve_ticket, | ||
5425 | list); | ||
5426 | if (num_bytes >= ticket->bytes) { | ||
5427 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5428 | space_info->flags, | ||
5429 | ticket->bytes, 1); | ||
5430 | list_del_init(&ticket->list); | ||
5431 | num_bytes -= ticket->bytes; | ||
5432 | space_info->bytes_may_use += ticket->bytes; | ||
5433 | ticket->bytes = 0; | ||
5434 | wake_up(&ticket->wait); | ||
5435 | } else { | ||
5436 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5437 | space_info->flags, | ||
5438 | num_bytes, 1); | ||
5439 | space_info->bytes_may_use += num_bytes; | ||
5440 | ticket->bytes -= num_bytes; | ||
5441 | num_bytes = 0; | ||
5442 | } | ||
5443 | } | ||
5444 | |||
5445 | if (num_bytes && head == &space_info->priority_tickets) { | ||
5446 | head = &space_info->tickets; | ||
5447 | goto again; | ||
5448 | } | ||
5449 | } | ||
5450 | |||
5222 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | 5451 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
5223 | struct btrfs_block_rsv *block_rsv, | 5452 | struct btrfs_block_rsv *block_rsv, |
5224 | struct btrfs_block_rsv *dest, u64 num_bytes) | 5453 | struct btrfs_block_rsv *dest, u64 num_bytes) |
@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | |||
5253 | } | 5482 | } |
5254 | spin_unlock(&dest->lock); | 5483 | spin_unlock(&dest->lock); |
5255 | } | 5484 | } |
5256 | if (num_bytes) { | 5485 | if (num_bytes) |
5257 | spin_lock(&space_info->lock); | 5486 | space_info_add_old_bytes(fs_info, space_info, |
5258 | space_info->bytes_may_use -= num_bytes; | 5487 | num_bytes); |
5259 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5260 | space_info->flags, num_bytes, 0); | ||
5261 | spin_unlock(&space_info->lock); | ||
5262 | } | ||
5263 | } | 5488 | } |
5264 | } | 5489 | } |
5265 | 5490 | ||
5266 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | 5491 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, |
5267 | struct btrfs_block_rsv *dst, u64 num_bytes) | 5492 | struct btrfs_block_rsv *dst, u64 num_bytes, |
5493 | int update_size) | ||
5268 | { | 5494 | { |
5269 | int ret; | 5495 | int ret; |
5270 | 5496 | ||
@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | |||
5272 | if (ret) | 5498 | if (ret) |
5273 | return ret; | 5499 | return ret; |
5274 | 5500 | ||
5275 | block_rsv_add_bytes(dst, num_bytes, 1); | 5501 | block_rsv_add_bytes(dst, num_bytes, update_size); |
5276 | return 0; | 5502 | return 0; |
5277 | } | 5503 | } |
5278 | 5504 | ||
@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
5379 | return ret; | 5605 | return ret; |
5380 | } | 5606 | } |
5381 | 5607 | ||
5382 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
5383 | struct btrfs_block_rsv *dst_rsv, | ||
5384 | u64 num_bytes) | ||
5385 | { | ||
5386 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
5387 | } | ||
5388 | |||
5389 | void btrfs_block_rsv_release(struct btrfs_root *root, | 5608 | void btrfs_block_rsv_release(struct btrfs_root *root, |
5390 | struct btrfs_block_rsv *block_rsv, | 5609 | struct btrfs_block_rsv *block_rsv, |
5391 | u64 num_bytes) | 5610 | u64 num_bytes) |
@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root, | |||
5398 | num_bytes); | 5617 | num_bytes); |
5399 | } | 5618 | } |
5400 | 5619 | ||
5401 | /* | ||
5402 | * helper to calculate size of global block reservation. | ||
5403 | * the desired value is sum of space used by extent tree, | ||
5404 | * checksum tree and root tree | ||
5405 | */ | ||
5406 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | ||
5407 | { | ||
5408 | struct btrfs_space_info *sinfo; | ||
5409 | u64 num_bytes; | ||
5410 | u64 meta_used; | ||
5411 | u64 data_used; | ||
5412 | int csum_size = btrfs_super_csum_size(fs_info->super_copy); | ||
5413 | |||
5414 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
5415 | spin_lock(&sinfo->lock); | ||
5416 | data_used = sinfo->bytes_used; | ||
5417 | spin_unlock(&sinfo->lock); | ||
5418 | |||
5419 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
5420 | spin_lock(&sinfo->lock); | ||
5421 | if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) | ||
5422 | data_used = 0; | ||
5423 | meta_used = sinfo->bytes_used; | ||
5424 | spin_unlock(&sinfo->lock); | ||
5425 | |||
5426 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | ||
5427 | csum_size * 2; | ||
5428 | num_bytes += div_u64(data_used + meta_used, 50); | ||
5429 | |||
5430 | if (num_bytes * 3 > meta_used) | ||
5431 | num_bytes = div_u64(meta_used, 3); | ||
5432 | |||
5433 | return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); | ||
5434 | } | ||
5435 | |||
5436 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | 5620 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) |
5437 | { | 5621 | { |
5438 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 5622 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
5439 | struct btrfs_space_info *sinfo = block_rsv->space_info; | 5623 | struct btrfs_space_info *sinfo = block_rsv->space_info; |
5440 | u64 num_bytes; | 5624 | u64 num_bytes; |
5441 | 5625 | ||
5442 | num_bytes = calc_global_metadata_size(fs_info); | 5626 | /* |
5627 | * The global block rsv is based on the size of the extent tree, the | ||
5628 | * checksum tree and the root tree. If the fs is empty we want to set | ||
5629 | * it to a minimal amount for safety. | ||
5630 | */ | ||
5631 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) + | ||
5632 | btrfs_root_used(&fs_info->csum_root->root_item) + | ||
5633 | btrfs_root_used(&fs_info->tree_root->root_item); | ||
5634 | num_bytes = max_t(u64, num_bytes, SZ_16M); | ||
5443 | 5635 | ||
5444 | spin_lock(&sinfo->lock); | 5636 | spin_lock(&sinfo->lock); |
5445 | spin_lock(&block_rsv->lock); | 5637 | spin_lock(&block_rsv->lock); |
@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
5554 | struct inode *inode) | 5746 | struct inode *inode) |
5555 | { | 5747 | { |
5556 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5748 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5557 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | 5749 | /* |
5750 | * We always use trans->block_rsv here as we will have reserved space | ||
5751 | * for our orphan when starting the transaction, using get_block_rsv() | ||
5752 | * here will sometimes make us choose the wrong block rsv as we could be | ||
5753 | * doing a reloc inode for a non refcounted root. | ||
5754 | */ | ||
5755 | struct btrfs_block_rsv *src_rsv = trans->block_rsv; | ||
5558 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | 5756 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; |
5559 | 5757 | ||
5560 | /* | 5758 | /* |
@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
5565 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 5763 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
5566 | trace_btrfs_space_reservation(root->fs_info, "orphan", | 5764 | trace_btrfs_space_reservation(root->fs_info, "orphan", |
5567 | btrfs_ino(inode), num_bytes, 1); | 5765 | btrfs_ino(inode), num_bytes, 1); |
5568 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 5766 | return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); |
5569 | } | 5767 | } |
5570 | 5768 | ||
5571 | void btrfs_orphan_release_metadata(struct inode *inode) | 5769 | void btrfs_orphan_release_metadata(struct inode *inode) |
@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | |||
5620 | BTRFS_RESERVE_FLUSH_ALL); | 5818 | BTRFS_RESERVE_FLUSH_ALL); |
5621 | 5819 | ||
5622 | if (ret == -ENOSPC && use_global_rsv) | 5820 | if (ret == -ENOSPC && use_global_rsv) |
5623 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); | 5821 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1); |
5624 | 5822 | ||
5625 | if (ret && *qgroup_reserved) | 5823 | if (ret && *qgroup_reserved) |
5626 | btrfs_qgroup_free_meta(root, *qgroup_reserved); | 5824 | btrfs_qgroup_free_meta(root, *qgroup_reserved); |
@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5730 | u64 to_reserve = 0; | 5928 | u64 to_reserve = 0; |
5731 | u64 csum_bytes; | 5929 | u64 csum_bytes; |
5732 | unsigned nr_extents = 0; | 5930 | unsigned nr_extents = 0; |
5733 | int extra_reserve = 0; | ||
5734 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; | 5931 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
5735 | int ret = 0; | 5932 | int ret = 0; |
5736 | bool delalloc_lock = true; | 5933 | bool delalloc_lock = true; |
5737 | u64 to_free = 0; | 5934 | u64 to_free = 0; |
5738 | unsigned dropped; | 5935 | unsigned dropped; |
5936 | bool release_extra = false; | ||
5739 | 5937 | ||
5740 | /* If we are a free space inode we need to not flush since we will be in | 5938 | /* If we are a free space inode we need to not flush since we will be in |
5741 | * the middle of a transaction commit. We also don't need the delalloc | 5939 | * the middle of a transaction commit. We also don't need the delalloc |
5742 | * mutex since we won't race with anybody. We need this mostly to make | 5940 | * mutex since we won't race with anybody. We need this mostly to make |
5743 | * lockdep shut its filthy mouth. | 5941 | * lockdep shut its filthy mouth. |
5942 | * | ||
5943 | * If we have a transaction open (can happen if we call truncate_block | ||
5944 | * from truncate), then we need FLUSH_LIMIT so we don't deadlock. | ||
5744 | */ | 5945 | */ |
5745 | if (btrfs_is_free_space_inode(inode)) { | 5946 | if (btrfs_is_free_space_inode(inode)) { |
5746 | flush = BTRFS_RESERVE_NO_FLUSH; | 5947 | flush = BTRFS_RESERVE_NO_FLUSH; |
5747 | delalloc_lock = false; | 5948 | delalloc_lock = false; |
5949 | } else if (current->journal_info) { | ||
5950 | flush = BTRFS_RESERVE_FLUSH_LIMIT; | ||
5748 | } | 5951 | } |
5749 | 5952 | ||
5750 | if (flush != BTRFS_RESERVE_NO_FLUSH && | 5953 | if (flush != BTRFS_RESERVE_NO_FLUSH && |
@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5761 | BTRFS_MAX_EXTENT_SIZE - 1, | 5964 | BTRFS_MAX_EXTENT_SIZE - 1, |
5762 | BTRFS_MAX_EXTENT_SIZE); | 5965 | BTRFS_MAX_EXTENT_SIZE); |
5763 | BTRFS_I(inode)->outstanding_extents += nr_extents; | 5966 | BTRFS_I(inode)->outstanding_extents += nr_extents; |
5764 | nr_extents = 0; | ||
5765 | 5967 | ||
5968 | nr_extents = 0; | ||
5766 | if (BTRFS_I(inode)->outstanding_extents > | 5969 | if (BTRFS_I(inode)->outstanding_extents > |
5767 | BTRFS_I(inode)->reserved_extents) | 5970 | BTRFS_I(inode)->reserved_extents) |
5768 | nr_extents = BTRFS_I(inode)->outstanding_extents - | 5971 | nr_extents += BTRFS_I(inode)->outstanding_extents - |
5769 | BTRFS_I(inode)->reserved_extents; | 5972 | BTRFS_I(inode)->reserved_extents; |
5770 | 5973 | ||
5771 | /* | 5974 | /* We always want to reserve a slot for updating the inode. */ |
5772 | * Add an item to reserve for updating the inode when we complete the | 5975 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1); |
5773 | * delalloc io. | ||
5774 | */ | ||
5775 | if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | ||
5776 | &BTRFS_I(inode)->runtime_flags)) { | ||
5777 | nr_extents++; | ||
5778 | extra_reserve = 1; | ||
5779 | } | ||
5780 | |||
5781 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | ||
5782 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); | 5976 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); |
5783 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 5977 | csum_bytes = BTRFS_I(inode)->csum_bytes; |
5784 | spin_unlock(&BTRFS_I(inode)->lock); | 5978 | spin_unlock(&BTRFS_I(inode)->lock); |
@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5790 | goto out_fail; | 5984 | goto out_fail; |
5791 | } | 5985 | } |
5792 | 5986 | ||
5793 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 5987 | ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush); |
5794 | if (unlikely(ret)) { | 5988 | if (unlikely(ret)) { |
5795 | btrfs_qgroup_free_meta(root, nr_extents * root->nodesize); | 5989 | btrfs_qgroup_free_meta(root, nr_extents * root->nodesize); |
5796 | goto out_fail; | 5990 | goto out_fail; |
5797 | } | 5991 | } |
5798 | 5992 | ||
5799 | spin_lock(&BTRFS_I(inode)->lock); | 5993 | spin_lock(&BTRFS_I(inode)->lock); |
5800 | if (extra_reserve) { | 5994 | if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
5801 | set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | 5995 | &BTRFS_I(inode)->runtime_flags)) { |
5802 | &BTRFS_I(inode)->runtime_flags); | 5996 | to_reserve -= btrfs_calc_trans_metadata_size(root, 1); |
5803 | nr_extents--; | 5997 | release_extra = true; |
5804 | } | 5998 | } |
5805 | BTRFS_I(inode)->reserved_extents += nr_extents; | 5999 | BTRFS_I(inode)->reserved_extents += nr_extents; |
5806 | spin_unlock(&BTRFS_I(inode)->lock); | 6000 | spin_unlock(&BTRFS_I(inode)->lock); |
@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5811 | if (to_reserve) | 6005 | if (to_reserve) |
5812 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | 6006 | trace_btrfs_space_reservation(root->fs_info, "delalloc", |
5813 | btrfs_ino(inode), to_reserve, 1); | 6007 | btrfs_ino(inode), to_reserve, 1); |
5814 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 6008 | if (release_extra) |
5815 | 6009 | btrfs_block_rsv_release(root, block_rsv, | |
6010 | btrfs_calc_trans_metadata_size(root, | ||
6011 | 1)); | ||
5816 | return 0; | 6012 | return 0; |
5817 | 6013 | ||
5818 | out_fail: | 6014 | out_fail: |
@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
6044 | spin_unlock(&cache->lock); | 6240 | spin_unlock(&cache->lock); |
6045 | spin_unlock(&cache->space_info->lock); | 6241 | spin_unlock(&cache->space_info->lock); |
6046 | 6242 | ||
6243 | trace_btrfs_space_reservation(root->fs_info, "pinned", | ||
6244 | cache->space_info->flags, | ||
6245 | num_bytes, 1); | ||
6047 | set_extent_dirty(info->pinned_extents, | 6246 | set_extent_dirty(info->pinned_extents, |
6048 | bytenr, bytenr + num_bytes - 1, | 6247 | bytenr, bytenr + num_bytes - 1, |
6049 | GFP_NOFS | __GFP_NOFAIL); | 6248 | GFP_NOFS | __GFP_NOFAIL); |
@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root, | |||
6118 | spin_unlock(&cache->lock); | 6317 | spin_unlock(&cache->lock); |
6119 | spin_unlock(&cache->space_info->lock); | 6318 | spin_unlock(&cache->space_info->lock); |
6120 | 6319 | ||
6320 | trace_btrfs_space_reservation(root->fs_info, "pinned", | ||
6321 | cache->space_info->flags, num_bytes, 1); | ||
6121 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, | 6322 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
6122 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | 6323 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); |
6123 | if (reserved) | ||
6124 | trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); | ||
6125 | return 0; | 6324 | return 0; |
6126 | } | 6325 | } |
6127 | 6326 | ||
@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, | |||
6476 | spin_lock(&cache->lock); | 6675 | spin_lock(&cache->lock); |
6477 | cache->pinned -= len; | 6676 | cache->pinned -= len; |
6478 | space_info->bytes_pinned -= len; | 6677 | space_info->bytes_pinned -= len; |
6678 | |||
6679 | trace_btrfs_space_reservation(fs_info, "pinned", | ||
6680 | space_info->flags, len, 0); | ||
6479 | space_info->max_extent_size = 0; | 6681 | space_info->max_extent_size = 0; |
6480 | percpu_counter_add(&space_info->total_bytes_pinned, -len); | 6682 | percpu_counter_add(&space_info->total_bytes_pinned, -len); |
6481 | if (cache->ro) { | 6683 | if (cache->ro) { |
@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, | |||
6483 | readonly = true; | 6685 | readonly = true; |
6484 | } | 6686 | } |
6485 | spin_unlock(&cache->lock); | 6687 | spin_unlock(&cache->lock); |
6486 | if (!readonly && global_rsv->space_info == space_info) { | 6688 | if (!readonly && return_free_space && |
6689 | global_rsv->space_info == space_info) { | ||
6690 | u64 to_add = len; | ||
6691 | WARN_ON(!return_free_space); | ||
6487 | spin_lock(&global_rsv->lock); | 6692 | spin_lock(&global_rsv->lock); |
6488 | if (!global_rsv->full) { | 6693 | if (!global_rsv->full) { |
6489 | len = min(len, global_rsv->size - | 6694 | to_add = min(len, global_rsv->size - |
6490 | global_rsv->reserved); | 6695 | global_rsv->reserved); |
6491 | global_rsv->reserved += len; | 6696 | global_rsv->reserved += to_add; |
6492 | space_info->bytes_may_use += len; | 6697 | space_info->bytes_may_use += to_add; |
6493 | if (global_rsv->reserved >= global_rsv->size) | 6698 | if (global_rsv->reserved >= global_rsv->size) |
6494 | global_rsv->full = 1; | 6699 | global_rsv->full = 1; |
6700 | trace_btrfs_space_reservation(fs_info, | ||
6701 | "space_info", | ||
6702 | space_info->flags, | ||
6703 | to_add, 1); | ||
6704 | len -= to_add; | ||
6495 | } | 6705 | } |
6496 | spin_unlock(&global_rsv->lock); | 6706 | spin_unlock(&global_rsv->lock); |
6707 | /* Add to any tickets we may have */ | ||
6708 | if (len) | ||
6709 | space_info_add_new_bytes(fs_info, space_info, | ||
6710 | len); | ||
6497 | } | 6711 | } |
6498 | spin_unlock(&space_info->lock); | 6712 | spin_unlock(&space_info->lock); |
6499 | } | 6713 | } |
@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
7782 | ret = btrfs_discard_extent(root, start, len, NULL); | 7996 | ret = btrfs_discard_extent(root, start, len, NULL); |
7783 | btrfs_add_free_space(cache, start, len); | 7997 | btrfs_add_free_space(cache, start, len); |
7784 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); | 7998 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); |
7999 | trace_btrfs_reserved_extent_free(root, start, len); | ||
7785 | } | 8000 | } |
7786 | 8001 | ||
7787 | btrfs_put_block_group(cache); | 8002 | btrfs_put_block_group(cache); |
7788 | |||
7789 | trace_btrfs_reserved_extent_free(root, start, len); | ||
7790 | |||
7791 | return ret; | 8003 | return ret; |
7792 | } | 8004 | } |
7793 | 8005 | ||
@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
9791 | space_info = list_entry(info->space_info.next, | 10003 | space_info = list_entry(info->space_info.next, |
9792 | struct btrfs_space_info, | 10004 | struct btrfs_space_info, |
9793 | list); | 10005 | list); |
9794 | if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { | 10006 | |
9795 | if (WARN_ON(space_info->bytes_pinned > 0 || | 10007 | /* |
10008 | * Do not hide this behind enospc_debug, this is actually | ||
10009 | * important and indicates a real bug if this happens. | ||
10010 | */ | ||
10011 | if (WARN_ON(space_info->bytes_pinned > 0 || | ||
9796 | space_info->bytes_reserved > 0 || | 10012 | space_info->bytes_reserved > 0 || |
9797 | space_info->bytes_may_use > 0)) { | 10013 | space_info->bytes_may_use > 0)) |
9798 | dump_space_info(space_info, 0, 0); | 10014 | dump_space_info(space_info, 0, 0); |
9799 | } | ||
9800 | } | ||
9801 | list_del(&space_info->list); | 10015 | list_del(&space_info->list); |
9802 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | 10016 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { |
9803 | struct kobject *kobj; | 10017 | struct kobject *kobj; |
@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
10005 | goto error; | 10219 | goto error; |
10006 | } | 10220 | } |
10007 | 10221 | ||
10222 | trace_btrfs_add_block_group(root->fs_info, cache, 0); | ||
10008 | ret = update_space_info(info, cache->flags, found_key.offset, | 10223 | ret = update_space_info(info, cache->flags, found_key.offset, |
10009 | btrfs_block_group_used(&cache->item), | 10224 | btrfs_block_group_used(&cache->item), |
10010 | &space_info); | 10225 | cache->bytes_super, &space_info); |
10011 | if (ret) { | 10226 | if (ret) { |
10012 | btrfs_remove_free_space_cache(cache); | 10227 | btrfs_remove_free_space_cache(cache); |
10013 | spin_lock(&info->block_group_cache_lock); | 10228 | spin_lock(&info->block_group_cache_lock); |
@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
10020 | } | 10235 | } |
10021 | 10236 | ||
10022 | cache->space_info = space_info; | 10237 | cache->space_info = space_info; |
10023 | spin_lock(&cache->space_info->lock); | ||
10024 | cache->space_info->bytes_readonly += cache->bytes_super; | ||
10025 | spin_unlock(&cache->space_info->lock); | ||
10026 | 10238 | ||
10027 | __link_block_group(space_info, cache); | 10239 | __link_block_group(space_info, cache); |
10028 | 10240 | ||
@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10114 | int ret; | 10326 | int ret; |
10115 | struct btrfs_root *extent_root; | 10327 | struct btrfs_root *extent_root; |
10116 | struct btrfs_block_group_cache *cache; | 10328 | struct btrfs_block_group_cache *cache; |
10117 | |||
10118 | extent_root = root->fs_info->extent_root; | 10329 | extent_root = root->fs_info->extent_root; |
10119 | 10330 | ||
10120 | btrfs_set_log_full_commit(root->fs_info, trans); | 10331 | btrfs_set_log_full_commit(root->fs_info, trans); |
@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10160 | * assigned to our block group, but don't update its counters just yet. | 10371 | * assigned to our block group, but don't update its counters just yet. |
10161 | * We want our bg to be added to the rbtree with its ->space_info set. | 10372 | * We want our bg to be added to the rbtree with its ->space_info set. |
10162 | */ | 10373 | */ |
10163 | ret = update_space_info(root->fs_info, cache->flags, 0, 0, | 10374 | ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0, |
10164 | &cache->space_info); | 10375 | &cache->space_info); |
10165 | if (ret) { | 10376 | if (ret) { |
10166 | btrfs_remove_free_space_cache(cache); | 10377 | btrfs_remove_free_space_cache(cache); |
@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10179 | * Now that our block group has its ->space_info set and is inserted in | 10390 | * Now that our block group has its ->space_info set and is inserted in |
10180 | * the rbtree, update the space info's counters. | 10391 | * the rbtree, update the space info's counters. |
10181 | */ | 10392 | */ |
10393 | trace_btrfs_add_block_group(root->fs_info, cache, 1); | ||
10182 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 10394 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
10183 | &cache->space_info); | 10395 | cache->bytes_super, &cache->space_info); |
10184 | if (ret) { | 10396 | if (ret) { |
10185 | btrfs_remove_free_space_cache(cache); | 10397 | btrfs_remove_free_space_cache(cache); |
10186 | spin_lock(&root->fs_info->block_group_cache_lock); | 10398 | spin_lock(&root->fs_info->block_group_cache_lock); |
@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10193 | } | 10405 | } |
10194 | update_global_block_rsv(root->fs_info); | 10406 | update_global_block_rsv(root->fs_info); |
10195 | 10407 | ||
10196 | spin_lock(&cache->space_info->lock); | ||
10197 | cache->space_info->bytes_readonly += cache->bytes_super; | ||
10198 | spin_unlock(&cache->space_info->lock); | ||
10199 | |||
10200 | __link_block_group(cache->space_info, cache); | 10408 | __link_block_group(cache->space_info, cache); |
10201 | 10409 | ||
10202 | list_add_tail(&cache->bg_list, &trans->new_bgs); | 10410 | list_add_tail(&cache->bg_list, &trans->new_bgs); |
10203 | 10411 | ||
10204 | set_avail_alloc_bits(extent_root->fs_info, type); | 10412 | set_avail_alloc_bits(extent_root->fs_info, type); |
10205 | |||
10206 | return 0; | 10413 | return 0; |
10207 | } | 10414 | } |
10208 | 10415 | ||
@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | |||
10747 | mixed = 1; | 10954 | mixed = 1; |
10748 | 10955 | ||
10749 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | 10956 | flags = BTRFS_BLOCK_GROUP_SYSTEM; |
10750 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | 10957 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); |
10751 | if (ret) | 10958 | if (ret) |
10752 | goto out; | 10959 | goto out; |
10753 | 10960 | ||
10754 | if (mixed) { | 10961 | if (mixed) { |
10755 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; | 10962 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; |
10756 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | 10963 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); |
10757 | } else { | 10964 | } else { |
10758 | flags = BTRFS_BLOCK_GROUP_METADATA; | 10965 | flags = BTRFS_BLOCK_GROUP_METADATA; |
10759 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | 10966 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); |
10760 | if (ret) | 10967 | if (ret) |
10761 | goto out; | 10968 | goto out; |
10762 | 10969 | ||
10763 | flags = BTRFS_BLOCK_GROUP_DATA; | 10970 | flags = BTRFS_BLOCK_GROUP_DATA; |
10764 | ret = update_space_info(fs_info, flags, 0, 0, &space_info); | 10971 | ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info); |
10765 | } | 10972 | } |
10766 | out: | 10973 | out: |
10767 | return ret; | 10974 | return ret; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2234e88cf674..bcfb4a27ddd4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1629,13 +1629,11 @@ again: | |||
1629 | * managed to copy. | 1629 | * managed to copy. |
1630 | */ | 1630 | */ |
1631 | if (num_sectors > dirty_sectors) { | 1631 | if (num_sectors > dirty_sectors) { |
1632 | /* | 1632 | |
1633 | * we round down because we don't want to count | 1633 | /* release everything except the sectors we dirtied */ |
1634 | * any partial blocks actually sent through the | 1634 | release_bytes -= dirty_sectors << |
1635 | * IO machines | 1635 | root->fs_info->sb->s_blocksize_bits; |
1636 | */ | 1636 | |
1637 | release_bytes = round_down(release_bytes - copied, | ||
1638 | root->sectorsize); | ||
1639 | if (copied > 0) { | 1637 | if (copied > 0) { |
1640 | spin_lock(&BTRFS_I(inode)->lock); | 1638 | spin_lock(&BTRFS_I(inode)->lock); |
1641 | BTRFS_I(inode)->outstanding_extents++; | 1639 | BTRFS_I(inode)->outstanding_extents++; |
@@ -2479,7 +2477,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2479 | } | 2477 | } |
2480 | 2478 | ||
2481 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | 2479 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, |
2482 | min_size); | 2480 | min_size, 0); |
2483 | BUG_ON(ret); | 2481 | BUG_ON(ret); |
2484 | trans->block_rsv = rsv; | 2482 | trans->block_rsv = rsv; |
2485 | 2483 | ||
@@ -2522,7 +2520,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2522 | } | 2520 | } |
2523 | 2521 | ||
2524 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | 2522 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, |
2525 | rsv, min_size); | 2523 | rsv, min_size, 0); |
2526 | BUG_ON(ret); /* shouldn't happen */ | 2524 | BUG_ON(ret); /* shouldn't happen */ |
2527 | trans->block_rsv = rsv; | 2525 | trans->block_rsv = rsv; |
2528 | 2526 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index df731c0ebec7..8078077d1090 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5263,7 +5263,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
5263 | if (steal_from_global) { | 5263 | if (steal_from_global) { |
5264 | if (!btrfs_check_space_for_delayed_refs(trans, root)) | 5264 | if (!btrfs_check_space_for_delayed_refs(trans, root)) |
5265 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, | 5265 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, |
5266 | min_size); | 5266 | min_size, 0); |
5267 | else | 5267 | else |
5268 | ret = -ENOSPC; | 5268 | ret = -ENOSPC; |
5269 | } | 5269 | } |
@@ -9116,7 +9116,7 @@ static int btrfs_truncate(struct inode *inode) | |||
9116 | 9116 | ||
9117 | /* Migrate the slack space for the truncate to our reserve */ | 9117 | /* Migrate the slack space for the truncate to our reserve */ |
9118 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | 9118 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, |
9119 | min_size); | 9119 | min_size, 0); |
9120 | BUG_ON(ret); | 9120 | BUG_ON(ret); |
9121 | 9121 | ||
9122 | /* | 9122 | /* |
@@ -9156,7 +9156,7 @@ static int btrfs_truncate(struct inode *inode) | |||
9156 | } | 9156 | } |
9157 | 9157 | ||
9158 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | 9158 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, |
9159 | rsv, min_size); | 9159 | rsv, min_size, 0); |
9160 | BUG_ON(ret); /* shouldn't happen */ | 9160 | BUG_ON(ret); /* shouldn't happen */ |
9161 | trans->block_rsv = rsv; | 9161 | trans->block_rsv = rsv; |
9162 | } | 9162 | } |
@@ -9177,7 +9177,6 @@ static int btrfs_truncate(struct inode *inode) | |||
9177 | ret = btrfs_end_transaction(trans, root); | 9177 | ret = btrfs_end_transaction(trans, root); |
9178 | btrfs_btree_balance_dirty(root); | 9178 | btrfs_btree_balance_dirty(root); |
9179 | } | 9179 | } |
9180 | |||
9181 | out: | 9180 | out: |
9182 | btrfs_free_block_rsv(root, rsv); | 9181 | btrfs_free_block_rsv(root, rsv); |
9183 | 9182 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0477dca154ed..fc067b07e31f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2604,25 +2604,28 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, | |||
2604 | 2604 | ||
2605 | trans->block_rsv = rc->block_rsv; | 2605 | trans->block_rsv = rc->block_rsv; |
2606 | rc->reserved_bytes += num_bytes; | 2606 | rc->reserved_bytes += num_bytes; |
2607 | |||
2608 | /* | ||
2609 | * We are under a transaction here so we can only do limited flushing. | ||
2610 | * If we get an enospc just kick back -EAGAIN so we know to drop the | ||
2611 | * transaction and try to refill when we can flush all the things. | ||
2612 | */ | ||
2607 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, | 2613 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, |
2608 | BTRFS_RESERVE_FLUSH_ALL); | 2614 | BTRFS_RESERVE_FLUSH_LIMIT); |
2609 | if (ret) { | 2615 | if (ret) { |
2610 | if (ret == -EAGAIN) { | 2616 | tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES; |
2611 | tmp = rc->extent_root->nodesize * | 2617 | while (tmp <= rc->reserved_bytes) |
2612 | RELOCATION_RESERVED_NODES; | 2618 | tmp <<= 1; |
2613 | while (tmp <= rc->reserved_bytes) | 2619 | /* |
2614 | tmp <<= 1; | 2620 | * only one thread can access block_rsv at this point, |
2615 | /* | 2621 | * so we don't need hold lock to protect block_rsv. |
2616 | * only one thread can access block_rsv at this point, | 2622 | * we expand more reservation size here to allow enough |
2617 | * so we don't need hold lock to protect block_rsv. | 2623 | * space for relocation and we will return earlier in |
2618 | * we expand more reservation size here to allow enough | 2624 | * enospc case. |
2619 | * space for relocation and we will return earlier in | 2625 | */ |
2620 | * enospc case. | 2626 | rc->block_rsv->size = tmp + rc->extent_root->nodesize * |
2621 | */ | 2627 | RELOCATION_RESERVED_NODES; |
2622 | rc->block_rsv->size = tmp + rc->extent_root->nodesize * | 2628 | return -EAGAIN; |
2623 | RELOCATION_RESERVED_NODES; | ||
2624 | } | ||
2625 | return ret; | ||
2626 | } | 2629 | } |
2627 | 2630 | ||
2628 | return 0; | 2631 | return 0; |
@@ -3871,6 +3874,7 @@ static noinline_for_stack | |||
3871 | int prepare_to_relocate(struct reloc_control *rc) | 3874 | int prepare_to_relocate(struct reloc_control *rc) |
3872 | { | 3875 | { |
3873 | struct btrfs_trans_handle *trans; | 3876 | struct btrfs_trans_handle *trans; |
3877 | int ret; | ||
3874 | 3878 | ||
3875 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, | 3879 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, |
3876 | BTRFS_BLOCK_RSV_TEMP); | 3880 | BTRFS_BLOCK_RSV_TEMP); |
@@ -3885,6 +3889,11 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3885 | rc->reserved_bytes = 0; | 3889 | rc->reserved_bytes = 0; |
3886 | rc->block_rsv->size = rc->extent_root->nodesize * | 3890 | rc->block_rsv->size = rc->extent_root->nodesize * |
3887 | RELOCATION_RESERVED_NODES; | 3891 | RELOCATION_RESERVED_NODES; |
3892 | ret = btrfs_block_rsv_refill(rc->extent_root, | ||
3893 | rc->block_rsv, rc->block_rsv->size, | ||
3894 | BTRFS_RESERVE_FLUSH_ALL); | ||
3895 | if (ret) | ||
3896 | return ret; | ||
3888 | 3897 | ||
3889 | rc->create_reloc_tree = 1; | 3898 | rc->create_reloc_tree = 1; |
3890 | set_reloc_control(rc); | 3899 | set_reloc_control(rc); |
@@ -4643,7 +4652,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | |||
4643 | if (rc->merge_reloc_tree) { | 4652 | if (rc->merge_reloc_tree) { |
4644 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | 4653 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, |
4645 | rc->block_rsv, | 4654 | rc->block_rsv, |
4646 | rc->nodes_relocated); | 4655 | rc->nodes_relocated, 1); |
4647 | if (ret) | 4656 | if (ret) |
4648 | return ret; | 4657 | return ret; |
4649 | } | 4658 | } |