diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/ctree.h | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 529 |
2 files changed, 380 insertions, 151 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fe16474fabf3..2e04c9d6f21d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -439,6 +439,8 @@ struct btrfs_space_info { | |||
439 | struct list_head list; | 439 | struct list_head list; |
440 | /* Protected by the spinlock 'lock'. */ | 440 | /* Protected by the spinlock 'lock'. */ |
441 | struct list_head ro_bgs; | 441 | struct list_head ro_bgs; |
442 | struct list_head priority_tickets; | ||
443 | struct list_head tickets; | ||
442 | 444 | ||
443 | struct rw_semaphore groups_sem; | 445 | struct rw_semaphore groups_sem; |
444 | /* for block groups in our same type */ | 446 | /* for block groups in our same type */ |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aae7b04afa9f..2c17b621a661 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | |||
111 | u64 num_bytes); | 111 | u64 num_bytes); |
112 | int btrfs_pin_extent(struct btrfs_root *root, | 112 | int btrfs_pin_extent(struct btrfs_root *root, |
113 | u64 bytenr, u64 num_bytes, int reserved); | 113 | u64 bytenr, u64 num_bytes, int reserved); |
114 | static int __reserve_metadata_bytes(struct btrfs_root *root, | ||
115 | struct btrfs_space_info *space_info, | ||
116 | u64 orig_bytes, | ||
117 | enum btrfs_reserve_flush_enum flush); | ||
118 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | ||
119 | struct btrfs_space_info *space_info, | ||
120 | u64 num_bytes); | ||
121 | static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | ||
122 | struct btrfs_space_info *space_info, | ||
123 | u64 num_bytes); | ||
114 | 124 | ||
115 | static noinline int | 125 | static noinline int |
116 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 126 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -3937,6 +3947,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3937 | found->bytes_readonly += bytes_readonly; | 3947 | found->bytes_readonly += bytes_readonly; |
3938 | if (total_bytes > 0) | 3948 | if (total_bytes > 0) |
3939 | found->full = 0; | 3949 | found->full = 0; |
3950 | space_info_add_new_bytes(info, found, total_bytes - | ||
3951 | bytes_used - bytes_readonly); | ||
3940 | spin_unlock(&found->lock); | 3952 | spin_unlock(&found->lock); |
3941 | *space_info = found; | 3953 | *space_info = found; |
3942 | return 0; | 3954 | return 0; |
@@ -3971,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3971 | found->flush = 0; | 3983 | found->flush = 0; |
3972 | init_waitqueue_head(&found->wait); | 3984 | init_waitqueue_head(&found->wait); |
3973 | INIT_LIST_HEAD(&found->ro_bgs); | 3985 | INIT_LIST_HEAD(&found->ro_bgs); |
3986 | INIT_LIST_HEAD(&found->tickets); | ||
3987 | INIT_LIST_HEAD(&found->priority_tickets); | ||
3974 | 3988 | ||
3975 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, | 3989 | ret = kobject_init_and_add(&found->kobj, &space_info_ktype, |
3976 | info->space_info_kobj, "%s", | 3990 | info->space_info_kobj, "%s", |
@@ -4584,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root, | |||
4584 | struct btrfs_space_info *space_info, u64 bytes, | 4598 | struct btrfs_space_info *space_info, u64 bytes, |
4585 | enum btrfs_reserve_flush_enum flush) | 4599 | enum btrfs_reserve_flush_enum flush) |
4586 | { | 4600 | { |
4587 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | 4601 | struct btrfs_block_rsv *global_rsv; |
4588 | u64 profile = btrfs_get_alloc_profile(root, 0); | 4602 | u64 profile; |
4589 | u64 space_size; | 4603 | u64 space_size; |
4590 | u64 avail; | 4604 | u64 avail; |
4591 | u64 used; | 4605 | u64 used; |
4592 | 4606 | ||
4607 | /* Don't overcommit when in mixed mode. */ | ||
4608 | if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) | ||
4609 | return 0; | ||
4610 | |||
4611 | BUG_ON(root->fs_info == NULL); | ||
4612 | global_rsv = &root->fs_info->global_block_rsv; | ||
4613 | profile = btrfs_get_alloc_profile(root, 0); | ||
4593 | used = space_info->bytes_used + space_info->bytes_reserved + | 4614 | used = space_info->bytes_used + space_info->bytes_reserved + |
4594 | space_info->bytes_pinned + space_info->bytes_readonly; | 4615 | space_info->bytes_pinned + space_info->bytes_readonly; |
4595 | 4616 | ||
@@ -4741,6 +4762,11 @@ skip_async: | |||
4741 | spin_unlock(&space_info->lock); | 4762 | spin_unlock(&space_info->lock); |
4742 | break; | 4763 | break; |
4743 | } | 4764 | } |
4765 | if (list_empty(&space_info->tickets) && | ||
4766 | list_empty(&space_info->priority_tickets)) { | ||
4767 | spin_unlock(&space_info->lock); | ||
4768 | break; | ||
4769 | } | ||
4744 | spin_unlock(&space_info->lock); | 4770 | spin_unlock(&space_info->lock); |
4745 | 4771 | ||
4746 | loops++; | 4772 | loops++; |
@@ -4818,6 +4844,13 @@ enum flush_state { | |||
4818 | COMMIT_TRANS = 6, | 4844 | COMMIT_TRANS = 6, |
4819 | }; | 4845 | }; |
4820 | 4846 | ||
4847 | struct reserve_ticket { | ||
4848 | u64 bytes; | ||
4849 | int error; | ||
4850 | struct list_head list; | ||
4851 | wait_queue_head_t wait; | ||
4852 | }; | ||
4853 | |||
4821 | static int flush_space(struct btrfs_root *root, | 4854 | static int flush_space(struct btrfs_root *root, |
4822 | struct btrfs_space_info *space_info, u64 num_bytes, | 4855 | struct btrfs_space_info *space_info, u64 num_bytes, |
4823 | u64 orig_bytes, int state) | 4856 | u64 orig_bytes, int state) |
@@ -4875,17 +4908,22 @@ static inline u64 | |||
4875 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | 4908 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, |
4876 | struct btrfs_space_info *space_info) | 4909 | struct btrfs_space_info *space_info) |
4877 | { | 4910 | { |
4911 | struct reserve_ticket *ticket; | ||
4878 | u64 used; | 4912 | u64 used; |
4879 | u64 expected; | 4913 | u64 expected; |
4880 | u64 to_reclaim; | 4914 | u64 to_reclaim = 0; |
4881 | 4915 | ||
4882 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | 4916 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); |
4883 | spin_lock(&space_info->lock); | ||
4884 | if (can_overcommit(root, space_info, to_reclaim, | 4917 | if (can_overcommit(root, space_info, to_reclaim, |
4885 | BTRFS_RESERVE_FLUSH_ALL)) { | 4918 | BTRFS_RESERVE_FLUSH_ALL)) |
4886 | to_reclaim = 0; | 4919 | return 0; |
4887 | goto out; | 4920 | |
4888 | } | 4921 | list_for_each_entry(ticket, &space_info->tickets, list) |
4922 | to_reclaim += ticket->bytes; | ||
4923 | list_for_each_entry(ticket, &space_info->priority_tickets, list) | ||
4924 | to_reclaim += ticket->bytes; | ||
4925 | if (to_reclaim) | ||
4926 | return to_reclaim; | ||
4889 | 4927 | ||
4890 | used = space_info->bytes_used + space_info->bytes_reserved + | 4928 | used = space_info->bytes_used + space_info->bytes_reserved + |
4891 | space_info->bytes_pinned + space_info->bytes_readonly + | 4929 | space_info->bytes_pinned + space_info->bytes_readonly + |
@@ -4901,9 +4939,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | |||
4901 | to_reclaim = 0; | 4939 | to_reclaim = 0; |
4902 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + | 4940 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + |
4903 | space_info->bytes_reserved); | 4941 | space_info->bytes_reserved); |
4904 | out: | ||
4905 | spin_unlock(&space_info->lock); | ||
4906 | |||
4907 | return to_reclaim; | 4942 | return to_reclaim; |
4908 | } | 4943 | } |
4909 | 4944 | ||
@@ -4920,69 +4955,169 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | |||
4920 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | 4955 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); |
4921 | } | 4956 | } |
4922 | 4957 | ||
4923 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, | 4958 | static void wake_all_tickets(struct list_head *head) |
4924 | struct btrfs_fs_info *fs_info, | ||
4925 | int flush_state) | ||
4926 | { | 4959 | { |
4927 | u64 used; | 4960 | struct reserve_ticket *ticket; |
4928 | |||
4929 | spin_lock(&space_info->lock); | ||
4930 | /* | ||
4931 | * We run out of space and have not got any free space via flush_space, | ||
4932 | * so don't bother doing async reclaim. | ||
4933 | */ | ||
4934 | if (flush_state > COMMIT_TRANS && space_info->full) { | ||
4935 | spin_unlock(&space_info->lock); | ||
4936 | return 0; | ||
4937 | } | ||
4938 | 4961 | ||
4939 | used = space_info->bytes_used + space_info->bytes_reserved + | 4962 | while (!list_empty(head)) { |
4940 | space_info->bytes_pinned + space_info->bytes_readonly + | 4963 | ticket = list_first_entry(head, struct reserve_ticket, list); |
4941 | space_info->bytes_may_use; | 4964 | list_del_init(&ticket->list); |
4942 | if (need_do_async_reclaim(space_info, fs_info, used)) { | 4965 | ticket->error = -ENOSPC; |
4943 | spin_unlock(&space_info->lock); | 4966 | wake_up(&ticket->wait); |
4944 | return 1; | ||
4945 | } | 4967 | } |
4946 | spin_unlock(&space_info->lock); | ||
4947 | |||
4948 | return 0; | ||
4949 | } | 4968 | } |
4950 | 4969 | ||
4970 | /* | ||
4971 | * This is for normal flushers, we can wait all goddamned day if we want to. We | ||
4972 | * will loop and continuously try to flush as long as we are making progress. | ||
4973 | * We count progress as clearing off tickets each time we have to loop. | ||
4974 | */ | ||
4951 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | 4975 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) |
4952 | { | 4976 | { |
4977 | struct reserve_ticket *last_ticket = NULL; | ||
4953 | struct btrfs_fs_info *fs_info; | 4978 | struct btrfs_fs_info *fs_info; |
4954 | struct btrfs_space_info *space_info; | 4979 | struct btrfs_space_info *space_info; |
4955 | u64 to_reclaim; | 4980 | u64 to_reclaim; |
4956 | int flush_state; | 4981 | int flush_state; |
4982 | int commit_cycles = 0; | ||
4957 | 4983 | ||
4958 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | 4984 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); |
4959 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 4985 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
4960 | 4986 | ||
4987 | spin_lock(&space_info->lock); | ||
4961 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | 4988 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, |
4962 | space_info); | 4989 | space_info); |
4963 | if (!to_reclaim) | 4990 | if (!to_reclaim) { |
4991 | space_info->flush = 0; | ||
4992 | spin_unlock(&space_info->lock); | ||
4964 | return; | 4993 | return; |
4994 | } | ||
4995 | last_ticket = list_first_entry(&space_info->tickets, | ||
4996 | struct reserve_ticket, list); | ||
4997 | spin_unlock(&space_info->lock); | ||
4965 | 4998 | ||
4966 | flush_state = FLUSH_DELAYED_ITEMS_NR; | 4999 | flush_state = FLUSH_DELAYED_ITEMS_NR; |
4967 | do { | 5000 | do { |
5001 | struct reserve_ticket *ticket; | ||
5002 | int ret; | ||
5003 | |||
5004 | ret = flush_space(fs_info->fs_root, space_info, to_reclaim, | ||
5005 | to_reclaim, flush_state); | ||
5006 | spin_lock(&space_info->lock); | ||
5007 | if (list_empty(&space_info->tickets)) { | ||
5008 | space_info->flush = 0; | ||
5009 | spin_unlock(&space_info->lock); | ||
5010 | return; | ||
5011 | } | ||
5012 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | ||
5013 | space_info); | ||
5014 | ticket = list_first_entry(&space_info->tickets, | ||
5015 | struct reserve_ticket, list); | ||
5016 | if (last_ticket == ticket) { | ||
5017 | flush_state++; | ||
5018 | } else { | ||
5019 | last_ticket = ticket; | ||
5020 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5021 | if (commit_cycles) | ||
5022 | commit_cycles--; | ||
5023 | } | ||
5024 | |||
5025 | if (flush_state > COMMIT_TRANS) { | ||
5026 | commit_cycles++; | ||
5027 | if (commit_cycles > 2) { | ||
5028 | wake_all_tickets(&space_info->tickets); | ||
5029 | space_info->flush = 0; | ||
5030 | } else { | ||
5031 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5032 | } | ||
5033 | } | ||
5034 | spin_unlock(&space_info->lock); | ||
5035 | } while (flush_state <= COMMIT_TRANS); | ||
5036 | } | ||
5037 | |||
5038 | void btrfs_init_async_reclaim_work(struct work_struct *work) | ||
5039 | { | ||
5040 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | ||
5041 | } | ||
5042 | |||
5043 | static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, | ||
5044 | struct btrfs_space_info *space_info, | ||
5045 | struct reserve_ticket *ticket) | ||
5046 | { | ||
5047 | u64 to_reclaim; | ||
5048 | int flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5049 | |||
5050 | spin_lock(&space_info->lock); | ||
5051 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | ||
5052 | space_info); | ||
5053 | if (!to_reclaim) { | ||
5054 | spin_unlock(&space_info->lock); | ||
5055 | return; | ||
5056 | } | ||
5057 | spin_unlock(&space_info->lock); | ||
5058 | |||
5059 | do { | ||
4968 | flush_space(fs_info->fs_root, space_info, to_reclaim, | 5060 | flush_space(fs_info->fs_root, space_info, to_reclaim, |
4969 | to_reclaim, flush_state); | 5061 | to_reclaim, flush_state); |
4970 | flush_state++; | 5062 | flush_state++; |
4971 | if (!btrfs_need_do_async_reclaim(space_info, fs_info, | 5063 | spin_lock(&space_info->lock); |
4972 | flush_state)) | 5064 | if (ticket->bytes == 0) { |
5065 | spin_unlock(&space_info->lock); | ||
4973 | return; | 5066 | return; |
5067 | } | ||
5068 | spin_unlock(&space_info->lock); | ||
5069 | |||
5070 | /* | ||
5071 | * Priority flushers can't wait on delalloc without | ||
5072 | * deadlocking. | ||
5073 | */ | ||
5074 | if (flush_state == FLUSH_DELALLOC || | ||
5075 | flush_state == FLUSH_DELALLOC_WAIT) | ||
5076 | flush_state = ALLOC_CHUNK; | ||
4974 | } while (flush_state < COMMIT_TRANS); | 5077 | } while (flush_state < COMMIT_TRANS); |
4975 | } | 5078 | } |
4976 | 5079 | ||
4977 | void btrfs_init_async_reclaim_work(struct work_struct *work) | 5080 | static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, |
5081 | struct btrfs_space_info *space_info, | ||
5082 | struct reserve_ticket *ticket, u64 orig_bytes) | ||
5083 | |||
4978 | { | 5084 | { |
4979 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | 5085 | DEFINE_WAIT(wait); |
5086 | int ret = 0; | ||
5087 | |||
5088 | spin_lock(&space_info->lock); | ||
5089 | while (ticket->bytes > 0 && ticket->error == 0) { | ||
5090 | ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); | ||
5091 | if (ret) { | ||
5092 | ret = -EINTR; | ||
5093 | break; | ||
5094 | } | ||
5095 | spin_unlock(&space_info->lock); | ||
5096 | |||
5097 | schedule(); | ||
5098 | |||
5099 | finish_wait(&ticket->wait, &wait); | ||
5100 | spin_lock(&space_info->lock); | ||
5101 | } | ||
5102 | if (!ret) | ||
5103 | ret = ticket->error; | ||
5104 | if (!list_empty(&ticket->list)) | ||
5105 | list_del_init(&ticket->list); | ||
5106 | if (ticket->bytes && ticket->bytes < orig_bytes) { | ||
5107 | u64 num_bytes = orig_bytes - ticket->bytes; | ||
5108 | space_info->bytes_may_use -= num_bytes; | ||
5109 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5110 | space_info->flags, num_bytes, 0); | ||
5111 | } | ||
5112 | spin_unlock(&space_info->lock); | ||
5113 | |||
5114 | return ret; | ||
4980 | } | 5115 | } |
4981 | 5116 | ||
4982 | /** | 5117 | /** |
4983 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 5118 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space |
4984 | * @root - the root we're allocating for | 5119 | * @root - the root we're allocating for |
4985 | * @block_rsv - the block_rsv we're allocating for | 5120 | * @space_info - the space info we want to allocate from |
4986 | * @orig_bytes - the number of bytes we want | 5121 | * @orig_bytes - the number of bytes we want |
4987 | * @flush - whether or not we can flush to make our reservation | 5122 | * @flush - whether or not we can flush to make our reservation |
4988 | * | 5123 | * |
@@ -4993,81 +5128,34 @@ void btrfs_init_async_reclaim_work(struct work_struct *work) | |||
4993 | * regain reservations will be made and this will fail if there is not enough | 5128 | * regain reservations will be made and this will fail if there is not enough |
4994 | * space already. | 5129 | * space already. |
4995 | */ | 5130 | */ |
4996 | static int reserve_metadata_bytes(struct btrfs_root *root, | 5131 | static int __reserve_metadata_bytes(struct btrfs_root *root, |
4997 | struct btrfs_block_rsv *block_rsv, | 5132 | struct btrfs_space_info *space_info, |
4998 | u64 orig_bytes, | 5133 | u64 orig_bytes, |
4999 | enum btrfs_reserve_flush_enum flush) | 5134 | enum btrfs_reserve_flush_enum flush) |
5000 | { | 5135 | { |
5001 | struct btrfs_space_info *space_info = block_rsv->space_info; | 5136 | struct reserve_ticket ticket; |
5002 | u64 used; | 5137 | u64 used; |
5003 | u64 num_bytes = orig_bytes; | ||
5004 | int flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5005 | int ret = 0; | 5138 | int ret = 0; |
5006 | bool flushing = false; | ||
5007 | 5139 | ||
5008 | again: | 5140 | ASSERT(orig_bytes); |
5009 | ret = 0; | ||
5010 | spin_lock(&space_info->lock); | 5141 | spin_lock(&space_info->lock); |
5011 | /* | ||
5012 | * We only want to wait if somebody other than us is flushing and we | ||
5013 | * are actually allowed to flush all things. | ||
5014 | */ | ||
5015 | while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && | ||
5016 | space_info->flush) { | ||
5017 | spin_unlock(&space_info->lock); | ||
5018 | /* | ||
5019 | * If we have a trans handle we can't wait because the flusher | ||
5020 | * may have to commit the transaction, which would mean we would | ||
5021 | * deadlock since we are waiting for the flusher to finish, but | ||
5022 | * hold the current transaction open. | ||
5023 | */ | ||
5024 | if (current->journal_info) | ||
5025 | return -EAGAIN; | ||
5026 | ret = wait_event_killable(space_info->wait, !space_info->flush); | ||
5027 | /* Must have been killed, return */ | ||
5028 | if (ret) | ||
5029 | return -EINTR; | ||
5030 | |||
5031 | spin_lock(&space_info->lock); | ||
5032 | } | ||
5033 | |||
5034 | ret = -ENOSPC; | 5142 | ret = -ENOSPC; |
5035 | used = space_info->bytes_used + space_info->bytes_reserved + | 5143 | used = space_info->bytes_used + space_info->bytes_reserved + |
5036 | space_info->bytes_pinned + space_info->bytes_readonly + | 5144 | space_info->bytes_pinned + space_info->bytes_readonly + |
5037 | space_info->bytes_may_use; | 5145 | space_info->bytes_may_use; |
5038 | 5146 | ||
5039 | /* | 5147 | /* |
5040 | * The idea here is that we've not already over-reserved the block group | 5148 | * If we have enough space then hooray, make our reservation and carry |
5041 | * then we can go ahead and save our reservation first and then start | 5149 | * on. If not see if we can overcommit, and if we can, hooray carry on. |
5042 | * flushing if we need to. Otherwise if we've already overcommitted | 5150 | * If not things get more complicated. |
5043 | * lets start flushing stuff first and then come back and try to make | ||
5044 | * our reservation. | ||
5045 | */ | 5151 | */ |
5046 | if (used <= space_info->total_bytes) { | 5152 | if (used + orig_bytes <= space_info->total_bytes) { |
5047 | if (used + orig_bytes <= space_info->total_bytes) { | 5153 | space_info->bytes_may_use += orig_bytes; |
5048 | space_info->bytes_may_use += orig_bytes; | 5154 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
5049 | trace_btrfs_space_reservation(root->fs_info, | 5155 | space_info->flags, orig_bytes, |
5050 | "space_info", space_info->flags, orig_bytes, 1); | 5156 | 1); |
5051 | ret = 0; | 5157 | ret = 0; |
5052 | } else { | 5158 | } else if (can_overcommit(root, space_info, orig_bytes, flush)) { |
5053 | /* | ||
5054 | * Ok set num_bytes to orig_bytes since we aren't | ||
5055 | * overcommitted, this way we only try and reclaim what | ||
5056 | * we need. | ||
5057 | */ | ||
5058 | num_bytes = orig_bytes; | ||
5059 | } | ||
5060 | } else { | ||
5061 | /* | ||
5062 | * Ok we're over committed, set num_bytes to the overcommitted | ||
5063 | * amount plus the amount of bytes that we need for this | ||
5064 | * reservation. | ||
5065 | */ | ||
5066 | num_bytes = used - space_info->total_bytes + | ||
5067 | (orig_bytes * 2); | ||
5068 | } | ||
5069 | |||
5070 | if (ret && can_overcommit(root, space_info, orig_bytes, flush)) { | ||
5071 | space_info->bytes_may_use += orig_bytes; | 5159 | space_info->bytes_may_use += orig_bytes; |
5072 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 5160 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
5073 | space_info->flags, orig_bytes, | 5161 | space_info->flags, orig_bytes, |
@@ -5076,16 +5164,27 @@ again: | |||
5076 | } | 5164 | } |
5077 | 5165 | ||
5078 | /* | 5166 | /* |
5079 | * Couldn't make our reservation, save our place so while we're trying | 5167 | * If we couldn't make a reservation then setup our reservation ticket |
5080 | * to reclaim space we can actually use it instead of somebody else | 5168 | * and kick the async worker if it's not already running. |
5081 | * stealing it from us. | ||
5082 | * | 5169 | * |
5083 | * We make the other tasks wait for the flush only when we can flush | 5170 | * If we are a priority flusher then we just need to add our ticket to |
5084 | * all things. | 5171 | * the list and we will do our own flushing further down. |
5085 | */ | 5172 | */ |
5086 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { | 5173 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { |
5087 | flushing = true; | 5174 | ticket.bytes = orig_bytes; |
5088 | space_info->flush = 1; | 5175 | ticket.error = 0; |
5176 | init_waitqueue_head(&ticket.wait); | ||
5177 | if (flush == BTRFS_RESERVE_FLUSH_ALL) { | ||
5178 | list_add_tail(&ticket.list, &space_info->tickets); | ||
5179 | if (!space_info->flush) { | ||
5180 | space_info->flush = 1; | ||
5181 | queue_work(system_unbound_wq, | ||
5182 | &root->fs_info->async_reclaim_work); | ||
5183 | } | ||
5184 | } else { | ||
5185 | list_add_tail(&ticket.list, | ||
5186 | &space_info->priority_tickets); | ||
5187 | } | ||
5089 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | 5188 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { |
5090 | used += orig_bytes; | 5189 | used += orig_bytes; |
5091 | /* | 5190 | /* |
@@ -5100,33 +5199,56 @@ again: | |||
5100 | &root->fs_info->async_reclaim_work); | 5199 | &root->fs_info->async_reclaim_work); |
5101 | } | 5200 | } |
5102 | spin_unlock(&space_info->lock); | 5201 | spin_unlock(&space_info->lock); |
5103 | |||
5104 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) | 5202 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
5105 | goto out; | 5203 | return ret; |
5106 | 5204 | ||
5107 | ret = flush_space(root, space_info, num_bytes, orig_bytes, | 5205 | if (flush == BTRFS_RESERVE_FLUSH_ALL) |
5108 | flush_state); | 5206 | return wait_reserve_ticket(root->fs_info, space_info, &ticket, |
5109 | flush_state++; | 5207 | orig_bytes); |
5110 | 5208 | ||
5111 | /* | 5209 | ret = 0; |
5112 | * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock | 5210 | priority_reclaim_metadata_space(root->fs_info, space_info, &ticket); |
5113 | * would happen. So skip delalloc flush. | 5211 | spin_lock(&space_info->lock); |
5114 | */ | 5212 | if (ticket.bytes) { |
5115 | if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | 5213 | if (ticket.bytes < orig_bytes) { |
5116 | (flush_state == FLUSH_DELALLOC || | 5214 | u64 num_bytes = orig_bytes - ticket.bytes; |
5117 | flush_state == FLUSH_DELALLOC_WAIT)) | 5215 | space_info->bytes_may_use -= num_bytes; |
5118 | flush_state = ALLOC_CHUNK; | 5216 | trace_btrfs_space_reservation(root->fs_info, |
5217 | "space_info", space_info->flags, | ||
5218 | num_bytes, 0); | ||
5119 | 5219 | ||
5120 | if (!ret) | 5220 | } |
5121 | goto again; | 5221 | list_del_init(&ticket.list); |
5122 | else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | 5222 | ret = -ENOSPC; |
5123 | flush_state < COMMIT_TRANS) | 5223 | } |
5124 | goto again; | 5224 | spin_unlock(&space_info->lock); |
5125 | else if (flush == BTRFS_RESERVE_FLUSH_ALL && | 5225 | ASSERT(list_empty(&ticket.list)); |
5126 | flush_state <= COMMIT_TRANS) | 5226 | return ret; |
5127 | goto again; | 5227 | } |
5128 | 5228 | ||
5129 | out: | 5229 | /** |
5230 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
5231 | * @root - the root we're allocating for | ||
5232 | * @block_rsv - the block_rsv we're allocating for | ||
5233 | * @orig_bytes - the number of bytes we want | ||
5234 | * @flush - whether or not we can flush to make our reservation | ||
5235 | * | ||
5236 | * This will reserve orig_bytes number of bytes from the space info associated | ||
5237 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
5238 | * flush out space to make room. It will do this by flushing delalloc if | ||
5239 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
5240 | * regain reservations will be made and this will fail if there is not enough | ||
5241 | * space already. | ||
5242 | */ | ||
5243 | static int reserve_metadata_bytes(struct btrfs_root *root, | ||
5244 | struct btrfs_block_rsv *block_rsv, | ||
5245 | u64 orig_bytes, | ||
5246 | enum btrfs_reserve_flush_enum flush) | ||
5247 | { | ||
5248 | int ret; | ||
5249 | |||
5250 | ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes, | ||
5251 | flush); | ||
5130 | if (ret == -ENOSPC && | 5252 | if (ret == -ENOSPC && |
5131 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | 5253 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { |
5132 | struct btrfs_block_rsv *global_rsv = | 5254 | struct btrfs_block_rsv *global_rsv = |
@@ -5139,13 +5261,8 @@ out: | |||
5139 | if (ret == -ENOSPC) | 5261 | if (ret == -ENOSPC) |
5140 | trace_btrfs_space_reservation(root->fs_info, | 5262 | trace_btrfs_space_reservation(root->fs_info, |
5141 | "space_info:enospc", | 5263 | "space_info:enospc", |
5142 | space_info->flags, orig_bytes, 1); | 5264 | block_rsv->space_info->flags, |
5143 | if (flushing) { | 5265 | orig_bytes, 1); |
5144 | spin_lock(&space_info->lock); | ||
5145 | space_info->flush = 0; | ||
5146 | wake_up_all(&space_info->wait); | ||
5147 | spin_unlock(&space_info->lock); | ||
5148 | } | ||
5149 | return ret; | 5266 | return ret; |
5150 | } | 5267 | } |
5151 | 5268 | ||
@@ -5221,6 +5338,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | |||
5221 | return 0; | 5338 | return 0; |
5222 | } | 5339 | } |
5223 | 5340 | ||
5341 | /* | ||
5342 | * This is for space we already have accounted in space_info->bytes_may_use, so | ||
5343 | * basically when we're returning space from block_rsv's. | ||
5344 | */ | ||
5345 | static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | ||
5346 | struct btrfs_space_info *space_info, | ||
5347 | u64 num_bytes) | ||
5348 | { | ||
5349 | struct reserve_ticket *ticket; | ||
5350 | struct list_head *head; | ||
5351 | u64 used; | ||
5352 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; | ||
5353 | bool check_overcommit = false; | ||
5354 | |||
5355 | spin_lock(&space_info->lock); | ||
5356 | head = &space_info->priority_tickets; | ||
5357 | |||
5358 | /* | ||
5359 | * If we are over our limit then we need to check and see if we can | ||
5360 | * overcommit, and if we can't then we just need to free up our space | ||
5361 | * and not satisfy any requests. | ||
5362 | */ | ||
5363 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
5364 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
5365 | space_info->bytes_may_use; | ||
5366 | if (used - num_bytes >= space_info->total_bytes) | ||
5367 | check_overcommit = true; | ||
5368 | again: | ||
5369 | while (!list_empty(head) && num_bytes) { | ||
5370 | ticket = list_first_entry(head, struct reserve_ticket, | ||
5371 | list); | ||
5372 | /* | ||
5373 | * We use 0 bytes because this space is already reserved, so | ||
5374 | * adding the ticket space would be a double count. | ||
5375 | */ | ||
5376 | if (check_overcommit && | ||
5377 | !can_overcommit(fs_info->extent_root, space_info, 0, | ||
5378 | flush)) | ||
5379 | break; | ||
5380 | if (num_bytes >= ticket->bytes) { | ||
5381 | list_del_init(&ticket->list); | ||
5382 | num_bytes -= ticket->bytes; | ||
5383 | ticket->bytes = 0; | ||
5384 | wake_up(&ticket->wait); | ||
5385 | } else { | ||
5386 | ticket->bytes -= num_bytes; | ||
5387 | num_bytes = 0; | ||
5388 | } | ||
5389 | } | ||
5390 | |||
5391 | if (num_bytes && head == &space_info->priority_tickets) { | ||
5392 | head = &space_info->tickets; | ||
5393 | flush = BTRFS_RESERVE_FLUSH_ALL; | ||
5394 | goto again; | ||
5395 | } | ||
5396 | space_info->bytes_may_use -= num_bytes; | ||
5397 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5398 | space_info->flags, num_bytes, 0); | ||
5399 | spin_unlock(&space_info->lock); | ||
5400 | } | ||
5401 | |||
5402 | /* | ||
5403 | * This is for newly allocated space that isn't accounted in | ||
5404 | * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent | ||
5405 | * we use this helper. | ||
5406 | */ | ||
5407 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | ||
5408 | struct btrfs_space_info *space_info, | ||
5409 | u64 num_bytes) | ||
5410 | { | ||
5411 | struct reserve_ticket *ticket; | ||
5412 | struct list_head *head = &space_info->priority_tickets; | ||
5413 | |||
5414 | again: | ||
5415 | while (!list_empty(head) && num_bytes) { | ||
5416 | ticket = list_first_entry(head, struct reserve_ticket, | ||
5417 | list); | ||
5418 | if (num_bytes >= ticket->bytes) { | ||
5419 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5420 | space_info->flags, | ||
5421 | ticket->bytes, 1); | ||
5422 | list_del_init(&ticket->list); | ||
5423 | num_bytes -= ticket->bytes; | ||
5424 | space_info->bytes_may_use += ticket->bytes; | ||
5425 | ticket->bytes = 0; | ||
5426 | wake_up(&ticket->wait); | ||
5427 | } else { | ||
5428 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5429 | space_info->flags, | ||
5430 | num_bytes, 1); | ||
5431 | space_info->bytes_may_use += num_bytes; | ||
5432 | ticket->bytes -= num_bytes; | ||
5433 | num_bytes = 0; | ||
5434 | } | ||
5435 | } | ||
5436 | |||
5437 | if (num_bytes && head == &space_info->priority_tickets) { | ||
5438 | head = &space_info->tickets; | ||
5439 | goto again; | ||
5440 | } | ||
5441 | } | ||
5442 | |||
5224 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | 5443 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
5225 | struct btrfs_block_rsv *block_rsv, | 5444 | struct btrfs_block_rsv *block_rsv, |
5226 | struct btrfs_block_rsv *dest, u64 num_bytes) | 5445 | struct btrfs_block_rsv *dest, u64 num_bytes) |
@@ -5255,13 +5474,9 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | |||
5255 | } | 5474 | } |
5256 | spin_unlock(&dest->lock); | 5475 | spin_unlock(&dest->lock); |
5257 | } | 5476 | } |
5258 | if (num_bytes) { | 5477 | if (num_bytes) |
5259 | spin_lock(&space_info->lock); | 5478 | space_info_add_old_bytes(fs_info, space_info, |
5260 | space_info->bytes_may_use -= num_bytes; | 5479 | num_bytes); |
5261 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5262 | space_info->flags, num_bytes, 0); | ||
5263 | spin_unlock(&space_info->lock); | ||
5264 | } | ||
5265 | } | 5480 | } |
5266 | } | 5481 | } |
5267 | 5482 | ||
@@ -6470,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, | |||
6470 | readonly = true; | 6685 | readonly = true; |
6471 | } | 6686 | } |
6472 | spin_unlock(&cache->lock); | 6687 | spin_unlock(&cache->lock); |
6473 | if (!readonly && global_rsv->space_info == space_info) { | 6688 | if (!readonly && return_free_space && |
6689 | global_rsv->space_info == space_info) { | ||
6690 | u64 to_add = len; | ||
6691 | WARN_ON(!return_free_space); | ||
6474 | spin_lock(&global_rsv->lock); | 6692 | spin_lock(&global_rsv->lock); |
6475 | if (!global_rsv->full) { | 6693 | if (!global_rsv->full) { |
6476 | len = min(len, global_rsv->size - | 6694 | to_add = min(len, global_rsv->size - |
6477 | global_rsv->reserved); | 6695 | global_rsv->reserved); |
6478 | global_rsv->reserved += len; | 6696 | global_rsv->reserved += to_add; |
6479 | space_info->bytes_may_use += len; | 6697 | space_info->bytes_may_use += to_add; |
6480 | if (global_rsv->reserved >= global_rsv->size) | 6698 | if (global_rsv->reserved >= global_rsv->size) |
6481 | global_rsv->full = 1; | 6699 | global_rsv->full = 1; |
6700 | trace_btrfs_space_reservation(fs_info, | ||
6701 | "space_info", | ||
6702 | space_info->flags, | ||
6703 | to_add, 1); | ||
6704 | len -= to_add; | ||
6482 | } | 6705 | } |
6483 | spin_unlock(&global_rsv->lock); | 6706 | spin_unlock(&global_rsv->lock); |
6707 | /* Add to any tickets we may have */ | ||
6708 | if (len) | ||
6709 | space_info_add_new_bytes(fs_info, space_info, | ||
6710 | len); | ||
6484 | } | 6711 | } |
6485 | spin_unlock(&space_info->lock); | 6712 | spin_unlock(&space_info->lock); |
6486 | } | 6713 | } |