 fs/btrfs/ctree.h       |   2 +
 fs/btrfs/extent-tree.c | 529 ++++++++++++++++++++++++++++++-----------
 2 files changed, 380 insertions(+), 151 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe16474fabf3..2e04c9d6f21d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -439,6 +439,8 @@ struct btrfs_space_info {
         struct list_head list;
         /* Protected by the spinlock 'lock'. */
         struct list_head ro_bgs;
+        struct list_head priority_tickets;
+        struct list_head tickets;
 
         struct rw_semaphore groups_sem;
         /* for block groups in our same type */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aae7b04afa9f..2c17b621a661 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                              u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, int reserved);
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+                                    struct btrfs_space_info *space_info,
+                                    u64 orig_bytes,
+                                    enum btrfs_reserve_flush_enum flush);
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_space_info *space_info,
+                                     u64 num_bytes);
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_space_info *space_info,
+                                     u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -3937,6 +3947,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
         found->bytes_readonly += bytes_readonly;
         if (total_bytes > 0)
                 found->full = 0;
+        space_info_add_new_bytes(info, found, total_bytes -
+                                 bytes_used - bytes_readonly);
         spin_unlock(&found->lock);
         *space_info = found;
         return 0;
@@ -3971,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
         found->flush = 0;
         init_waitqueue_head(&found->wait);
         INIT_LIST_HEAD(&found->ro_bgs);
+        INIT_LIST_HEAD(&found->tickets);
+        INIT_LIST_HEAD(&found->priority_tickets);
 
         ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
                                    info->space_info_kobj, "%s",
@@ -4584,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root,
                           struct btrfs_space_info *space_info, u64 bytes,
                           enum btrfs_reserve_flush_enum flush)
 {
-        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
-        u64 profile = btrfs_get_alloc_profile(root, 0);
+        struct btrfs_block_rsv *global_rsv;
+        u64 profile;
         u64 space_size;
         u64 avail;
         u64 used;
 
+        /* Don't overcommit when in mixed mode. */
+        if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+                return 0;
+
+        BUG_ON(root->fs_info == NULL);
+        global_rsv = &root->fs_info->global_block_rsv;
+        profile = btrfs_get_alloc_profile(root, 0);
         used = space_info->bytes_used + space_info->bytes_reserved +
                 space_info->bytes_pinned + space_info->bytes_readonly;
 
@@ -4741,6 +4762,11 @@ skip_async:
                         spin_unlock(&space_info->lock);
                         break;
                 }
+                if (list_empty(&space_info->tickets) &&
+                    list_empty(&space_info->priority_tickets)) {
+                        spin_unlock(&space_info->lock);
+                        break;
+                }
                 spin_unlock(&space_info->lock);
 
                 loops++;
@@ -4818,6 +4844,13 @@ enum flush_state {
         COMMIT_TRANS            =       6,
 };
 
+struct reserve_ticket {
+        u64 bytes;
+        int error;
+        struct list_head list;
+        wait_queue_head_t wait;
+};
+
 static int flush_space(struct btrfs_root *root,
                        struct btrfs_space_info *space_info, u64 num_bytes,
                        u64 orig_bytes, int state)
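
The reserve_ticket introduced above is the entire hand-off mechanism: a waiter publishes how many bytes it still needs in ->bytes, flushers whittle that count toward zero (or set ->error and give up), and ->wait is where the waiter sleeps in the meantime. As a rough userspace analogue of that lifecycle (illustrative only; pthreads stand in for the kernel's spinlock and per-ticket wait queues, and every name is hypothetical):

#include <pthread.h>
#include <stdint.h>

struct ticket {
        uint64_t bytes;         /* bytes still outstanding; 0 == satisfied */
        int error;              /* set if the flusher gives up */
        struct ticket *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static struct ticket *head;
static struct ticket **tail = &head;

/* Waiter side: queue a ticket, sleep until it is satisfied or failed. */
static int wait_ticket(struct ticket *t)
{
        pthread_mutex_lock(&lock);
        t->next = 0;
        *tail = t;
        tail = &t->next;
        while (t->bytes > 0 && t->error == 0)
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        return t->error;
}

/* Flusher side: hand freed bytes to waiters strictly in FIFO order. */
static void add_bytes(uint64_t bytes)
{
        pthread_mutex_lock(&lock);
        while (head && bytes) {
                struct ticket *t = head;
                uint64_t grant = bytes < t->bytes ? bytes : t->bytes;

                t->bytes -= grant;
                bytes -= grant;
                if (t->bytes == 0) {    /* fully satisfied: dequeue */
                        head = t->next;
                        if (!head)
                                tail = &head;
                }
        }
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
}

The FIFO hand-off is the point of the design: freed space goes to the oldest waiter first rather than to whichever task wins the race to retry, which the goto-again loop removed further down could not guarantee.
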
@@ -4875,17 +4908,22 @@ static inline u64
 btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
                                  struct btrfs_space_info *space_info)
 {
+        struct reserve_ticket *ticket;
         u64 used;
         u64 expected;
-        u64 to_reclaim;
+        u64 to_reclaim = 0;
 
         to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-        spin_lock(&space_info->lock);
         if (can_overcommit(root, space_info, to_reclaim,
-                           BTRFS_RESERVE_FLUSH_ALL)) {
-                to_reclaim = 0;
-                goto out;
-        }
+                           BTRFS_RESERVE_FLUSH_ALL))
+                return 0;
+
+        list_for_each_entry(ticket, &space_info->tickets, list)
+                to_reclaim += ticket->bytes;
+        list_for_each_entry(ticket, &space_info->priority_tickets, list)
+                to_reclaim += ticket->bytes;
+        if (to_reclaim)
+                return to_reclaim;
 
         used = space_info->bytes_used + space_info->bytes_reserved +
                 space_info->bytes_pinned + space_info->bytes_readonly +
@@ -4901,9 +4939,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
                 to_reclaim = 0;
         to_reclaim = min(to_reclaim, space_info->bytes_may_use +
                          space_info->bytes_reserved);
-out:
-        spin_unlock(&space_info->lock);
-
         return to_reclaim;
 }
 
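
Taken together, the two hunks above make the reclaim target demand-driven: when tickets are queued, the flusher aims for exactly the sum of their outstanding bytes, and the old estimate derived from used space survives only as a fallback (the spin_lock/spin_unlock pair disappears because callers of this function now hold space_info->lock themselves). A compilable sketch of that decision, assuming a hypothetical singly-linked list in place of the kernel's list_head walk:

#include <stdint.h>

struct tnode {                  /* hypothetical stand-in for a ticket list */
        uint64_t bytes;
        struct tnode *next;
};

/* Sum what queued tickets still need; fall back to the heuristic
 * guess only when nobody is waiting. */
static uint64_t calc_to_reclaim(const struct tnode *tickets,
                                const struct tnode *prio_tickets,
                                uint64_t heuristic_guess)
{
        uint64_t to_reclaim = 0;
        const struct tnode *t;

        for (t = tickets; t; t = t->next)
                to_reclaim += t->bytes;
        for (t = prio_tickets; t; t = t->next)
                to_reclaim += t->bytes;

        return to_reclaim ? to_reclaim : heuristic_guess;
}
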
@@ -4920,69 +4955,169 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
                 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
 }
 
-static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
-                                       struct btrfs_fs_info *fs_info,
-                                       int flush_state)
+static void wake_all_tickets(struct list_head *head)
 {
-        u64 used;
-
-        spin_lock(&space_info->lock);
-        /*
-         * We run out of space and have not got any free space via flush_space,
-         * so don't bother doing async reclaim.
-         */
-        if (flush_state > COMMIT_TRANS && space_info->full) {
-                spin_unlock(&space_info->lock);
-                return 0;
-        }
+        struct reserve_ticket *ticket;
 
-        used = space_info->bytes_used + space_info->bytes_reserved +
-               space_info->bytes_pinned + space_info->bytes_readonly +
-               space_info->bytes_may_use;
-        if (need_do_async_reclaim(space_info, fs_info, used)) {
-                spin_unlock(&space_info->lock);
-                return 1;
+        while (!list_empty(head)) {
+                ticket = list_first_entry(head, struct reserve_ticket, list);
+                list_del_init(&ticket->list);
+                ticket->error = -ENOSPC;
+                wake_up(&ticket->wait);
         }
-        spin_unlock(&space_info->lock);
-
-        return 0;
 }
 
+/*
+ * This is for normal flushers, we can wait all goddamned day if we want to. We
+ * will loop and continuously try to flush as long as we are making progress.
+ * We count progress as clearing off tickets each time we have to loop.
+ */
 static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 {
+        struct reserve_ticket *last_ticket = NULL;
         struct btrfs_fs_info *fs_info;
         struct btrfs_space_info *space_info;
         u64 to_reclaim;
         int flush_state;
+        int commit_cycles = 0;
 
         fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
         space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 
+        spin_lock(&space_info->lock);
         to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
                                                       space_info);
-        if (!to_reclaim)
+        if (!to_reclaim) {
+                space_info->flush = 0;
+                spin_unlock(&space_info->lock);
                 return;
+        }
+        last_ticket = list_first_entry(&space_info->tickets,
+                                       struct reserve_ticket, list);
+        spin_unlock(&space_info->lock);
 
         flush_state = FLUSH_DELAYED_ITEMS_NR;
         do {
+                struct reserve_ticket *ticket;
+                int ret;
+
+                ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
+                                  to_reclaim, flush_state);
+                spin_lock(&space_info->lock);
+                if (list_empty(&space_info->tickets)) {
+                        space_info->flush = 0;
+                        spin_unlock(&space_info->lock);
+                        return;
+                }
+                to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                              space_info);
+                ticket = list_first_entry(&space_info->tickets,
+                                          struct reserve_ticket, list);
+                if (last_ticket == ticket) {
+                        flush_state++;
+                } else {
+                        last_ticket = ticket;
+                        flush_state = FLUSH_DELAYED_ITEMS_NR;
+                        if (commit_cycles)
+                                commit_cycles--;
+                }
+
+                if (flush_state > COMMIT_TRANS) {
+                        commit_cycles++;
+                        if (commit_cycles > 2) {
+                                wake_all_tickets(&space_info->tickets);
+                                space_info->flush = 0;
+                        } else {
+                                flush_state = FLUSH_DELAYED_ITEMS_NR;
+                        }
+                }
+                spin_unlock(&space_info->lock);
+        } while (flush_state <= COMMIT_TRANS);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+        INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
+static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
+                                            struct btrfs_space_info *space_info,
+                                            struct reserve_ticket *ticket)
+{
+        u64 to_reclaim;
+        int flush_state = FLUSH_DELAYED_ITEMS_NR;
+
+        spin_lock(&space_info->lock);
+        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                      space_info);
+        if (!to_reclaim) {
+                spin_unlock(&space_info->lock);
+                return;
+        }
+        spin_unlock(&space_info->lock);
+
+        do {
                 flush_space(fs_info->fs_root, space_info, to_reclaim,
                             to_reclaim, flush_state);
                 flush_state++;
-                if (!btrfs_need_do_async_reclaim(space_info, fs_info,
-                                                 flush_state))
+                spin_lock(&space_info->lock);
+                if (ticket->bytes == 0) {
+                        spin_unlock(&space_info->lock);
                         return;
+                }
+                spin_unlock(&space_info->lock);
+
+                /*
+                 * Priority flushers can't wait on delalloc without
+                 * deadlocking.
+                 */
+                if (flush_state == FLUSH_DELALLOC ||
+                    flush_state == FLUSH_DELALLOC_WAIT)
+                        flush_state = ALLOC_CHUNK;
         } while (flush_state < COMMIT_TRANS);
 }
 
-void btrfs_init_async_reclaim_work(struct work_struct *work)
+static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
+                               struct btrfs_space_info *space_info,
+                               struct reserve_ticket *ticket, u64 orig_bytes)
+
 {
-        INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+        DEFINE_WAIT(wait);
+        int ret = 0;
+
+        spin_lock(&space_info->lock);
+        while (ticket->bytes > 0 && ticket->error == 0) {
+                ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+                if (ret) {
+                        ret = -EINTR;
+                        break;
+                }
+                spin_unlock(&space_info->lock);
+
+                schedule();
+
+                finish_wait(&ticket->wait, &wait);
+                spin_lock(&space_info->lock);
+        }
+        if (!ret)
+                ret = ticket->error;
+        if (!list_empty(&ticket->list))
+                list_del_init(&ticket->list);
+        if (ticket->bytes && ticket->bytes < orig_bytes) {
+                u64 num_bytes = orig_bytes - ticket->bytes;
+                space_info->bytes_may_use -= num_bytes;
+                trace_btrfs_space_reservation(fs_info, "space_info",
+                                              space_info->flags, num_bytes, 0);
+        }
+        spin_unlock(&space_info->lock);
+
+        return ret;
 }
 
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
+ * @space_info - the space info we want to allocate from
  * @orig_bytes - the number of bytes we want
  * @flush - whether or not we can flush to make our reservation
  *
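
The worker loop above encodes a simple notion of progress: the flush state escalates only while the ticket at the head of the queue stays the same, resets to the mildest state whenever a ticket gets cleared, and the whole queue is failed with -ENOSPC once repeated full passes through COMMIT_TRANS achieve nothing. One scheduling step, distilled into a standalone sketch (flush-state names as in this file; the locking and list handling are omitted):

#include <stdbool.h>

enum flush_state {
        FLUSH_DELAYED_ITEMS_NR  = 1,
        FLUSH_DELAYED_ITEMS     = 2,
        FLUSH_DELALLOC          = 3,
        FLUSH_DELALLOC_WAIT     = 4,
        ALLOC_CHUNK             = 5,
        COMMIT_TRANS            = 6,
};

/* Returns the next state to flush with, or 0 when the worker should
 * fail all remaining tickets with -ENOSPC and clear space_info->flush. */
static int next_flush_state(bool head_ticket_unchanged, int state,
                            int *commit_cycles)
{
        if (head_ticket_unchanged) {
                state++;                        /* no progress: escalate */
        } else {
                state = FLUSH_DELAYED_ITEMS_NR; /* progress: start over */
                if (*commit_cycles)
                        (*commit_cycles)--;
        }
        if (state > COMMIT_TRANS) {
                if (++(*commit_cycles) > 2)
                        return 0;               /* give up on the queue */
                state = FLUSH_DELAYED_ITEMS_NR;
        }
        return state;
}

Note that each satisfied ticket only buys back one commit cycle, so a loaded queue still needs sustained progress to stay clear of the give-up path.
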
@@ -4993,81 +5128,34 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
  * regain reservations will be made and this will fail if there is not enough
  * space already.
  */
-static int reserve_metadata_bytes(struct btrfs_root *root,
-                                  struct btrfs_block_rsv *block_rsv,
-                                  u64 orig_bytes,
-                                  enum btrfs_reserve_flush_enum flush)
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+                                    struct btrfs_space_info *space_info,
+                                    u64 orig_bytes,
+                                    enum btrfs_reserve_flush_enum flush)
 {
-        struct btrfs_space_info *space_info = block_rsv->space_info;
+        struct reserve_ticket ticket;
         u64 used;
-        u64 num_bytes = orig_bytes;
-        int flush_state = FLUSH_DELAYED_ITEMS_NR;
         int ret = 0;
-        bool flushing = false;
 
-again:
-        ret = 0;
+        ASSERT(orig_bytes);
         spin_lock(&space_info->lock);
-        /*
-         * We only want to wait if somebody other than us is flushing and we
-         * are actually allowed to flush all things.
-         */
-        while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
-               space_info->flush) {
-                spin_unlock(&space_info->lock);
-                /*
-                 * If we have a trans handle we can't wait because the flusher
-                 * may have to commit the transaction, which would mean we would
-                 * deadlock since we are waiting for the flusher to finish, but
-                 * hold the current transaction open.
-                 */
-                if (current->journal_info)
-                        return -EAGAIN;
-                ret = wait_event_killable(space_info->wait, !space_info->flush);
-                /* Must have been killed, return */
-                if (ret)
-                        return -EINTR;
-
-                spin_lock(&space_info->lock);
-        }
-
         ret = -ENOSPC;
         used = space_info->bytes_used + space_info->bytes_reserved +
                 space_info->bytes_pinned + space_info->bytes_readonly +
                 space_info->bytes_may_use;
 
         /*
-         * The idea here is that we've not already over-reserved the block group
-         * then we can go ahead and save our reservation first and then start
-         * flushing if we need to. Otherwise if we've already overcommitted
-         * lets start flushing stuff first and then come back and try to make
-         * our reservation.
+         * If we have enough space then hooray, make our reservation and carry
+         * on. If not see if we can overcommit, and if we can, hooray carry on.
+         * If not things get more complicated.
          */
-        if (used <= space_info->total_bytes) {
-                if (used + orig_bytes <= space_info->total_bytes) {
-                        space_info->bytes_may_use += orig_bytes;
-                        trace_btrfs_space_reservation(root->fs_info,
-                                "space_info", space_info->flags, orig_bytes, 1);
-                        ret = 0;
-                } else {
-                        /*
-                         * Ok set num_bytes to orig_bytes since we aren't
-                         * overocmmitted, this way we only try and reclaim what
-                         * we need.
-                         */
-                        num_bytes = orig_bytes;
-                }
-        } else {
-                /*
-                 * Ok we're over committed, set num_bytes to the overcommitted
-                 * amount plus the amount of bytes that we need for this
-                 * reservation.
-                 */
-                num_bytes = used - space_info->total_bytes +
-                        (orig_bytes * 2);
-        }
-
-        if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
+        if (used + orig_bytes <= space_info->total_bytes) {
+                space_info->bytes_may_use += orig_bytes;
+                trace_btrfs_space_reservation(root->fs_info, "space_info",
+                                              space_info->flags, orig_bytes,
+                                              1);
+                ret = 0;
+        } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
                 space_info->bytes_may_use += orig_bytes;
                 trace_btrfs_space_reservation(root->fs_info, "space_info",
                                               space_info->flags, orig_bytes,
@@ -5076,16 +5164,27 @@ again:
         }
 
         /*
-         * Couldn't make our reservation, save our place so while we're trying
-         * to reclaim space we can actually use it instead of somebody else
-         * stealing it from us.
+         * If we couldn't make a reservation then setup our reservation ticket
+         * and kick the async worker if it's not already running.
          *
-         * We make the other tasks wait for the flush only when we can flush
-         * all things.
+         * If we are a priority flusher then we just need to add our ticket to
+         * the list and we will do our own flushing further down.
          */
         if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
-                flushing = true;
-                space_info->flush = 1;
+                ticket.bytes = orig_bytes;
+                ticket.error = 0;
+                init_waitqueue_head(&ticket.wait);
+                if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+                        list_add_tail(&ticket.list, &space_info->tickets);
+                        if (!space_info->flush) {
+                                space_info->flush = 1;
+                                queue_work(system_unbound_wq,
+                                           &root->fs_info->async_reclaim_work);
+                        }
+                } else {
+                        list_add_tail(&ticket.list,
+                                      &space_info->priority_tickets);
+                }
         } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
                 used += orig_bytes;
                 /*
@@ -5100,33 +5199,56 @@ again:
                                    &root->fs_info->async_reclaim_work);
         }
         spin_unlock(&space_info->lock);
-
         if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
-                goto out;
+                return ret;
 
-        ret = flush_space(root, space_info, num_bytes, orig_bytes,
-                          flush_state);
-        flush_state++;
+        if (flush == BTRFS_RESERVE_FLUSH_ALL)
+                return wait_reserve_ticket(root->fs_info, space_info, &ticket,
+                                           orig_bytes);
 
-        /*
-         * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
-         * would happen. So skip delalloc flush.
-         */
-        if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
-            (flush_state == FLUSH_DELALLOC ||
-             flush_state == FLUSH_DELALLOC_WAIT))
-                flush_state = ALLOC_CHUNK;
+        ret = 0;
+        priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
+        spin_lock(&space_info->lock);
+        if (ticket.bytes) {
+                if (ticket.bytes < orig_bytes) {
+                        u64 num_bytes = orig_bytes - ticket.bytes;
+                        space_info->bytes_may_use -= num_bytes;
+                        trace_btrfs_space_reservation(root->fs_info,
+                                        "space_info", space_info->flags,
+                                        num_bytes, 0);
 
-        if (!ret)
-                goto again;
-        else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
-                 flush_state < COMMIT_TRANS)
-                goto again;
-        else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
-                 flush_state <= COMMIT_TRANS)
-                goto again;
+                }
+                list_del_init(&ticket.list);
+                ret = -ENOSPC;
+        }
+        spin_unlock(&space_info->lock);
+        ASSERT(list_empty(&ticket.list));
+        return ret;
+}
 
-out:
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * This will reserve orig_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+static int reserve_metadata_bytes(struct btrfs_root *root,
+                                  struct btrfs_block_rsv *block_rsv,
+                                  u64 orig_bytes,
+                                  enum btrfs_reserve_flush_enum flush)
+{
+        int ret;
+
+        ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
+                                       flush);
         if (ret == -ENOSPC &&
             unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
                 struct btrfs_block_rsv *global_rsv =
@@ -5139,13 +5261,8 @@ out:
         if (ret == -ENOSPC)
                 trace_btrfs_space_reservation(root->fs_info,
                                               "space_info:enospc",
-                                              space_info->flags, orig_bytes, 1);
-        if (flushing) {
-                spin_lock(&space_info->lock);
-                space_info->flush = 0;
-                wake_up_all(&space_info->wait);
-                spin_unlock(&space_info->lock);
-        }
+                                              block_rsv->space_info->flags,
+                                              orig_bytes, 1);
         return ret;
 }
 
@@ -5221,6 +5338,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
         return 0;
 }
 
+/*
+ * This is for space we already have accounted in space_info->bytes_may_use, so
+ * basically when we're returning space from block_rsv's.
+ */
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_space_info *space_info,
+                                     u64 num_bytes)
+{
+        struct reserve_ticket *ticket;
+        struct list_head *head;
+        u64 used;
+        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+        bool check_overcommit = false;
+
+        spin_lock(&space_info->lock);
+        head = &space_info->priority_tickets;
+
+        /*
+         * If we are over our limit then we need to check and see if we can
+         * overcommit, and if we can't then we just need to free up our space
+         * and not satisfy any requests.
+         */
+        used = space_info->bytes_used + space_info->bytes_reserved +
+                space_info->bytes_pinned + space_info->bytes_readonly +
+                space_info->bytes_may_use;
+        if (used - num_bytes >= space_info->total_bytes)
+                check_overcommit = true;
+again:
+        while (!list_empty(head) && num_bytes) {
+                ticket = list_first_entry(head, struct reserve_ticket,
+                                          list);
+                /*
+                 * We use 0 bytes because this space is already reserved, so
+                 * adding the ticket space would be a double count.
+                 */
+                if (check_overcommit &&
+                    !can_overcommit(fs_info->extent_root, space_info, 0,
+                                    flush))
+                        break;
+                if (num_bytes >= ticket->bytes) {
+                        list_del_init(&ticket->list);
+                        num_bytes -= ticket->bytes;
+                        ticket->bytes = 0;
+                        wake_up(&ticket->wait);
+                } else {
+                        ticket->bytes -= num_bytes;
+                        num_bytes = 0;
+                }
+        }
+
+        if (num_bytes && head == &space_info->priority_tickets) {
+                head = &space_info->tickets;
+                flush = BTRFS_RESERVE_FLUSH_ALL;
+                goto again;
+        }
+        space_info->bytes_may_use -= num_bytes;
+        trace_btrfs_space_reservation(fs_info, "space_info",
+                                      space_info->flags, num_bytes, 0);
+        spin_unlock(&space_info->lock);
+}
+
+/*
+ * This is for newly allocated space that isn't accounted in
+ * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
+ * we use this helper.
+ */
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_space_info *space_info,
+                                     u64 num_bytes)
+{
+        struct reserve_ticket *ticket;
+        struct list_head *head = &space_info->priority_tickets;
+
+again:
+        while (!list_empty(head) && num_bytes) {
+                ticket = list_first_entry(head, struct reserve_ticket,
+                                          list);
+                if (num_bytes >= ticket->bytes) {
+                        trace_btrfs_space_reservation(fs_info, "space_info",
+                                                      space_info->flags,
+                                                      ticket->bytes, 1);
+                        list_del_init(&ticket->list);
+                        num_bytes -= ticket->bytes;
+                        space_info->bytes_may_use += ticket->bytes;
+                        ticket->bytes = 0;
+                        wake_up(&ticket->wait);
+                } else {
+                        trace_btrfs_space_reservation(fs_info, "space_info",
+                                                      space_info->flags,
+                                                      num_bytes, 1);
+                        space_info->bytes_may_use += num_bytes;
+                        ticket->bytes -= num_bytes;
+                        num_bytes = 0;
+                }
+        }
+
+        if (num_bytes && head == &space_info->priority_tickets) {
+                head = &space_info->tickets;
+                goto again;
+        }
+}
+
 static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                     struct btrfs_block_rsv *block_rsv,
                                     struct btrfs_block_rsv *dest, u64 num_bytes)
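
The two helpers above are mirror images that differ only in accounting: "old" bytes coming back from a block_rsv are already counted in space_info->bytes_may_use, so granting them to a ticket is accounting-neutral and only the unclaimed remainder is subtracted, while "new" bytes from a fresh chunk or an unpinned extent were never counted, so every byte granted has to be added. A condensed illustration with hypothetical types (the real code walks priority_tickets before tickets, rechecks can_overcommit() when the space_info is over its limit, wakes each satisfied waiter, and emits tracepoints):

#include <stdint.h>
#include <stdbool.h>

struct rticket {
        uint64_t bytes;
        struct rticket *next;
};

struct sinfo {
        uint64_t bytes_may_use;
        struct rticket *tickets;        /* FIFO of waiting reservations */
};

/* Grant up to num_bytes of returned ("old") or fresh ("new") space. */
static void grant_bytes(struct sinfo *si, uint64_t num_bytes, bool is_new)
{
        while (si->tickets && num_bytes) {
                struct rticket *t = si->tickets;
                uint64_t grant = num_bytes < t->bytes ? num_bytes : t->bytes;

                t->bytes -= grant;
                num_bytes -= grant;
                if (is_new)             /* fresh space was never counted */
                        si->bytes_may_use += grant;
                if (t->bytes == 0)      /* satisfied: dequeue and wake */
                        si->tickets = t->next;
        }
        /* Returned space nobody claimed stops being reserved at all. */
        if (!is_new)
                si->bytes_may_use -= num_bytes;
}
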
@@ -5255,13 +5474,9 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                 }
                 spin_unlock(&dest->lock);
         }
-        if (num_bytes) {
-                spin_lock(&space_info->lock);
-                space_info->bytes_may_use -= num_bytes;
-                trace_btrfs_space_reservation(fs_info, "space_info",
-                                space_info->flags, num_bytes, 0);
-                spin_unlock(&space_info->lock);
-        }
+        if (num_bytes)
+                space_info_add_old_bytes(fs_info, space_info,
+                                         num_bytes);
         }
 }
 
@@ -6470,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
                         readonly = true;
                 }
                 spin_unlock(&cache->lock);
-                if (!readonly && global_rsv->space_info == space_info) {
+                if (!readonly && return_free_space &&
+                    global_rsv->space_info == space_info) {
+                        u64 to_add = len;
+                        WARN_ON(!return_free_space);
                         spin_lock(&global_rsv->lock);
                         if (!global_rsv->full) {
-                                len = min(len, global_rsv->size -
-                                          global_rsv->reserved);
-                                global_rsv->reserved += len;
-                                space_info->bytes_may_use += len;
+                                to_add = min(len, global_rsv->size -
+                                             global_rsv->reserved);
+                                global_rsv->reserved += to_add;
+                                space_info->bytes_may_use += to_add;
                                 if (global_rsv->reserved >= global_rsv->size)
                                         global_rsv->full = 1;
+                                trace_btrfs_space_reservation(fs_info,
+                                                              "space_info",
+                                                              space_info->flags,
+                                                              to_add, 1);
+                                len -= to_add;
                         }
                         spin_unlock(&global_rsv->lock);
+                        /* Add to any tickets we may have */
+                        if (len)
+                                space_info_add_new_bytes(fs_info, space_info,
+                                                         len);
                 }
                 spin_unlock(&space_info->lock);
         }