about summary refs log tree commit diff stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 731
1 files changed, 469 insertions, 262 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b480fd555774..e9376b1657e2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
111 u64 num_bytes); 111 u64 num_bytes);
112int btrfs_pin_extent(struct btrfs_root *root, 112int btrfs_pin_extent(struct btrfs_root *root,
113 u64 bytenr, u64 num_bytes, int reserved); 113 u64 bytenr, u64 num_bytes, int reserved);
114static int __reserve_metadata_bytes(struct btrfs_root *root,
115 struct btrfs_space_info *space_info,
116 u64 orig_bytes,
117 enum btrfs_reserve_flush_enum flush);
118static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
119 struct btrfs_space_info *space_info,
120 u64 num_bytes);
121static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
122 struct btrfs_space_info *space_info,
123 u64 num_bytes);
114 124
115static noinline int 125static noinline int
116block_group_cache_done(struct btrfs_block_group_cache *cache) 126block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags)
3913 3923
3914static int update_space_info(struct btrfs_fs_info *info, u64 flags, 3924static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3915 u64 total_bytes, u64 bytes_used, 3925 u64 total_bytes, u64 bytes_used,
3926 u64 bytes_readonly,
3916 struct btrfs_space_info **space_info) 3927 struct btrfs_space_info **space_info)
3917{ 3928{
3918 struct btrfs_space_info *found; 3929 struct btrfs_space_info *found;
@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3933 found->disk_total += total_bytes * factor; 3944 found->disk_total += total_bytes * factor;
3934 found->bytes_used += bytes_used; 3945 found->bytes_used += bytes_used;
3935 found->disk_used += bytes_used * factor; 3946 found->disk_used += bytes_used * factor;
3947 found->bytes_readonly += bytes_readonly;
3936 if (total_bytes > 0) 3948 if (total_bytes > 0)
3937 found->full = 0; 3949 found->full = 0;
3950 space_info_add_new_bytes(info, found, total_bytes -
3951 bytes_used - bytes_readonly);
3938 spin_unlock(&found->lock); 3952 spin_unlock(&found->lock);
3939 *space_info = found; 3953 *space_info = found;
3940 return 0; 3954 return 0;
@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3960 found->disk_used = bytes_used * factor; 3974 found->disk_used = bytes_used * factor;
3961 found->bytes_pinned = 0; 3975 found->bytes_pinned = 0;
3962 found->bytes_reserved = 0; 3976 found->bytes_reserved = 0;
3963 found->bytes_readonly = 0; 3977 found->bytes_readonly = bytes_readonly;
3964 found->bytes_may_use = 0; 3978 found->bytes_may_use = 0;
3965 found->full = 0; 3979 found->full = 0;
3966 found->max_extent_size = 0; 3980 found->max_extent_size = 0;
@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3969 found->flush = 0; 3983 found->flush = 0;
3970 init_waitqueue_head(&found->wait); 3984 init_waitqueue_head(&found->wait);
3971 INIT_LIST_HEAD(&found->ro_bgs); 3985 INIT_LIST_HEAD(&found->ro_bgs);
3986 INIT_LIST_HEAD(&found->tickets);
3987 INIT_LIST_HEAD(&found->priority_tickets);
3972 3988
3973 ret = kobject_init_and_add(&found->kobj, &space_info_ktype, 3989 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3974 info->space_info_kobj, "%s", 3990 info->space_info_kobj, "%s",
@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4470 space_info = __find_space_info(extent_root->fs_info, flags); 4486 space_info = __find_space_info(extent_root->fs_info, flags);
4471 if (!space_info) { 4487 if (!space_info) {
4472 ret = update_space_info(extent_root->fs_info, flags, 4488 ret = update_space_info(extent_root->fs_info, flags,
4473 0, 0, &space_info); 4489 0, 0, 0, &space_info);
4474 BUG_ON(ret); /* -ENOMEM */ 4490 BUG_ON(ret); /* -ENOMEM */
4475 } 4491 }
4476 BUG_ON(!space_info); /* Logic error */ 4492 BUG_ON(!space_info); /* Logic error */
@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root,
4582 struct btrfs_space_info *space_info, u64 bytes, 4598 struct btrfs_space_info *space_info, u64 bytes,
4583 enum btrfs_reserve_flush_enum flush) 4599 enum btrfs_reserve_flush_enum flush)
4584{ 4600{
4585 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 4601 struct btrfs_block_rsv *global_rsv;
4586 u64 profile = btrfs_get_alloc_profile(root, 0); 4602 u64 profile;
4587 u64 space_size; 4603 u64 space_size;
4588 u64 avail; 4604 u64 avail;
4589 u64 used; 4605 u64 used;
4590 4606
4607 /* Don't overcommit when in mixed mode. */
4608 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4609 return 0;
4610
4611 BUG_ON(root->fs_info == NULL);
4612 global_rsv = &root->fs_info->global_block_rsv;
4613 profile = btrfs_get_alloc_profile(root, 0);
4591 used = space_info->bytes_used + space_info->bytes_reserved + 4614 used = space_info->bytes_used + space_info->bytes_reserved +
4592 space_info->bytes_pinned + space_info->bytes_readonly; 4615 space_info->bytes_pinned + space_info->bytes_readonly;
4593 4616
@@ -4739,6 +4762,11 @@ skip_async:
4739 spin_unlock(&space_info->lock); 4762 spin_unlock(&space_info->lock);
4740 break; 4763 break;
4741 } 4764 }
4765 if (list_empty(&space_info->tickets) &&
4766 list_empty(&space_info->priority_tickets)) {
4767 spin_unlock(&space_info->lock);
4768 break;
4769 }
4742 spin_unlock(&space_info->lock); 4770 spin_unlock(&space_info->lock);
4743 4771
4744 loops++; 4772 loops++;
@@ -4807,13 +4835,11 @@ commit:
4807 return btrfs_commit_transaction(trans, root); 4835 return btrfs_commit_transaction(trans, root);
4808} 4836}
4809 4837
4810enum flush_state { 4838struct reserve_ticket {
4811 FLUSH_DELAYED_ITEMS_NR = 1, 4839 u64 bytes;
4812 FLUSH_DELAYED_ITEMS = 2, 4840 int error;
4813 FLUSH_DELALLOC = 3, 4841 struct list_head list;
4814 FLUSH_DELALLOC_WAIT = 4, 4842 wait_queue_head_t wait;
4815 ALLOC_CHUNK = 5,
4816 COMMIT_TRANS = 6,
4817}; 4843};
4818 4844
4819static int flush_space(struct btrfs_root *root, 4845static int flush_space(struct btrfs_root *root,
@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root,
4866 break; 4892 break;
4867 } 4893 }
4868 4894
4895 trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
4896 orig_bytes, state, ret);
4869 return ret; 4897 return ret;
4870} 4898}
4871 4899
@@ -4873,17 +4901,22 @@ static inline u64
4873btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, 4901btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4874 struct btrfs_space_info *space_info) 4902 struct btrfs_space_info *space_info)
4875{ 4903{
4904 struct reserve_ticket *ticket;
4876 u64 used; 4905 u64 used;
4877 u64 expected; 4906 u64 expected;
4878 u64 to_reclaim; 4907 u64 to_reclaim = 0;
4879 4908
4880 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); 4909 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4881 spin_lock(&space_info->lock);
4882 if (can_overcommit(root, space_info, to_reclaim, 4910 if (can_overcommit(root, space_info, to_reclaim,
4883 BTRFS_RESERVE_FLUSH_ALL)) { 4911 BTRFS_RESERVE_FLUSH_ALL))
4884 to_reclaim = 0; 4912 return 0;
4885 goto out; 4913
4886 } 4914 list_for_each_entry(ticket, &space_info->tickets, list)
4915 to_reclaim += ticket->bytes;
4916 list_for_each_entry(ticket, &space_info->priority_tickets, list)
4917 to_reclaim += ticket->bytes;
4918 if (to_reclaim)
4919 return to_reclaim;
4887 4920
4888 used = space_info->bytes_used + space_info->bytes_reserved + 4921 used = space_info->bytes_used + space_info->bytes_reserved +
4889 space_info->bytes_pinned + space_info->bytes_readonly + 4922 space_info->bytes_pinned + space_info->bytes_readonly +
@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4899 to_reclaim = 0; 4932 to_reclaim = 0;
4900 to_reclaim = min(to_reclaim, space_info->bytes_may_use + 4933 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4901 space_info->bytes_reserved); 4934 space_info->bytes_reserved);
4902out:
4903 spin_unlock(&space_info->lock);
4904
4905 return to_reclaim; 4935 return to_reclaim;
4906} 4936}
4907 4937
4908static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, 4938static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4909 struct btrfs_fs_info *fs_info, u64 used) 4939 struct btrfs_root *root, u64 used)
4910{ 4940{
4911 u64 thresh = div_factor_fine(space_info->total_bytes, 98); 4941 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4912 4942
@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4914 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) 4944 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4915 return 0; 4945 return 0;
4916 4946
4917 return (used >= thresh && !btrfs_fs_closing(fs_info) && 4947 if (!btrfs_calc_reclaim_metadata_size(root, space_info))
4918 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); 4948 return 0;
4949
4950 return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
4951 !test_bit(BTRFS_FS_STATE_REMOUNTING,
4952 &root->fs_info->fs_state));
4919} 4953}
4920 4954
4921static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, 4955static void wake_all_tickets(struct list_head *head)
4922 struct btrfs_fs_info *fs_info,
4923 int flush_state)
4924{ 4956{
4925 u64 used; 4957 struct reserve_ticket *ticket;
4926
4927 spin_lock(&space_info->lock);
4928 /*
4929 * We run out of space and have not got any free space via flush_space,
4930 * so don't bother doing async reclaim.
4931 */
4932 if (flush_state > COMMIT_TRANS && space_info->full) {
4933 spin_unlock(&space_info->lock);
4934 return 0;
4935 }
4936 4958
4937 used = space_info->bytes_used + space_info->bytes_reserved + 4959 while (!list_empty(head)) {
4938 space_info->bytes_pinned + space_info->bytes_readonly + 4960 ticket = list_first_entry(head, struct reserve_ticket, list);
4939 space_info->bytes_may_use; 4961 list_del_init(&ticket->list);
4940 if (need_do_async_reclaim(space_info, fs_info, used)) { 4962 ticket->error = -ENOSPC;
4941 spin_unlock(&space_info->lock); 4963 wake_up(&ticket->wait);
4942 return 1;
4943 } 4964 }
4944 spin_unlock(&space_info->lock);
4945
4946 return 0;
4947} 4965}
4948 4966
4967/*
4968 * This is for normal flushers, we can wait all goddamned day if we want to. We
4969 * will loop and continuously try to flush as long as we are making progress.
4970 * We count progress as clearing off tickets each time we have to loop.
4971 */
4949static void btrfs_async_reclaim_metadata_space(struct work_struct *work) 4972static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4950{ 4973{
4974 struct reserve_ticket *last_ticket = NULL;
4951 struct btrfs_fs_info *fs_info; 4975 struct btrfs_fs_info *fs_info;
4952 struct btrfs_space_info *space_info; 4976 struct btrfs_space_info *space_info;
4953 u64 to_reclaim; 4977 u64 to_reclaim;
4954 int flush_state; 4978 int flush_state;
4979 int commit_cycles = 0;
4955 4980
4956 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); 4981 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4957 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 4982 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4958 4983
4984 spin_lock(&space_info->lock);
4959 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, 4985 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
4960 space_info); 4986 space_info);
4961 if (!to_reclaim) 4987 if (!to_reclaim) {
4988 space_info->flush = 0;
4989 spin_unlock(&space_info->lock);
4962 return; 4990 return;
4991 }
4992 last_ticket = list_first_entry(&space_info->tickets,
4993 struct reserve_ticket, list);
4994 spin_unlock(&space_info->lock);
4963 4995
4964 flush_state = FLUSH_DELAYED_ITEMS_NR; 4996 flush_state = FLUSH_DELAYED_ITEMS_NR;
4965 do { 4997 do {
4998 struct reserve_ticket *ticket;
4999 int ret;
5000
5001 ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
5002 to_reclaim, flush_state);
5003 spin_lock(&space_info->lock);
5004 if (list_empty(&space_info->tickets)) {
5005 space_info->flush = 0;
5006 spin_unlock(&space_info->lock);
5007 return;
5008 }
5009 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
5010 space_info);
5011 ticket = list_first_entry(&space_info->tickets,
5012 struct reserve_ticket, list);
5013 if (last_ticket == ticket) {
5014 flush_state++;
5015 } else {
5016 last_ticket = ticket;
5017 flush_state = FLUSH_DELAYED_ITEMS_NR;
5018 if (commit_cycles)
5019 commit_cycles--;
5020 }
5021
5022 if (flush_state > COMMIT_TRANS) {
5023 commit_cycles++;
5024 if (commit_cycles > 2) {
5025 wake_all_tickets(&space_info->tickets);
5026 space_info->flush = 0;
5027 } else {
5028 flush_state = FLUSH_DELAYED_ITEMS_NR;
5029 }
5030 }
5031 spin_unlock(&space_info->lock);
5032 } while (flush_state <= COMMIT_TRANS);
5033}
5034
5035void btrfs_init_async_reclaim_work(struct work_struct *work)
5036{
5037 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5038}
5039
5040static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5041 struct btrfs_space_info *space_info,
5042 struct reserve_ticket *ticket)
5043{
5044 u64 to_reclaim;
5045 int flush_state = FLUSH_DELAYED_ITEMS_NR;
5046
5047 spin_lock(&space_info->lock);
5048 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
5049 space_info);
5050 if (!to_reclaim) {
5051 spin_unlock(&space_info->lock);
5052 return;
5053 }
5054 spin_unlock(&space_info->lock);
5055
5056 do {
4966 flush_space(fs_info->fs_root, space_info, to_reclaim, 5057 flush_space(fs_info->fs_root, space_info, to_reclaim,
4967 to_reclaim, flush_state); 5058 to_reclaim, flush_state);
4968 flush_state++; 5059 flush_state++;
4969 if (!btrfs_need_do_async_reclaim(space_info, fs_info, 5060 spin_lock(&space_info->lock);
4970 flush_state)) 5061 if (ticket->bytes == 0) {
5062 spin_unlock(&space_info->lock);
4971 return; 5063 return;
5064 }
5065 spin_unlock(&space_info->lock);
5066
5067 /*
5068 * Priority flushers can't wait on delalloc without
5069 * deadlocking.
5070 */
5071 if (flush_state == FLUSH_DELALLOC ||
5072 flush_state == FLUSH_DELALLOC_WAIT)
5073 flush_state = ALLOC_CHUNK;
4972 } while (flush_state < COMMIT_TRANS); 5074 } while (flush_state < COMMIT_TRANS);
4973} 5075}
4974 5076
4975void btrfs_init_async_reclaim_work(struct work_struct *work) 5077static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5078 struct btrfs_space_info *space_info,
5079 struct reserve_ticket *ticket, u64 orig_bytes)
5080
4976{ 5081{
4977 INIT_WORK(work, btrfs_async_reclaim_metadata_space); 5082 DEFINE_WAIT(wait);
5083 int ret = 0;
5084
5085 spin_lock(&space_info->lock);
5086 while (ticket->bytes > 0 && ticket->error == 0) {
5087 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5088 if (ret) {
5089 ret = -EINTR;
5090 break;
5091 }
5092 spin_unlock(&space_info->lock);
5093
5094 schedule();
5095
5096 finish_wait(&ticket->wait, &wait);
5097 spin_lock(&space_info->lock);
5098 }
5099 if (!ret)
5100 ret = ticket->error;
5101 if (!list_empty(&ticket->list))
5102 list_del_init(&ticket->list);
5103 if (ticket->bytes && ticket->bytes < orig_bytes) {
5104 u64 num_bytes = orig_bytes - ticket->bytes;
5105 space_info->bytes_may_use -= num_bytes;
5106 trace_btrfs_space_reservation(fs_info, "space_info",
5107 space_info->flags, num_bytes, 0);
5108 }
5109 spin_unlock(&space_info->lock);
5110
5111 return ret;
4978} 5112}
4979 5113
4980/** 5114/**
4981 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space 5115 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
4982 * @root - the root we're allocating for 5116 * @root - the root we're allocating for
4983 * @block_rsv - the block_rsv we're allocating for 5117 * @space_info - the space info we want to allocate from
4984 * @orig_bytes - the number of bytes we want 5118 * @orig_bytes - the number of bytes we want
4985 * @flush - whether or not we can flush to make our reservation 5119 * @flush - whether or not we can flush to make our reservation
4986 * 5120 *
@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
4991 * regain reservations will be made and this will fail if there is not enough 5125 * regain reservations will be made and this will fail if there is not enough
4992 * space already. 5126 * space already.
4993 */ 5127 */
4994static int reserve_metadata_bytes(struct btrfs_root *root, 5128static int __reserve_metadata_bytes(struct btrfs_root *root,
4995 struct btrfs_block_rsv *block_rsv, 5129 struct btrfs_space_info *space_info,
4996 u64 orig_bytes, 5130 u64 orig_bytes,
4997 enum btrfs_reserve_flush_enum flush) 5131 enum btrfs_reserve_flush_enum flush)
4998{ 5132{
4999 struct btrfs_space_info *space_info = block_rsv->space_info; 5133 struct reserve_ticket ticket;
5000 u64 used; 5134 u64 used;
5001 u64 num_bytes = orig_bytes;
5002 int flush_state = FLUSH_DELAYED_ITEMS_NR;
5003 int ret = 0; 5135 int ret = 0;
5004 bool flushing = false;
5005 5136
5006again: 5137 ASSERT(orig_bytes);
5007 ret = 0; 5138 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5008 spin_lock(&space_info->lock);
5009 /*
5010 * We only want to wait if somebody other than us is flushing and we
5011 * are actually allowed to flush all things.
5012 */
5013 while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
5014 space_info->flush) {
5015 spin_unlock(&space_info->lock);
5016 /*
5017 * If we have a trans handle we can't wait because the flusher
5018 * may have to commit the transaction, which would mean we would
5019 * deadlock since we are waiting for the flusher to finish, but
5020 * hold the current transaction open.
5021 */
5022 if (current->journal_info)
5023 return -EAGAIN;
5024 ret = wait_event_killable(space_info->wait, !space_info->flush);
5025 /* Must have been killed, return */
5026 if (ret)
5027 return -EINTR;
5028
5029 spin_lock(&space_info->lock);
5030 }
5031 5139
5140 spin_lock(&space_info->lock);
5032 ret = -ENOSPC; 5141 ret = -ENOSPC;
5033 used = space_info->bytes_used + space_info->bytes_reserved + 5142 used = space_info->bytes_used + space_info->bytes_reserved +
5034 space_info->bytes_pinned + space_info->bytes_readonly + 5143 space_info->bytes_pinned + space_info->bytes_readonly +
5035 space_info->bytes_may_use; 5144 space_info->bytes_may_use;
5036 5145
5037 /* 5146 /*
5038 * The idea here is that we've not already over-reserved the block group 5147 * If we have enough space then hooray, make our reservation and carry
5039 * then we can go ahead and save our reservation first and then start 5148 * on. If not see if we can overcommit, and if we can, hooray carry on.
5040 * flushing if we need to. Otherwise if we've already overcommitted 5149 * If not things get more complicated.
5041 * lets start flushing stuff first and then come back and try to make
5042 * our reservation.
5043 */ 5150 */
5044 if (used <= space_info->total_bytes) { 5151 if (used + orig_bytes <= space_info->total_bytes) {
5045 if (used + orig_bytes <= space_info->total_bytes) { 5152 space_info->bytes_may_use += orig_bytes;
5046 space_info->bytes_may_use += orig_bytes; 5153 trace_btrfs_space_reservation(root->fs_info, "space_info",
5047 trace_btrfs_space_reservation(root->fs_info, 5154 space_info->flags, orig_bytes,
5048 "space_info", space_info->flags, orig_bytes, 1); 5155 1);
5049 ret = 0; 5156 ret = 0;
5050 } else { 5157 } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
5051 /*
5052 * Ok set num_bytes to orig_bytes since we aren't
5053 * overocmmitted, this way we only try and reclaim what
5054 * we need.
5055 */
5056 num_bytes = orig_bytes;
5057 }
5058 } else {
5059 /*
5060 * Ok we're over committed, set num_bytes to the overcommitted
5061 * amount plus the amount of bytes that we need for this
5062 * reservation.
5063 */
5064 num_bytes = used - space_info->total_bytes +
5065 (orig_bytes * 2);
5066 }
5067
5068 if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
5069 space_info->bytes_may_use += orig_bytes; 5158 space_info->bytes_may_use += orig_bytes;
5070 trace_btrfs_space_reservation(root->fs_info, "space_info", 5159 trace_btrfs_space_reservation(root->fs_info, "space_info",
5071 space_info->flags, orig_bytes, 5160 space_info->flags, orig_bytes,
@@ -5074,16 +5163,31 @@ again:
5074 } 5163 }
5075 5164
5076 /* 5165 /*
5077 * Couldn't make our reservation, save our place so while we're trying 5166 * If we couldn't make a reservation then setup our reservation ticket
5078 * to reclaim space we can actually use it instead of somebody else 5167 * and kick the async worker if it's not already running.
5079 * stealing it from us.
5080 * 5168 *
5081 * We make the other tasks wait for the flush only when we can flush 5169 * If we are a priority flusher then we just need to add our ticket to
5082 * all things. 5170 * the list and we will do our own flushing further down.
5083 */ 5171 */
5084 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { 5172 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5085 flushing = true; 5173 ticket.bytes = orig_bytes;
5086 space_info->flush = 1; 5174 ticket.error = 0;
5175 init_waitqueue_head(&ticket.wait);
5176 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5177 list_add_tail(&ticket.list, &space_info->tickets);
5178 if (!space_info->flush) {
5179 space_info->flush = 1;
5180 trace_btrfs_trigger_flush(root->fs_info,
5181 space_info->flags,
5182 orig_bytes, flush,
5183 "enospc");
5184 queue_work(system_unbound_wq,
5185 &root->fs_info->async_reclaim_work);
5186 }
5187 } else {
5188 list_add_tail(&ticket.list,
5189 &space_info->priority_tickets);
5190 }
5087 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { 5191 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5088 used += orig_bytes; 5192 used += orig_bytes;
5089 /* 5193 /*
@@ -5092,39 +5196,67 @@ again:
5092 * the async reclaim as we will panic. 5196 * the async reclaim as we will panic.
5093 */ 5197 */
5094 if (!root->fs_info->log_root_recovering && 5198 if (!root->fs_info->log_root_recovering &&
5095 need_do_async_reclaim(space_info, root->fs_info, used) && 5199 need_do_async_reclaim(space_info, root, used) &&
5096 !work_busy(&root->fs_info->async_reclaim_work)) 5200 !work_busy(&root->fs_info->async_reclaim_work)) {
5201 trace_btrfs_trigger_flush(root->fs_info,
5202 space_info->flags,
5203 orig_bytes, flush,
5204 "preempt");
5097 queue_work(system_unbound_wq, 5205 queue_work(system_unbound_wq,
5098 &root->fs_info->async_reclaim_work); 5206 &root->fs_info->async_reclaim_work);
5207 }
5099 } 5208 }
5100 spin_unlock(&space_info->lock); 5209 spin_unlock(&space_info->lock);
5101
5102 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) 5210 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5103 goto out; 5211 return ret;
5104 5212
5105 ret = flush_space(root, space_info, num_bytes, orig_bytes, 5213 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5106 flush_state); 5214 return wait_reserve_ticket(root->fs_info, space_info, &ticket,
5107 flush_state++; 5215 orig_bytes);
5108 5216
5109 /* 5217 ret = 0;
5110 * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock 5218 priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
5111 * would happen. So skip delalloc flush. 5219 spin_lock(&space_info->lock);
5112 */ 5220 if (ticket.bytes) {
5113 if (flush == BTRFS_RESERVE_FLUSH_LIMIT && 5221 if (ticket.bytes < orig_bytes) {
5114 (flush_state == FLUSH_DELALLOC || 5222 u64 num_bytes = orig_bytes - ticket.bytes;
5115 flush_state == FLUSH_DELALLOC_WAIT)) 5223 space_info->bytes_may_use -= num_bytes;
5116 flush_state = ALLOC_CHUNK; 5224 trace_btrfs_space_reservation(root->fs_info,
5225 "space_info", space_info->flags,
5226 num_bytes, 0);
5117 5227
5118 if (!ret) 5228 }
5119 goto again; 5229 list_del_init(&ticket.list);
5120 else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && 5230 ret = -ENOSPC;
5121 flush_state < COMMIT_TRANS) 5231 }
5122 goto again; 5232 spin_unlock(&space_info->lock);
5123 else if (flush == BTRFS_RESERVE_FLUSH_ALL && 5233 ASSERT(list_empty(&ticket.list));
5124 flush_state <= COMMIT_TRANS) 5234 return ret;
5125 goto again; 5235}
5126 5236
5127out: 5237/**
5238 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5239 * @root - the root we're allocating for
5240 * @block_rsv - the block_rsv we're allocating for
5241 * @orig_bytes - the number of bytes we want
5242 * @flush - whether or not we can flush to make our reservation
5243 *
5244 * This will reserve orig_bytes number of bytes from the space info associated
5245 * with the block_rsv. If there is not enough space it will make an attempt to
5246 * flush out space to make room. It will do this by flushing delalloc if
5247 * possible or committing the transaction. If flush is 0 then no attempts to
5248 * regain reservations will be made and this will fail if there is not enough
5249 * space already.
5250 */
5251static int reserve_metadata_bytes(struct btrfs_root *root,
5252 struct btrfs_block_rsv *block_rsv,
5253 u64 orig_bytes,
5254 enum btrfs_reserve_flush_enum flush)
5255{
5256 int ret;
5257
5258 ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
5259 flush);
5128 if (ret == -ENOSPC && 5260 if (ret == -ENOSPC &&
5129 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { 5261 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5130 struct btrfs_block_rsv *global_rsv = 5262 struct btrfs_block_rsv *global_rsv =
@@ -5137,13 +5269,8 @@ out:
5137 if (ret == -ENOSPC) 5269 if (ret == -ENOSPC)
5138 trace_btrfs_space_reservation(root->fs_info, 5270 trace_btrfs_space_reservation(root->fs_info,
5139 "space_info:enospc", 5271 "space_info:enospc",
5140 space_info->flags, orig_bytes, 1); 5272 block_rsv->space_info->flags,
5141 if (flushing) { 5273 orig_bytes, 1);
5142 spin_lock(&space_info->lock);
5143 space_info->flush = 0;
5144 wake_up_all(&space_info->wait);
5145 spin_unlock(&space_info->lock);
5146 }
5147 return ret; 5274 return ret;
5148} 5275}
5149 5276
@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5219 return 0; 5346 return 0;
5220} 5347}
5221 5348
5349/*
5350 * This is for space we already have accounted in space_info->bytes_may_use, so
5351 * basically when we're returning space from block_rsv's.
5352 */
5353static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5354 struct btrfs_space_info *space_info,
5355 u64 num_bytes)
5356{
5357 struct reserve_ticket *ticket;
5358 struct list_head *head;
5359 u64 used;
5360 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5361 bool check_overcommit = false;
5362
5363 spin_lock(&space_info->lock);
5364 head = &space_info->priority_tickets;
5365
5366 /*
5367 * If we are over our limit then we need to check and see if we can
5368 * overcommit, and if we can't then we just need to free up our space
5369 * and not satisfy any requests.
5370 */
5371 used = space_info->bytes_used + space_info->bytes_reserved +
5372 space_info->bytes_pinned + space_info->bytes_readonly +
5373 space_info->bytes_may_use;
5374 if (used - num_bytes >= space_info->total_bytes)
5375 check_overcommit = true;
5376again:
5377 while (!list_empty(head) && num_bytes) {
5378 ticket = list_first_entry(head, struct reserve_ticket,
5379 list);
5380 /*
5381 * We use 0 bytes because this space is already reserved, so
5382 * adding the ticket space would be a double count.
5383 */
5384 if (check_overcommit &&
5385 !can_overcommit(fs_info->extent_root, space_info, 0,
5386 flush))
5387 break;
5388 if (num_bytes >= ticket->bytes) {
5389 list_del_init(&ticket->list);
5390 num_bytes -= ticket->bytes;
5391 ticket->bytes = 0;
5392 wake_up(&ticket->wait);
5393 } else {
5394 ticket->bytes -= num_bytes;
5395 num_bytes = 0;
5396 }
5397 }
5398
5399 if (num_bytes && head == &space_info->priority_tickets) {
5400 head = &space_info->tickets;
5401 flush = BTRFS_RESERVE_FLUSH_ALL;
5402 goto again;
5403 }
5404 space_info->bytes_may_use -= num_bytes;
5405 trace_btrfs_space_reservation(fs_info, "space_info",
5406 space_info->flags, num_bytes, 0);
5407 spin_unlock(&space_info->lock);
5408}
5409
5410/*
5411 * This is for newly allocated space that isn't accounted in
5412 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
5413 * we use this helper.
5414 */
5415static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5416 struct btrfs_space_info *space_info,
5417 u64 num_bytes)
5418{
5419 struct reserve_ticket *ticket;
5420 struct list_head *head = &space_info->priority_tickets;
5421
5422again:
5423 while (!list_empty(head) && num_bytes) {
5424 ticket = list_first_entry(head, struct reserve_ticket,
5425 list);
5426 if (num_bytes >= ticket->bytes) {
5427 trace_btrfs_space_reservation(fs_info, "space_info",
5428 space_info->flags,
5429 ticket->bytes, 1);
5430 list_del_init(&ticket->list);
5431 num_bytes -= ticket->bytes;
5432 space_info->bytes_may_use += ticket->bytes;
5433 ticket->bytes = 0;
5434 wake_up(&ticket->wait);
5435 } else {
5436 trace_btrfs_space_reservation(fs_info, "space_info",
5437 space_info->flags,
5438 num_bytes, 1);
5439 space_info->bytes_may_use += num_bytes;
5440 ticket->bytes -= num_bytes;
5441 num_bytes = 0;
5442 }
5443 }
5444
5445 if (num_bytes && head == &space_info->priority_tickets) {
5446 head = &space_info->tickets;
5447 goto again;
5448 }
5449}
5450
5222static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, 5451static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5223 struct btrfs_block_rsv *block_rsv, 5452 struct btrfs_block_rsv *block_rsv,
5224 struct btrfs_block_rsv *dest, u64 num_bytes) 5453 struct btrfs_block_rsv *dest, u64 num_bytes)
@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5253 } 5482 }
5254 spin_unlock(&dest->lock); 5483 spin_unlock(&dest->lock);
5255 } 5484 }
5256 if (num_bytes) { 5485 if (num_bytes)
5257 spin_lock(&space_info->lock); 5486 space_info_add_old_bytes(fs_info, space_info,
5258 space_info->bytes_may_use -= num_bytes; 5487 num_bytes);
5259 trace_btrfs_space_reservation(fs_info, "space_info",
5260 space_info->flags, num_bytes, 0);
5261 spin_unlock(&space_info->lock);
5262 }
5263 } 5488 }
5264} 5489}
5265 5490
5266static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, 5491int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5267 struct btrfs_block_rsv *dst, u64 num_bytes) 5492 struct btrfs_block_rsv *dst, u64 num_bytes,
5493 int update_size)
5268{ 5494{
5269 int ret; 5495 int ret;
5270 5496
@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
5272 if (ret) 5498 if (ret)
5273 return ret; 5499 return ret;
5274 5500
5275 block_rsv_add_bytes(dst, num_bytes, 1); 5501 block_rsv_add_bytes(dst, num_bytes, update_size);
5276 return 0; 5502 return 0;
5277} 5503}
5278 5504
@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
5379 return ret; 5605 return ret;
5380} 5606}
5381 5607
5382int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
5383 struct btrfs_block_rsv *dst_rsv,
5384 u64 num_bytes)
5385{
5386 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5387}
5388
5389void btrfs_block_rsv_release(struct btrfs_root *root, 5608void btrfs_block_rsv_release(struct btrfs_root *root,
5390 struct btrfs_block_rsv *block_rsv, 5609 struct btrfs_block_rsv *block_rsv,
5391 u64 num_bytes) 5610 u64 num_bytes)
@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
5398 num_bytes); 5617 num_bytes);
5399} 5618}
5400 5619
5401/*
5402 * helper to calculate size of global block reservation.
5403 * the desired value is sum of space used by extent tree,
5404 * checksum tree and root tree
5405 */
5406static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
5407{
5408 struct btrfs_space_info *sinfo;
5409 u64 num_bytes;
5410 u64 meta_used;
5411 u64 data_used;
5412 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
5413
5414 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
5415 spin_lock(&sinfo->lock);
5416 data_used = sinfo->bytes_used;
5417 spin_unlock(&sinfo->lock);
5418
5419 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5420 spin_lock(&sinfo->lock);
5421 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
5422 data_used = 0;
5423 meta_used = sinfo->bytes_used;
5424 spin_unlock(&sinfo->lock);
5425
5426 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
5427 csum_size * 2;
5428 num_bytes += div_u64(data_used + meta_used, 50);
5429
5430 if (num_bytes * 3 > meta_used)
5431 num_bytes = div_u64(meta_used, 3);
5432
5433 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
5434}
5435
5436static void update_global_block_rsv(struct btrfs_fs_info *fs_info) 5620static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5437{ 5621{
5438 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 5622 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5439 struct btrfs_space_info *sinfo = block_rsv->space_info; 5623 struct btrfs_space_info *sinfo = block_rsv->space_info;
5440 u64 num_bytes; 5624 u64 num_bytes;
5441 5625
5442 num_bytes = calc_global_metadata_size(fs_info); 5626 /*
5627 * The global block rsv is based on the size of the extent tree, the
5628 * checksum tree and the root tree. If the fs is empty we want to set
5629 * it to a minimal amount for safety.
5630 */
5631 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5632 btrfs_root_used(&fs_info->csum_root->root_item) +
5633 btrfs_root_used(&fs_info->tree_root->root_item);
5634 num_bytes = max_t(u64, num_bytes, SZ_16M);
5443 5635
5444 spin_lock(&sinfo->lock); 5636 spin_lock(&sinfo->lock);
5445 spin_lock(&block_rsv->lock); 5637 spin_lock(&block_rsv->lock);
@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5554 struct inode *inode) 5746 struct inode *inode)
5555{ 5747{
5556 struct btrfs_root *root = BTRFS_I(inode)->root; 5748 struct btrfs_root *root = BTRFS_I(inode)->root;
5557 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 5749 /*
5750 * We always use trans->block_rsv here as we will have reserved space
5751 * for our orphan when starting the transaction, using get_block_rsv()
5752 * here will sometimes make us choose the wrong block rsv as we could be
5753 * doing a reloc inode for a non refcounted root.
5754 */
5755 struct btrfs_block_rsv *src_rsv = trans->block_rsv;
5558 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 5756 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
5559 5757
5560 /* 5758 /*
@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5565 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 5763 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
5566 trace_btrfs_space_reservation(root->fs_info, "orphan", 5764 trace_btrfs_space_reservation(root->fs_info, "orphan",
5567 btrfs_ino(inode), num_bytes, 1); 5765 btrfs_ino(inode), num_bytes, 1);
5568 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 5766 return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
5569} 5767}
5570 5768
5571void btrfs_orphan_release_metadata(struct inode *inode) 5769void btrfs_orphan_release_metadata(struct inode *inode)
@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5620 BTRFS_RESERVE_FLUSH_ALL); 5818 BTRFS_RESERVE_FLUSH_ALL);
5621 5819
5622 if (ret == -ENOSPC && use_global_rsv) 5820 if (ret == -ENOSPC && use_global_rsv)
5623 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); 5821 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
5624 5822
5625 if (ret && *qgroup_reserved) 5823 if (ret && *qgroup_reserved)
5626 btrfs_qgroup_free_meta(root, *qgroup_reserved); 5824 btrfs_qgroup_free_meta(root, *qgroup_reserved);
@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5730 u64 to_reserve = 0; 5928 u64 to_reserve = 0;
5731 u64 csum_bytes; 5929 u64 csum_bytes;
5732 unsigned nr_extents = 0; 5930 unsigned nr_extents = 0;
5733 int extra_reserve = 0;
5734 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 5931 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
5735 int ret = 0; 5932 int ret = 0;
5736 bool delalloc_lock = true; 5933 bool delalloc_lock = true;
5737 u64 to_free = 0; 5934 u64 to_free = 0;
5738 unsigned dropped; 5935 unsigned dropped;
5936 bool release_extra = false;
5739 5937
5740 /* If we are a free space inode we need to not flush since we will be in 5938 /* If we are a free space inode we need to not flush since we will be in
5741 * the middle of a transaction commit. We also don't need the delalloc 5939 * the middle of a transaction commit. We also don't need the delalloc
5742 * mutex since we won't race with anybody. We need this mostly to make 5940 * mutex since we won't race with anybody. We need this mostly to make
5743 * lockdep shut its filthy mouth. 5941 * lockdep shut its filthy mouth.
5942 *
5943 * If we have a transaction open (can happen if we call truncate_block
5944 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
5744 */ 5945 */
5745 if (btrfs_is_free_space_inode(inode)) { 5946 if (btrfs_is_free_space_inode(inode)) {
5746 flush = BTRFS_RESERVE_NO_FLUSH; 5947 flush = BTRFS_RESERVE_NO_FLUSH;
5747 delalloc_lock = false; 5948 delalloc_lock = false;
5949 } else if (current->journal_info) {
5950 flush = BTRFS_RESERVE_FLUSH_LIMIT;
5748 } 5951 }
5749 5952
5750 if (flush != BTRFS_RESERVE_NO_FLUSH && 5953 if (flush != BTRFS_RESERVE_NO_FLUSH &&
@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5761 BTRFS_MAX_EXTENT_SIZE - 1, 5964 BTRFS_MAX_EXTENT_SIZE - 1,
5762 BTRFS_MAX_EXTENT_SIZE); 5965 BTRFS_MAX_EXTENT_SIZE);
5763 BTRFS_I(inode)->outstanding_extents += nr_extents; 5966 BTRFS_I(inode)->outstanding_extents += nr_extents;
5764 nr_extents = 0;
5765 5967
5968 nr_extents = 0;
5766 if (BTRFS_I(inode)->outstanding_extents > 5969 if (BTRFS_I(inode)->outstanding_extents >
5767 BTRFS_I(inode)->reserved_extents) 5970 BTRFS_I(inode)->reserved_extents)
5768 nr_extents = BTRFS_I(inode)->outstanding_extents - 5971 nr_extents += BTRFS_I(inode)->outstanding_extents -
5769 BTRFS_I(inode)->reserved_extents; 5972 BTRFS_I(inode)->reserved_extents;
5770 5973
5771 /* 5974 /* We always want to reserve a slot for updating the inode. */
5772 * Add an item to reserve for updating the inode when we complete the 5975 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
5773 * delalloc io.
5774 */
5775 if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5776 &BTRFS_I(inode)->runtime_flags)) {
5777 nr_extents++;
5778 extra_reserve = 1;
5779 }
5780
5781 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
5782 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 5976 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
5783 csum_bytes = BTRFS_I(inode)->csum_bytes; 5977 csum_bytes = BTRFS_I(inode)->csum_bytes;
5784 spin_unlock(&BTRFS_I(inode)->lock); 5978 spin_unlock(&BTRFS_I(inode)->lock);
@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5790 goto out_fail; 5984 goto out_fail;
5791 } 5985 }
5792 5986
5793 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 5987 ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
5794 if (unlikely(ret)) { 5988 if (unlikely(ret)) {
5795 btrfs_qgroup_free_meta(root, nr_extents * root->nodesize); 5989 btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
5796 goto out_fail; 5990 goto out_fail;
5797 } 5991 }
5798 5992
5799 spin_lock(&BTRFS_I(inode)->lock); 5993 spin_lock(&BTRFS_I(inode)->lock);
5800 if (extra_reserve) { 5994 if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5801 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, 5995 &BTRFS_I(inode)->runtime_flags)) {
5802 &BTRFS_I(inode)->runtime_flags); 5996 to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
5803 nr_extents--; 5997 release_extra = true;
5804 } 5998 }
5805 BTRFS_I(inode)->reserved_extents += nr_extents; 5999 BTRFS_I(inode)->reserved_extents += nr_extents;
5806 spin_unlock(&BTRFS_I(inode)->lock); 6000 spin_unlock(&BTRFS_I(inode)->lock);
@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5811 if (to_reserve) 6005 if (to_reserve)
5812 trace_btrfs_space_reservation(root->fs_info, "delalloc", 6006 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5813 btrfs_ino(inode), to_reserve, 1); 6007 btrfs_ino(inode), to_reserve, 1);
5814 block_rsv_add_bytes(block_rsv, to_reserve, 1); 6008 if (release_extra)
5815 6009 btrfs_block_rsv_release(root, block_rsv,
6010 btrfs_calc_trans_metadata_size(root,
6011 1));
5816 return 0; 6012 return 0;
5817 6013
5818out_fail: 6014out_fail:
@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6044 spin_unlock(&cache->lock); 6240 spin_unlock(&cache->lock);
6045 spin_unlock(&cache->space_info->lock); 6241 spin_unlock(&cache->space_info->lock);
6046 6242
6243 trace_btrfs_space_reservation(root->fs_info, "pinned",
6244 cache->space_info->flags,
6245 num_bytes, 1);
6047 set_extent_dirty(info->pinned_extents, 6246 set_extent_dirty(info->pinned_extents,
6048 bytenr, bytenr + num_bytes - 1, 6247 bytenr, bytenr + num_bytes - 1,
6049 GFP_NOFS | __GFP_NOFAIL); 6248 GFP_NOFS | __GFP_NOFAIL);
@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root,
6118 spin_unlock(&cache->lock); 6317 spin_unlock(&cache->lock);
6119 spin_unlock(&cache->space_info->lock); 6318 spin_unlock(&cache->space_info->lock);
6120 6319
6320 trace_btrfs_space_reservation(root->fs_info, "pinned",
6321 cache->space_info->flags, num_bytes, 1);
6121 set_extent_dirty(root->fs_info->pinned_extents, bytenr, 6322 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
6122 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 6323 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6123 if (reserved)
6124 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
6125 return 0; 6324 return 0;
6126} 6325}
6127 6326
@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
6476 spin_lock(&cache->lock); 6675 spin_lock(&cache->lock);
6477 cache->pinned -= len; 6676 cache->pinned -= len;
6478 space_info->bytes_pinned -= len; 6677 space_info->bytes_pinned -= len;
6678
6679 trace_btrfs_space_reservation(fs_info, "pinned",
6680 space_info->flags, len, 0);
6479 space_info->max_extent_size = 0; 6681 space_info->max_extent_size = 0;
6480 percpu_counter_add(&space_info->total_bytes_pinned, -len); 6682 percpu_counter_add(&space_info->total_bytes_pinned, -len);
6481 if (cache->ro) { 6683 if (cache->ro) {
@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
6483 readonly = true; 6685 readonly = true;
6484 } 6686 }
6485 spin_unlock(&cache->lock); 6687 spin_unlock(&cache->lock);
6486 if (!readonly && global_rsv->space_info == space_info) { 6688 if (!readonly && return_free_space &&
6689 global_rsv->space_info == space_info) {
6690 u64 to_add = len;
6691 WARN_ON(!return_free_space);
6487 spin_lock(&global_rsv->lock); 6692 spin_lock(&global_rsv->lock);
6488 if (!global_rsv->full) { 6693 if (!global_rsv->full) {
6489 len = min(len, global_rsv->size - 6694 to_add = min(len, global_rsv->size -
6490 global_rsv->reserved); 6695 global_rsv->reserved);
6491 global_rsv->reserved += len; 6696 global_rsv->reserved += to_add;
6492 space_info->bytes_may_use += len; 6697 space_info->bytes_may_use += to_add;
6493 if (global_rsv->reserved >= global_rsv->size) 6698 if (global_rsv->reserved >= global_rsv->size)
6494 global_rsv->full = 1; 6699 global_rsv->full = 1;
6700 trace_btrfs_space_reservation(fs_info,
6701 "space_info",
6702 space_info->flags,
6703 to_add, 1);
6704 len -= to_add;
6495 } 6705 }
6496 spin_unlock(&global_rsv->lock); 6706 spin_unlock(&global_rsv->lock);
6707 /* Add to any tickets we may have */
6708 if (len)
6709 space_info_add_new_bytes(fs_info, space_info,
6710 len);
6497 } 6711 }
6498 spin_unlock(&space_info->lock); 6712 spin_unlock(&space_info->lock);
6499 } 6713 }
@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
7782 ret = btrfs_discard_extent(root, start, len, NULL); 7996 ret = btrfs_discard_extent(root, start, len, NULL);
7783 btrfs_add_free_space(cache, start, len); 7997 btrfs_add_free_space(cache, start, len);
7784 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 7998 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
7999 trace_btrfs_reserved_extent_free(root, start, len);
7785 } 8000 }
7786 8001
7787 btrfs_put_block_group(cache); 8002 btrfs_put_block_group(cache);
7788
7789 trace_btrfs_reserved_extent_free(root, start, len);
7790
7791 return ret; 8003 return ret;
7792} 8004}
7793 8005
@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
9791 space_info = list_entry(info->space_info.next, 10003 space_info = list_entry(info->space_info.next,
9792 struct btrfs_space_info, 10004 struct btrfs_space_info,
9793 list); 10005 list);
9794 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { 10006
9795 if (WARN_ON(space_info->bytes_pinned > 0 || 10007 /*
10008 * Do not hide this behind enospc_debug, this is actually
10009 * important and indicates a real bug if this happens.
10010 */
10011 if (WARN_ON(space_info->bytes_pinned > 0 ||
9796 space_info->bytes_reserved > 0 || 10012 space_info->bytes_reserved > 0 ||
9797 space_info->bytes_may_use > 0)) { 10013 space_info->bytes_may_use > 0))
9798 dump_space_info(space_info, 0, 0); 10014 dump_space_info(space_info, 0, 0);
9799 }
9800 }
9801 list_del(&space_info->list); 10015 list_del(&space_info->list);
9802 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 10016 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9803 struct kobject *kobj; 10017 struct kobject *kobj;
@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
10005 goto error; 10219 goto error;
10006 } 10220 }
10007 10221
10222 trace_btrfs_add_block_group(root->fs_info, cache, 0);
10008 ret = update_space_info(info, cache->flags, found_key.offset, 10223 ret = update_space_info(info, cache->flags, found_key.offset,
10009 btrfs_block_group_used(&cache->item), 10224 btrfs_block_group_used(&cache->item),
10010 &space_info); 10225 cache->bytes_super, &space_info);
10011 if (ret) { 10226 if (ret) {
10012 btrfs_remove_free_space_cache(cache); 10227 btrfs_remove_free_space_cache(cache);
10013 spin_lock(&info->block_group_cache_lock); 10228 spin_lock(&info->block_group_cache_lock);
@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
10020 } 10235 }
10021 10236
10022 cache->space_info = space_info; 10237 cache->space_info = space_info;
10023 spin_lock(&cache->space_info->lock);
10024 cache->space_info->bytes_readonly += cache->bytes_super;
10025 spin_unlock(&cache->space_info->lock);
10026 10238
10027 __link_block_group(space_info, cache); 10239 __link_block_group(space_info, cache);
10028 10240
@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10114 int ret; 10326 int ret;
10115 struct btrfs_root *extent_root; 10327 struct btrfs_root *extent_root;
10116 struct btrfs_block_group_cache *cache; 10328 struct btrfs_block_group_cache *cache;
10117
10118 extent_root = root->fs_info->extent_root; 10329 extent_root = root->fs_info->extent_root;
10119 10330
10120 btrfs_set_log_full_commit(root->fs_info, trans); 10331 btrfs_set_log_full_commit(root->fs_info, trans);
@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10160 * assigned to our block group, but don't update its counters just yet. 10371 * assigned to our block group, but don't update its counters just yet.
10161 * We want our bg to be added to the rbtree with its ->space_info set. 10372 * We want our bg to be added to the rbtree with its ->space_info set.
10162 */ 10373 */
10163 ret = update_space_info(root->fs_info, cache->flags, 0, 0, 10374 ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
10164 &cache->space_info); 10375 &cache->space_info);
10165 if (ret) { 10376 if (ret) {
10166 btrfs_remove_free_space_cache(cache); 10377 btrfs_remove_free_space_cache(cache);
@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10179 * Now that our block group has its ->space_info set and is inserted in 10390 * Now that our block group has its ->space_info set and is inserted in
10180 * the rbtree, update the space info's counters. 10391 * the rbtree, update the space info's counters.
10181 */ 10392 */
10393 trace_btrfs_add_block_group(root->fs_info, cache, 1);
10182 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 10394 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
10183 &cache->space_info); 10395 cache->bytes_super, &cache->space_info);
10184 if (ret) { 10396 if (ret) {
10185 btrfs_remove_free_space_cache(cache); 10397 btrfs_remove_free_space_cache(cache);
10186 spin_lock(&root->fs_info->block_group_cache_lock); 10398 spin_lock(&root->fs_info->block_group_cache_lock);
@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10193 } 10405 }
10194 update_global_block_rsv(root->fs_info); 10406 update_global_block_rsv(root->fs_info);
10195 10407
10196 spin_lock(&cache->space_info->lock);
10197 cache->space_info->bytes_readonly += cache->bytes_super;
10198 spin_unlock(&cache->space_info->lock);
10199
10200 __link_block_group(cache->space_info, cache); 10408 __link_block_group(cache->space_info, cache);
10201 10409
10202 list_add_tail(&cache->bg_list, &trans->new_bgs); 10410 list_add_tail(&cache->bg_list, &trans->new_bgs);
10203 10411
10204 set_avail_alloc_bits(extent_root->fs_info, type); 10412 set_avail_alloc_bits(extent_root->fs_info, type);
10205
10206 return 0; 10413 return 0;
10207} 10414}
10208 10415
@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10747 mixed = 1; 10954 mixed = 1;
10748 10955
10749 flags = BTRFS_BLOCK_GROUP_SYSTEM; 10956 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10750 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 10957 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
10751 if (ret) 10958 if (ret)
10752 goto out; 10959 goto out;
10753 10960
10754 if (mixed) { 10961 if (mixed) {
10755 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; 10962 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10756 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 10963 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
10757 } else { 10964 } else {
10758 flags = BTRFS_BLOCK_GROUP_METADATA; 10965 flags = BTRFS_BLOCK_GROUP_METADATA;
10759 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 10966 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
10760 if (ret) 10967 if (ret)
10761 goto out; 10968 goto out;
10762 10969
10763 flags = BTRFS_BLOCK_GROUP_DATA; 10970 flags = BTRFS_BLOCK_GROUP_DATA;
10764 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 10971 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
10765 } 10972 }
10766out: 10973out:
10767 return ret; 10974 return ret;