Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c | 2503
1 file changed, 126 insertions(+), 2377 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5faf057f6f37..d3b58e388535 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -28,46 +28,12 @@
28#include "sysfs.h" 28#include "sysfs.h"
29#include "qgroup.h" 29#include "qgroup.h"
30#include "ref-verify.h" 30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
31 34
32#undef SCRAMBLE_DELAYED_REFS 35#undef SCRAMBLE_DELAYED_REFS
33 36
34/*
35 * control flags for do_chunk_alloc's force field
36 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
37 * if we really need one.
38 *
39 * CHUNK_ALLOC_LIMITED means to only try and allocate one
40 * if we have very few chunks already allocated. This is
41 * used as part of the clustering code to help make sure
42 * we have a good pool of storage to cluster in, without
43 * filling the FS with empty chunks
44 *
45 * CHUNK_ALLOC_FORCE means it must try to allocate one
46 *
47 */
48enum {
49 CHUNK_ALLOC_NO_FORCE = 0,
50 CHUNK_ALLOC_LIMITED = 1,
51 CHUNK_ALLOC_FORCE = 2,
52};
53
54/*
55 * Declare a helper function to detect underflow of various space info members
56 */
57#define DECLARE_SPACE_INFO_UPDATE(name) \
58static inline void update_##name(struct btrfs_space_info *sinfo, \
59 s64 bytes) \
60{ \
61 if (bytes < 0 && sinfo->name < -bytes) { \
62 WARN_ON(1); \
63 sinfo->name = 0; \
64 return; \
65 } \
66 sinfo->name += bytes; \
67}
68
69DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
70DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
71 37
72static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 38static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
73 struct btrfs_delayed_ref_node *node, u64 parent, 39 struct btrfs_delayed_ref_node *node, u64 parent,
@@ -84,21 +50,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
84static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 50static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
85 struct btrfs_delayed_ref_node *node, 51 struct btrfs_delayed_ref_node *node,
86 struct btrfs_delayed_extent_op *extent_op); 52 struct btrfs_delayed_extent_op *extent_op);
87static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
88 int force);
89static int find_next_key(struct btrfs_path *path, int level, 53static int find_next_key(struct btrfs_path *path, int level,
90 struct btrfs_key *key); 54 struct btrfs_key *key);
91static void dump_space_info(struct btrfs_fs_info *fs_info,
92 struct btrfs_space_info *info, u64 bytes,
93 int dump_block_groups);
94static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
95 u64 num_bytes);
96static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
97 struct btrfs_space_info *space_info,
98 u64 num_bytes);
99static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
100 struct btrfs_space_info *space_info,
101 u64 num_bytes);
102 55
103static noinline int 56static noinline int
104block_group_cache_done(struct btrfs_block_group_cache *cache) 57block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -737,62 +690,39 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
737 return block_group_cache_tree_search(info, bytenr, 1); 690 return block_group_cache_tree_search(info, bytenr, 1);
738} 691}
739 692
740static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 693static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
741 u64 flags)
742{ 694{
743 struct list_head *head = &info->space_info; 695 if (ref->type == BTRFS_REF_METADATA) {
744 struct btrfs_space_info *found; 696 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
745 697 return BTRFS_BLOCK_GROUP_SYSTEM;
746 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; 698 else
747 699 return BTRFS_BLOCK_GROUP_METADATA;
748 rcu_read_lock();
749 list_for_each_entry_rcu(found, head, list) {
750 if (found->flags & flags) {
751 rcu_read_unlock();
752 return found;
753 }
754 } 700 }
755 rcu_read_unlock(); 701 return BTRFS_BLOCK_GROUP_DATA;
756 return NULL;
757} 702}
758 703
759static void add_pinned_bytes(struct btrfs_fs_info *fs_info, 704static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
760 struct btrfs_ref *ref, int sign) 705 struct btrfs_ref *ref)
761{ 706{
762 struct btrfs_space_info *space_info; 707 struct btrfs_space_info *space_info;
763 s64 num_bytes; 708 u64 flags = generic_ref_to_space_flags(ref);
764 u64 flags;
765
766 ASSERT(sign == 1 || sign == -1);
767 num_bytes = sign * ref->len;
768 if (ref->type == BTRFS_REF_METADATA) {
769 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
770 flags = BTRFS_BLOCK_GROUP_SYSTEM;
771 else
772 flags = BTRFS_BLOCK_GROUP_METADATA;
773 } else {
774 flags = BTRFS_BLOCK_GROUP_DATA;
775 }
776 709
777 space_info = __find_space_info(fs_info, flags); 710 space_info = btrfs_find_space_info(fs_info, flags);
778 ASSERT(space_info); 711 ASSERT(space_info);
779 percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, 712 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
780 BTRFS_TOTAL_BYTES_PINNED_BATCH); 713 BTRFS_TOTAL_BYTES_PINNED_BATCH);
781} 714}
782 715
783/* 716static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
784 * after adding space to the filesystem, we need to clear the full flags 717 struct btrfs_ref *ref)
785 * on all the space infos.
786 */
787void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
788{ 718{
789 struct list_head *head = &info->space_info; 719 struct btrfs_space_info *space_info;
790 struct btrfs_space_info *found; 720 u64 flags = generic_ref_to_space_flags(ref);
791 721
792 rcu_read_lock(); 722 space_info = btrfs_find_space_info(fs_info, flags);
793 list_for_each_entry_rcu(found, head, list) 723 ASSERT(space_info);
794 found->full = 0; 724 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
795 rcu_read_unlock(); 725 BTRFS_TOTAL_BYTES_PINNED_BATCH);
796} 726}
797 727
798/* simple helper to search for an existing data extent at a given offset */ 728/* simple helper to search for an existing data extent at a given offset */
@@ -1121,11 +1051,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1121 __le64 lenum; 1051 __le64 lenum;
1122 1052
1123 lenum = cpu_to_le64(root_objectid); 1053 lenum = cpu_to_le64(root_objectid);
1124 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 1054 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
1125 lenum = cpu_to_le64(owner); 1055 lenum = cpu_to_le64(owner);
1126 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1056 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1127 lenum = cpu_to_le64(offset); 1057 lenum = cpu_to_le64(offset);
1128 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1058 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1129 1059
1130 return ((u64)high_crc << 31) ^ (u64)low_crc; 1060 return ((u64)high_crc << 31) ^ (u64)low_crc;
1131} 1061}
@@ -2065,7 +1995,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2065 btrfs_ref_tree_mod(fs_info, generic_ref); 1995 btrfs_ref_tree_mod(fs_info, generic_ref);
2066 1996
2067 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) 1997 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
2068 add_pinned_bytes(fs_info, generic_ref, -1); 1998 sub_pinned_bytes(fs_info, generic_ref);
2069 1999
2070 return ret; 2000 return ret;
2071} 2001}
@@ -2462,7 +2392,7 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2462 flags = BTRFS_BLOCK_GROUP_SYSTEM; 2392 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2463 else 2393 else
2464 flags = BTRFS_BLOCK_GROUP_METADATA; 2394 flags = BTRFS_BLOCK_GROUP_METADATA;
2465 space_info = __find_space_info(fs_info, flags); 2395 space_info = btrfs_find_space_info(fs_info, flags);
2466 ASSERT(space_info); 2396 ASSERT(space_info);
2467 percpu_counter_add_batch(&space_info->total_bytes_pinned, 2397 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2468 -head->num_bytes, 2398 -head->num_bytes,
@@ -2824,49 +2754,6 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2824 return num_csums; 2754 return num_csums;
2825} 2755}
2826 2756
2827bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
2828{
2829 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
2830 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2831 bool ret = false;
2832 u64 reserved;
2833
2834 spin_lock(&global_rsv->lock);
2835 reserved = global_rsv->reserved;
2836 spin_unlock(&global_rsv->lock);
2837
2838 /*
2839 * Since the global reserve is just kind of magic we don't really want
2840 * to rely on it to save our bacon, so if our size is more than the
2841 * delayed_refs_rsv and the global rsv then it's time to think about
2842 * bailing.
2843 */
2844 spin_lock(&delayed_refs_rsv->lock);
2845 reserved += delayed_refs_rsv->reserved;
2846 if (delayed_refs_rsv->size >= reserved)
2847 ret = true;
2848 spin_unlock(&delayed_refs_rsv->lock);
2849 return ret;
2850}
2851
2852int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
2853{
2854 u64 num_entries =
2855 atomic_read(&trans->transaction->delayed_refs.num_entries);
2856 u64 avg_runtime;
2857 u64 val;
2858
2859 smp_mb();
2860 avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
2861 val = num_entries * avg_runtime;
2862 if (val >= NSEC_PER_SEC)
2863 return 1;
2864 if (val >= NSEC_PER_SEC / 2)
2865 return 2;
2866
2867 return btrfs_check_space_for_delayed_refs(trans->fs_info);
2868}
2869
2870/* 2757/*
2871 * this starts processing the delayed reference count updates and 2758 * this starts processing the delayed reference count updates and
2872 * extent insertions we have queued up so far. count can be 2759 * extent insertions we have queued up so far. count can be
@@ -3834,93 +3721,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3834 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); 3721 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
3835} 3722}
3836 3723
3837static const char *alloc_name(u64 flags)
3838{
3839 switch (flags) {
3840 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3841 return "mixed";
3842 case BTRFS_BLOCK_GROUP_METADATA:
3843 return "metadata";
3844 case BTRFS_BLOCK_GROUP_DATA:
3845 return "data";
3846 case BTRFS_BLOCK_GROUP_SYSTEM:
3847 return "system";
3848 default:
3849 WARN_ON(1);
3850 return "invalid-combination";
3851 };
3852}
3853
3854static int create_space_info(struct btrfs_fs_info *info, u64 flags)
3855{
3856
3857 struct btrfs_space_info *space_info;
3858 int i;
3859 int ret;
3860
3861 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
3862 if (!space_info)
3863 return -ENOMEM;
3864
3865 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
3866 GFP_KERNEL);
3867 if (ret) {
3868 kfree(space_info);
3869 return ret;
3870 }
3871
3872 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3873 INIT_LIST_HEAD(&space_info->block_groups[i]);
3874 init_rwsem(&space_info->groups_sem);
3875 spin_lock_init(&space_info->lock);
3876 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3877 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3878 init_waitqueue_head(&space_info->wait);
3879 INIT_LIST_HEAD(&space_info->ro_bgs);
3880 INIT_LIST_HEAD(&space_info->tickets);
3881 INIT_LIST_HEAD(&space_info->priority_tickets);
3882
3883 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
3884 info->space_info_kobj, "%s",
3885 alloc_name(space_info->flags));
3886 if (ret) {
3887 kobject_put(&space_info->kobj);
3888 return ret;
3889 }
3890
3891 list_add_rcu(&space_info->list, &info->space_info);
3892 if (flags & BTRFS_BLOCK_GROUP_DATA)
3893 info->data_sinfo = space_info;
3894
3895 return ret;
3896}
3897
3898static void update_space_info(struct btrfs_fs_info *info, u64 flags,
3899 u64 total_bytes, u64 bytes_used,
3900 u64 bytes_readonly,
3901 struct btrfs_space_info **space_info)
3902{
3903 struct btrfs_space_info *found;
3904 int factor;
3905
3906 factor = btrfs_bg_type_to_factor(flags);
3907
3908 found = __find_space_info(info, flags);
3909 ASSERT(found);
3910 spin_lock(&found->lock);
3911 found->total_bytes += total_bytes;
3912 found->disk_total += total_bytes * factor;
3913 found->bytes_used += bytes_used;
3914 found->disk_used += bytes_used * factor;
3915 found->bytes_readonly += bytes_readonly;
3916 if (total_bytes > 0)
3917 found->full = 0;
3918 space_info_add_new_bytes(info, found, total_bytes -
3919 bytes_used - bytes_readonly);
3920 spin_unlock(&found->lock);
3921 *space_info = found;
3922}
3923
3924static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3724static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3925{ 3725{
3926 u64 extra_flags = chunk_to_extended(flags) & 3726 u64 extra_flags = chunk_to_extended(flags) &
@@ -4068,215 +3868,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4068 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3868 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4069} 3869}
4070 3870
4071static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4072 bool may_use_included)
4073{
4074 ASSERT(s_info);
4075 return s_info->bytes_used + s_info->bytes_reserved +
4076 s_info->bytes_pinned + s_info->bytes_readonly +
4077 (may_use_included ? s_info->bytes_may_use : 0);
4078}
4079
4080int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
4081{
4082 struct btrfs_root *root = inode->root;
4083 struct btrfs_fs_info *fs_info = root->fs_info;
4084 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
4085 u64 used;
4086 int ret = 0;
4087 int need_commit = 2;
4088 int have_pinned_space;
4089
4090 /* make sure bytes are sectorsize aligned */
4091 bytes = ALIGN(bytes, fs_info->sectorsize);
4092
4093 if (btrfs_is_free_space_inode(inode)) {
4094 need_commit = 0;
4095 ASSERT(current->journal_info);
4096 }
4097
4098again:
4099 /* make sure we have enough space to handle the data first */
4100 spin_lock(&data_sinfo->lock);
4101 used = btrfs_space_info_used(data_sinfo, true);
4102
4103 if (used + bytes > data_sinfo->total_bytes) {
4104 struct btrfs_trans_handle *trans;
4105
4106 /*
4107 * if we don't have enough free bytes in this space then we need
4108 * to alloc a new chunk.
4109 */
4110 if (!data_sinfo->full) {
4111 u64 alloc_target;
4112
4113 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4114 spin_unlock(&data_sinfo->lock);
4115
4116 alloc_target = btrfs_data_alloc_profile(fs_info);
4117 /*
4118 * It is ugly that we don't call nolock join
4119 * transaction for the free space inode case here.
4120 * But it is safe because we only do the data space
4121 * reservation for the free space cache in the
4122 * transaction context, the common join transaction
4123 * just increase the counter of the current transaction
4124 * handler, doesn't try to acquire the trans_lock of
4125 * the fs.
4126 */
4127 trans = btrfs_join_transaction(root);
4128 if (IS_ERR(trans))
4129 return PTR_ERR(trans);
4130
4131 ret = do_chunk_alloc(trans, alloc_target,
4132 CHUNK_ALLOC_NO_FORCE);
4133 btrfs_end_transaction(trans);
4134 if (ret < 0) {
4135 if (ret != -ENOSPC)
4136 return ret;
4137 else {
4138 have_pinned_space = 1;
4139 goto commit_trans;
4140 }
4141 }
4142
4143 goto again;
4144 }
4145
4146 /*
4147 * If we don't have enough pinned space to deal with this
4148 * allocation, and no removed chunk in current transaction,
4149 * don't bother committing the transaction.
4150 */
4151 have_pinned_space = __percpu_counter_compare(
4152 &data_sinfo->total_bytes_pinned,
4153 used + bytes - data_sinfo->total_bytes,
4154 BTRFS_TOTAL_BYTES_PINNED_BATCH);
4155 spin_unlock(&data_sinfo->lock);
4156
4157 /* commit the current transaction and try again */
4158commit_trans:
4159 if (need_commit) {
4160 need_commit--;
4161
4162 if (need_commit > 0) {
4163 btrfs_start_delalloc_roots(fs_info, -1);
4164 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
4165 (u64)-1);
4166 }
4167
4168 trans = btrfs_join_transaction(root);
4169 if (IS_ERR(trans))
4170 return PTR_ERR(trans);
4171 if (have_pinned_space >= 0 ||
4172 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4173 &trans->transaction->flags) ||
4174 need_commit > 0) {
4175 ret = btrfs_commit_transaction(trans);
4176 if (ret)
4177 return ret;
4178 /*
4179 * The cleaner kthread might still be doing iput
4180 * operations. Wait for it to finish so that
4181 * more space is released. We don't need to
4182 * explicitly run the delayed iputs here because
4183 * the commit_transaction would have woken up
4184 * the cleaner.
4185 */
4186 ret = btrfs_wait_on_delayed_iputs(fs_info);
4187 if (ret)
4188 return ret;
4189 goto again;
4190 } else {
4191 btrfs_end_transaction(trans);
4192 }
4193 }
4194
4195 trace_btrfs_space_reservation(fs_info,
4196 "space_info:enospc",
4197 data_sinfo->flags, bytes, 1);
4198 return -ENOSPC;
4199 }
4200 update_bytes_may_use(data_sinfo, bytes);
4201 trace_btrfs_space_reservation(fs_info, "space_info",
4202 data_sinfo->flags, bytes, 1);
4203 spin_unlock(&data_sinfo->lock);
4204
4205 return 0;
4206}
4207
4208int btrfs_check_data_free_space(struct inode *inode,
4209 struct extent_changeset **reserved, u64 start, u64 len)
4210{
4211 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4212 int ret;
4213
4214 /* align the range */
4215 len = round_up(start + len, fs_info->sectorsize) -
4216 round_down(start, fs_info->sectorsize);
4217 start = round_down(start, fs_info->sectorsize);
4218
4219 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
4220 if (ret < 0)
4221 return ret;
4222
4223 /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
4224 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
4225 if (ret < 0)
4226 btrfs_free_reserved_data_space_noquota(inode, start, len);
4227 else
4228 ret = 0;
4229 return ret;
4230}
4231
4232/*
4233 * Called if we need to clear a data reservation for this inode
4234 * Normally in a error case.
4235 *
4236 * This one will *NOT* use accurate qgroup reserved space API, just for case
4237 * which we can't sleep and is sure it won't affect qgroup reserved space.
4238 * Like clear_bit_hook().
4239 */
4240void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4241 u64 len)
4242{
4243 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4244 struct btrfs_space_info *data_sinfo;
4245
4246 /* Make sure the range is aligned to sectorsize */
4247 len = round_up(start + len, fs_info->sectorsize) -
4248 round_down(start, fs_info->sectorsize);
4249 start = round_down(start, fs_info->sectorsize);
4250
4251 data_sinfo = fs_info->data_sinfo;
4252 spin_lock(&data_sinfo->lock);
4253 update_bytes_may_use(data_sinfo, -len);
4254 trace_btrfs_space_reservation(fs_info, "space_info",
4255 data_sinfo->flags, len, 0);
4256 spin_unlock(&data_sinfo->lock);
4257}
4258
4259/*
4260 * Called if we need to clear a data reservation for this inode
4261 * Normally in a error case.
4262 *
4263 * This one will handle the per-inode data rsv map for accurate reserved
4264 * space framework.
4265 */
4266void btrfs_free_reserved_data_space(struct inode *inode,
4267 struct extent_changeset *reserved, u64 start, u64 len)
4268{
4269 struct btrfs_root *root = BTRFS_I(inode)->root;
4270
4271 /* Make sure the range is aligned to sectorsize */
4272 len = round_up(start + len, root->fs_info->sectorsize) -
4273 round_down(start, root->fs_info->sectorsize);
4274 start = round_down(start, root->fs_info->sectorsize);
4275
4276 btrfs_free_reserved_data_space_noquota(inode, start, len);
4277 btrfs_qgroup_free_data(inode, reserved, start, len);
4278}
4279
4280static void force_metadata_allocation(struct btrfs_fs_info *info) 3871static void force_metadata_allocation(struct btrfs_fs_info *info)
4281{ 3872{
4282 struct list_head *head = &info->space_info; 3873 struct list_head *head = &info->space_info;
@@ -4290,11 +3881,6 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
4290 rcu_read_unlock(); 3881 rcu_read_unlock();
4291} 3882}
4292 3883
4293static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4294{
4295 return (global->size << 1);
4296}
4297
4298static int should_alloc_chunk(struct btrfs_fs_info *fs_info, 3884static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
4299 struct btrfs_space_info *sinfo, int force) 3885 struct btrfs_space_info *sinfo, int force)
4300{ 3886{
@@ -4325,15 +3911,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4325{ 3911{
4326 u64 num_dev; 3912 u64 num_dev;
4327 3913
4328 if (type & (BTRFS_BLOCK_GROUP_RAID10 | 3914 num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
4329 BTRFS_BLOCK_GROUP_RAID0 | 3915 if (!num_dev)
4330 BTRFS_BLOCK_GROUP_RAID5 |
4331 BTRFS_BLOCK_GROUP_RAID6))
4332 num_dev = fs_info->fs_devices->rw_devices; 3916 num_dev = fs_info->fs_devices->rw_devices;
4333 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4334 num_dev = 2;
4335 else
4336 num_dev = 1; /* DUP or single */
4337 3917
4338 return num_dev; 3918 return num_dev;
4339} 3919}
@@ -4358,7 +3938,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4358 */ 3938 */
4359 lockdep_assert_held(&fs_info->chunk_mutex); 3939 lockdep_assert_held(&fs_info->chunk_mutex);
4360 3940
4361 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3941 info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4362 spin_lock(&info->lock); 3942 spin_lock(&info->lock);
4363 left = info->total_bytes - btrfs_space_info_used(info, true); 3943 left = info->total_bytes - btrfs_space_info_used(info, true);
4364 spin_unlock(&info->lock); 3944 spin_unlock(&info->lock);
@@ -4372,7 +3952,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4372 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 3952 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4373 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", 3953 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4374 left, thresh, type); 3954 left, thresh, type);
4375 dump_space_info(fs_info, info, 0, 0); 3955 btrfs_dump_space_info(fs_info, info, 0, 0);
4376 } 3956 }
4377 3957
4378 if (left < thresh) { 3958 if (left < thresh) {
@@ -4405,8 +3985,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4405 * - return 1 if it successfully allocates a chunk, 3985 * - return 1 if it successfully allocates a chunk,
4406 * - return errors including -ENOSPC otherwise. 3986 * - return errors including -ENOSPC otherwise.
4407 */ 3987 */
4408static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, 3988int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4409 int force) 3989 enum btrfs_chunk_alloc_enum force)
4410{ 3990{
4411 struct btrfs_fs_info *fs_info = trans->fs_info; 3991 struct btrfs_fs_info *fs_info = trans->fs_info;
4412 struct btrfs_space_info *space_info; 3992 struct btrfs_space_info *space_info;
@@ -4418,7 +3998,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4418 if (trans->allocating_chunk) 3998 if (trans->allocating_chunk)
4419 return -ENOSPC; 3999 return -ENOSPC;
4420 4000
4421 space_info = __find_space_info(fs_info, flags); 4001 space_info = btrfs_find_space_info(fs_info, flags);
4422 ASSERT(space_info); 4002 ASSERT(space_info);
4423 4003
4424 do { 4004 do {
@@ -4525,1714 +4105,6 @@ out:
4525 return ret; 4105 return ret;
4526} 4106}
4527 4107
4528static int can_overcommit(struct btrfs_fs_info *fs_info,
4529 struct btrfs_space_info *space_info, u64 bytes,
4530 enum btrfs_reserve_flush_enum flush,
4531 bool system_chunk)
4532{
4533 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4534 u64 profile;
4535 u64 space_size;
4536 u64 avail;
4537 u64 used;
4538 int factor;
4539
4540 /* Don't overcommit when in mixed mode. */
4541 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4542 return 0;
4543
4544 if (system_chunk)
4545 profile = btrfs_system_alloc_profile(fs_info);
4546 else
4547 profile = btrfs_metadata_alloc_profile(fs_info);
4548
4549 used = btrfs_space_info_used(space_info, false);
4550
4551 /*
4552 * We only want to allow over committing if we have lots of actual space
4553 * free, but if we don't have enough space to handle the global reserve
4554 * space then we could end up having a real enospc problem when trying
4555 * to allocate a chunk or some other such important allocation.
4556 */
4557 spin_lock(&global_rsv->lock);
4558 space_size = calc_global_rsv_need_space(global_rsv);
4559 spin_unlock(&global_rsv->lock);
4560 if (used + space_size >= space_info->total_bytes)
4561 return 0;
4562
4563 used += space_info->bytes_may_use;
4564
4565 avail = atomic64_read(&fs_info->free_chunk_space);
4566
4567 /*
4568 * If we have dup, raid1 or raid10 then only half of the free
4569 * space is actually usable. For raid56, the space info used
4570 * doesn't include the parity drive, so we don't have to
4571 * change the math
4572 */
4573 factor = btrfs_bg_type_to_factor(profile);
4574 avail = div_u64(avail, factor);
4575
4576 /*
4577 * If we aren't flushing all things, let us overcommit up to
4578 * 1/2th of the space. If we can flush, don't let us overcommit
4579 * too much, let it overcommit up to 1/8 of the space.
4580 */
4581 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4582 avail >>= 3;
4583 else
4584 avail >>= 1;
4585
4586 if (used + bytes < space_info->total_bytes + avail)
4587 return 1;
4588 return 0;
4589}
4590
4591static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
4592 unsigned long nr_pages, int nr_items)
4593{
4594 struct super_block *sb = fs_info->sb;
4595
4596 if (down_read_trylock(&sb->s_umount)) {
4597 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4598 up_read(&sb->s_umount);
4599 } else {
4600 /*
4601 * We needn't worry the filesystem going from r/w to r/o though
4602 * we don't acquire ->s_umount mutex, because the filesystem
4603 * should guarantee the delalloc inodes list be empty after
4604 * the filesystem is readonly(all dirty pages are written to
4605 * the disk).
4606 */
4607 btrfs_start_delalloc_roots(fs_info, nr_items);
4608 if (!current->journal_info)
4609 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
4610 }
4611}
4612
4613static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
4614 u64 to_reclaim)
4615{
4616 u64 bytes;
4617 u64 nr;
4618
4619 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
4620 nr = div64_u64(to_reclaim, bytes);
4621 if (!nr)
4622 nr = 1;
4623 return nr;
4624}
4625
4626#define EXTENT_SIZE_PER_ITEM SZ_256K
4627
4628/*
4629 * shrink metadata reservation for delalloc
4630 */
4631static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4632 u64 orig, bool wait_ordered)
4633{
4634 struct btrfs_space_info *space_info;
4635 struct btrfs_trans_handle *trans;
4636 u64 delalloc_bytes;
4637 u64 dio_bytes;
4638 u64 async_pages;
4639 u64 items;
4640 long time_left;
4641 unsigned long nr_pages;
4642 int loops;
4643
4644 /* Calc the number of the pages we need flush for space reservation */
4645 items = calc_reclaim_items_nr(fs_info, to_reclaim);
4646 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4647
4648 trans = (struct btrfs_trans_handle *)current->journal_info;
4649 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4650
4651 delalloc_bytes = percpu_counter_sum_positive(
4652 &fs_info->delalloc_bytes);
4653 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4654 if (delalloc_bytes == 0 && dio_bytes == 0) {
4655 if (trans)
4656 return;
4657 if (wait_ordered)
4658 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4659 return;
4660 }
4661
4662 /*
4663 * If we are doing more ordered than delalloc we need to just wait on
4664 * ordered extents, otherwise we'll waste time trying to flush delalloc
4665 * that likely won't give us the space back we need.
4666 */
4667 if (dio_bytes > delalloc_bytes)
4668 wait_ordered = true;
4669
4670 loops = 0;
4671 while ((delalloc_bytes || dio_bytes) && loops < 3) {
4672 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
4673
4674 /*
4675 * Triggers inode writeback for up to nr_pages. This will invoke
4676 * ->writepages callback and trigger delalloc filling
4677 * (btrfs_run_delalloc_range()).
4678 */
4679 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
4680
4681 /*
4682 * We need to wait for the compressed pages to start before
4683 * we continue.
4684 */
4685 async_pages = atomic_read(&fs_info->async_delalloc_pages);
4686 if (!async_pages)
4687 goto skip_async;
4688
4689 /*
4690 * Calculate how many compressed pages we want to be written
4691 * before we continue. I.e if there are more async pages than we
4692 * require wait_event will wait until nr_pages are written.
4693 */
4694 if (async_pages <= nr_pages)
4695 async_pages = 0;
4696 else
4697 async_pages -= nr_pages;
4698
4699 wait_event(fs_info->async_submit_wait,
4700 atomic_read(&fs_info->async_delalloc_pages) <=
4701 (int)async_pages);
4702skip_async:
4703 spin_lock(&space_info->lock);
4704 if (list_empty(&space_info->tickets) &&
4705 list_empty(&space_info->priority_tickets)) {
4706 spin_unlock(&space_info->lock);
4707 break;
4708 }
4709 spin_unlock(&space_info->lock);
4710
4711 loops++;
4712 if (wait_ordered && !trans) {
4713 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4714 } else {
4715 time_left = schedule_timeout_killable(1);
4716 if (time_left)
4717 break;
4718 }
4719 delalloc_bytes = percpu_counter_sum_positive(
4720 &fs_info->delalloc_bytes);
4721 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4722 }
4723}
4724
4725struct reserve_ticket {
4726 u64 orig_bytes;
4727 u64 bytes;
4728 int error;
4729 struct list_head list;
4730 wait_queue_head_t wait;
4731};
4732
4733/**
4734 * maybe_commit_transaction - possibly commit the transaction if its ok to
4735 * @root - the root we're allocating for
4736 * @bytes - the number of bytes we want to reserve
4737 * @force - force the commit
4738 *
4739 * This will check to make sure that committing the transaction will actually
4740 * get us somewhere and then commit the transaction if it does. Otherwise it
4741 * will return -ENOSPC.
4742 */
4743static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4744 struct btrfs_space_info *space_info)
4745{
4746 struct reserve_ticket *ticket = NULL;
4747 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
4748 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
4749 struct btrfs_trans_handle *trans;
4750 u64 bytes_needed;
4751 u64 reclaim_bytes = 0;
4752
4753 trans = (struct btrfs_trans_handle *)current->journal_info;
4754 if (trans)
4755 return -EAGAIN;
4756
4757 spin_lock(&space_info->lock);
4758 if (!list_empty(&space_info->priority_tickets))
4759 ticket = list_first_entry(&space_info->priority_tickets,
4760 struct reserve_ticket, list);
4761 else if (!list_empty(&space_info->tickets))
4762 ticket = list_first_entry(&space_info->tickets,
4763 struct reserve_ticket, list);
4764 bytes_needed = (ticket) ? ticket->bytes : 0;
4765 spin_unlock(&space_info->lock);
4766
4767 if (!bytes_needed)
4768 return 0;
4769
4770 trans = btrfs_join_transaction(fs_info->extent_root);
4771 if (IS_ERR(trans))
4772 return PTR_ERR(trans);
4773
4774 /*
4775 * See if there is enough pinned space to make this reservation, or if
4776 * we have block groups that are going to be freed, allowing us to
4777 * possibly do a chunk allocation the next loop through.
4778 */
4779 if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
4780 __percpu_counter_compare(&space_info->total_bytes_pinned,
4781 bytes_needed,
4782 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
4783 goto commit;
4784
4785 /*
4786 * See if there is some space in the delayed insertion reservation for
4787 * this reservation.
4788 */
4789 if (space_info != delayed_rsv->space_info)
4790 goto enospc;
4791
4792 spin_lock(&delayed_rsv->lock);
4793 reclaim_bytes += delayed_rsv->reserved;
4794 spin_unlock(&delayed_rsv->lock);
4795
4796 spin_lock(&delayed_refs_rsv->lock);
4797 reclaim_bytes += delayed_refs_rsv->reserved;
4798 spin_unlock(&delayed_refs_rsv->lock);
4799 if (reclaim_bytes >= bytes_needed)
4800 goto commit;
4801 bytes_needed -= reclaim_bytes;
4802
4803 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4804 bytes_needed,
4805 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
4806 goto enospc;
4807
4808commit:
4809 return btrfs_commit_transaction(trans);
4810enospc:
4811 btrfs_end_transaction(trans);
4812 return -ENOSPC;
4813}
4814
4815/*
4816 * Try to flush some data based on policy set by @state. This is only advisory
4817 * and may fail for various reasons. The caller is supposed to examine the
4818 * state of @space_info to detect the outcome.
4819 */
4820static void flush_space(struct btrfs_fs_info *fs_info,
4821 struct btrfs_space_info *space_info, u64 num_bytes,
4822 int state)
4823{
4824 struct btrfs_root *root = fs_info->extent_root;
4825 struct btrfs_trans_handle *trans;
4826 int nr;
4827 int ret = 0;
4828
4829 switch (state) {
4830 case FLUSH_DELAYED_ITEMS_NR:
4831 case FLUSH_DELAYED_ITEMS:
4832 if (state == FLUSH_DELAYED_ITEMS_NR)
4833 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
4834 else
4835 nr = -1;
4836
4837 trans = btrfs_join_transaction(root);
4838 if (IS_ERR(trans)) {
4839 ret = PTR_ERR(trans);
4840 break;
4841 }
4842 ret = btrfs_run_delayed_items_nr(trans, nr);
4843 btrfs_end_transaction(trans);
4844 break;
4845 case FLUSH_DELALLOC:
4846 case FLUSH_DELALLOC_WAIT:
4847 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
4848 state == FLUSH_DELALLOC_WAIT);
4849 break;
4850 case FLUSH_DELAYED_REFS_NR:
4851 case FLUSH_DELAYED_REFS:
4852 trans = btrfs_join_transaction(root);
4853 if (IS_ERR(trans)) {
4854 ret = PTR_ERR(trans);
4855 break;
4856 }
4857 if (state == FLUSH_DELAYED_REFS_NR)
4858 nr = calc_reclaim_items_nr(fs_info, num_bytes);
4859 else
4860 nr = 0;
4861 btrfs_run_delayed_refs(trans, nr);
4862 btrfs_end_transaction(trans);
4863 break;
4864 case ALLOC_CHUNK:
4865 case ALLOC_CHUNK_FORCE:
4866 trans = btrfs_join_transaction(root);
4867 if (IS_ERR(trans)) {
4868 ret = PTR_ERR(trans);
4869 break;
4870 }
4871 ret = do_chunk_alloc(trans,
4872 btrfs_metadata_alloc_profile(fs_info),
4873 (state == ALLOC_CHUNK) ?
4874 CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
4875 btrfs_end_transaction(trans);
4876 if (ret > 0 || ret == -ENOSPC)
4877 ret = 0;
4878 break;
4879 case COMMIT_TRANS:
4880 /*
4881 * If we have pending delayed iputs then we could free up a
4882 * bunch of pinned space, so make sure we run the iputs before
4883 * we do our pinned bytes check below.
4884 */
4885 btrfs_run_delayed_iputs(fs_info);
4886 btrfs_wait_on_delayed_iputs(fs_info);
4887
4888 ret = may_commit_transaction(fs_info, space_info);
4889 break;
4890 default:
4891 ret = -ENOSPC;
4892 break;
4893 }
4894
4895 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
4896 ret);
4897 return;
4898}
4899
4900static inline u64
4901btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
4902 struct btrfs_space_info *space_info,
4903 bool system_chunk)
4904{
4905 struct reserve_ticket *ticket;
4906 u64 used;
4907 u64 expected;
4908 u64 to_reclaim = 0;
4909
4910 list_for_each_entry(ticket, &space_info->tickets, list)
4911 to_reclaim += ticket->bytes;
4912 list_for_each_entry(ticket, &space_info->priority_tickets, list)
4913 to_reclaim += ticket->bytes;
4914 if (to_reclaim)
4915 return to_reclaim;
4916
4917 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4918 if (can_overcommit(fs_info, space_info, to_reclaim,
4919 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4920 return 0;
4921
4922 used = btrfs_space_info_used(space_info, true);
4923
4924 if (can_overcommit(fs_info, space_info, SZ_1M,
4925 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4926 expected = div_factor_fine(space_info->total_bytes, 95);
4927 else
4928 expected = div_factor_fine(space_info->total_bytes, 90);
4929
4930 if (used > expected)
4931 to_reclaim = used - expected;
4932 else
4933 to_reclaim = 0;
4934 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4935 space_info->bytes_reserved);
4936 return to_reclaim;
4937}
4938
4939static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
4940 struct btrfs_space_info *space_info,
4941 u64 used, bool system_chunk)
4942{
4943 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4944
4945 /* If we're just plain full then async reclaim just slows us down. */
4946 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4947 return 0;
4948
4949 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
4950 system_chunk))
4951 return 0;
4952
4953 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4954 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4955}
4956
4957static bool wake_all_tickets(struct list_head *head)
4958{
4959 struct reserve_ticket *ticket;
4960
4961 while (!list_empty(head)) {
4962 ticket = list_first_entry(head, struct reserve_ticket, list);
4963 list_del_init(&ticket->list);
4964 ticket->error = -ENOSPC;
4965 wake_up(&ticket->wait);
4966 if (ticket->bytes != ticket->orig_bytes)
4967 return true;
4968 }
4969 return false;
4970}
4971
4972/*
4973 * This is for normal flushers, we can wait all goddamned day if we want to. We
4974 * will loop and continuously try to flush as long as we are making progress.
4975 * We count progress as clearing off tickets each time we have to loop.
4976 */
4977static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4978{
4979 struct btrfs_fs_info *fs_info;
4980 struct btrfs_space_info *space_info;
4981 u64 to_reclaim;
4982 int flush_state;
4983 int commit_cycles = 0;
4984 u64 last_tickets_id;
4985
4986 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4987 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4988
4989 spin_lock(&space_info->lock);
4990 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
4991 false);
4992 if (!to_reclaim) {
4993 space_info->flush = 0;
4994 spin_unlock(&space_info->lock);
4995 return;
4996 }
4997 last_tickets_id = space_info->tickets_id;
4998 spin_unlock(&space_info->lock);
4999
5000 flush_state = FLUSH_DELAYED_ITEMS_NR;
5001 do {
5002 flush_space(fs_info, space_info, to_reclaim, flush_state);
5003 spin_lock(&space_info->lock);
5004 if (list_empty(&space_info->tickets)) {
5005 space_info->flush = 0;
5006 spin_unlock(&space_info->lock);
5007 return;
5008 }
5009 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
5010 space_info,
5011 false);
5012 if (last_tickets_id == space_info->tickets_id) {
5013 flush_state++;
5014 } else {
5015 last_tickets_id = space_info->tickets_id;
5016 flush_state = FLUSH_DELAYED_ITEMS_NR;
5017 if (commit_cycles)
5018 commit_cycles--;
5019 }
5020
5021 /*
5022 * We don't want to force a chunk allocation until we've tried
5023 * pretty hard to reclaim space. Think of the case where we
5024 * freed up a bunch of space and so have a lot of pinned space
5025 * to reclaim. We would rather use that than possibly create a
5026 * underutilized metadata chunk. So if this is our first run
5027 * through the flushing state machine skip ALLOC_CHUNK_FORCE and
5028 * commit the transaction. If nothing has changed the next go
5029 * around then we can force a chunk allocation.
5030 */
5031 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
5032 flush_state++;
5033
5034 if (flush_state > COMMIT_TRANS) {
5035 commit_cycles++;
5036 if (commit_cycles > 2) {
5037 if (wake_all_tickets(&space_info->tickets)) {
5038 flush_state = FLUSH_DELAYED_ITEMS_NR;
5039 commit_cycles--;
5040 } else {
5041 space_info->flush = 0;
5042 }
5043 } else {
5044 flush_state = FLUSH_DELAYED_ITEMS_NR;
5045 }
5046 }
5047 spin_unlock(&space_info->lock);
5048 } while (flush_state <= COMMIT_TRANS);
5049}
5050
5051void btrfs_init_async_reclaim_work(struct work_struct *work)
5052{
5053 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5054}
5055
5056static const enum btrfs_flush_state priority_flush_states[] = {
5057 FLUSH_DELAYED_ITEMS_NR,
5058 FLUSH_DELAYED_ITEMS,
5059 ALLOC_CHUNK,
5060};
5061
5062static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5063 struct btrfs_space_info *space_info,
5064 struct reserve_ticket *ticket)
5065{
5066 u64 to_reclaim;
5067 int flush_state;
5068
5069 spin_lock(&space_info->lock);
5070 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5071 false);
5072 if (!to_reclaim) {
5073 spin_unlock(&space_info->lock);
5074 return;
5075 }
5076 spin_unlock(&space_info->lock);
5077
5078 flush_state = 0;
5079 do {
5080 flush_space(fs_info, space_info, to_reclaim,
5081 priority_flush_states[flush_state]);
5082 flush_state++;
5083 spin_lock(&space_info->lock);
5084 if (ticket->bytes == 0) {
5085 spin_unlock(&space_info->lock);
5086 return;
5087 }
5088 spin_unlock(&space_info->lock);
5089 } while (flush_state < ARRAY_SIZE(priority_flush_states));
5090}
5091
5092static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5093 struct btrfs_space_info *space_info,
5094 struct reserve_ticket *ticket)
5095
5096{
5097 DEFINE_WAIT(wait);
5098 u64 reclaim_bytes = 0;
5099 int ret = 0;
5100
5101 spin_lock(&space_info->lock);
5102 while (ticket->bytes > 0 && ticket->error == 0) {
5103 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5104 if (ret) {
5105 ret = -EINTR;
5106 break;
5107 }
5108 spin_unlock(&space_info->lock);
5109
5110 schedule();
5111
5112 finish_wait(&ticket->wait, &wait);
5113 spin_lock(&space_info->lock);
5114 }
5115 if (!ret)
5116 ret = ticket->error;
5117 if (!list_empty(&ticket->list))
5118 list_del_init(&ticket->list);
5119 if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
5120 reclaim_bytes = ticket->orig_bytes - ticket->bytes;
5121 spin_unlock(&space_info->lock);
5122
5123 if (reclaim_bytes)
5124 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
5125 return ret;
5126}
5127
5128/**
5129 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5130 * @root - the root we're allocating for
5131 * @space_info - the space info we want to allocate from
5132 * @orig_bytes - the number of bytes we want
5133 * @flush - whether or not we can flush to make our reservation
5134 *
5135 * This will reserve orig_bytes number of bytes from the space info associated
5136 * with the block_rsv. If there is not enough space it will make an attempt to
5137 * flush out space to make room. It will do this by flushing delalloc if
5138 * possible or committing the transaction. If flush is 0 then no attempts to
5139 * regain reservations will be made and this will fail if there is not enough
5140 * space already.
5141 */
5142static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
5143 struct btrfs_space_info *space_info,
5144 u64 orig_bytes,
5145 enum btrfs_reserve_flush_enum flush,
5146 bool system_chunk)
5147{
5148 struct reserve_ticket ticket;
5149 u64 used;
5150 u64 reclaim_bytes = 0;
5151 int ret = 0;
5152
5153 ASSERT(orig_bytes);
5154 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5155
5156 spin_lock(&space_info->lock);
5157 ret = -ENOSPC;
5158 used = btrfs_space_info_used(space_info, true);
5159
5160 /*
5161 * If we have enough space then hooray, make our reservation and carry
5162 * on. If not see if we can overcommit, and if we can, hooray carry on.
5163 * If not things get more complicated.
5164 */
5165 if (used + orig_bytes <= space_info->total_bytes) {
5166 update_bytes_may_use(space_info, orig_bytes);
5167 trace_btrfs_space_reservation(fs_info, "space_info",
5168 space_info->flags, orig_bytes, 1);
5169 ret = 0;
5170 } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
5171 system_chunk)) {
5172 update_bytes_may_use(space_info, orig_bytes);
5173 trace_btrfs_space_reservation(fs_info, "space_info",
5174 space_info->flags, orig_bytes, 1);
5175 ret = 0;
5176 }
5177
5178 /*
5179 * If we couldn't make a reservation then setup our reservation ticket
5180 * and kick the async worker if it's not already running.
5181 *
5182 * If we are a priority flusher then we just need to add our ticket to
5183 * the list and we will do our own flushing further down.
5184 */
5185 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5186 ticket.orig_bytes = orig_bytes;
5187 ticket.bytes = orig_bytes;
5188 ticket.error = 0;
5189 init_waitqueue_head(&ticket.wait);
5190 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5191 list_add_tail(&ticket.list, &space_info->tickets);
5192 if (!space_info->flush) {
5193 space_info->flush = 1;
5194 trace_btrfs_trigger_flush(fs_info,
5195 space_info->flags,
5196 orig_bytes, flush,
5197 "enospc");
5198 queue_work(system_unbound_wq,
5199 &fs_info->async_reclaim_work);
5200 }
5201 } else {
5202 list_add_tail(&ticket.list,
5203 &space_info->priority_tickets);
5204 }
5205 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5206 used += orig_bytes;
5207 /*
5208 * We will do the space reservation dance during log replay,
5209 * which means we won't have fs_info->fs_root set, so don't do
5210 * the async reclaim as we will panic.
5211 */
5212 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
5213 need_do_async_reclaim(fs_info, space_info,
5214 used, system_chunk) &&
5215 !work_busy(&fs_info->async_reclaim_work)) {
5216 trace_btrfs_trigger_flush(fs_info, space_info->flags,
5217 orig_bytes, flush, "preempt");
5218 queue_work(system_unbound_wq,
5219 &fs_info->async_reclaim_work);
5220 }
5221 }
5222 spin_unlock(&space_info->lock);
5223 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5224 return ret;
5225
5226 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5227 return wait_reserve_ticket(fs_info, space_info, &ticket);
5228
5229 ret = 0;
5230 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
5231 spin_lock(&space_info->lock);
5232 if (ticket.bytes) {
5233 if (ticket.bytes < orig_bytes)
5234 reclaim_bytes = orig_bytes - ticket.bytes;
5235 list_del_init(&ticket.list);
5236 ret = -ENOSPC;
5237 }
5238 spin_unlock(&space_info->lock);
5239
5240 if (reclaim_bytes)
5241 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
5242 ASSERT(list_empty(&ticket.list));
5243 return ret;
5244}
5245
5246/**
5247 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5248 * @root - the root we're allocating for
5249 * @block_rsv - the block_rsv we're allocating for
5250 * @orig_bytes - the number of bytes we want
5251 * @flush - whether or not we can flush to make our reservation
5252 *
5253 * This will reserve orig_bytes number of bytes from the space info associated
5254 * with the block_rsv. If there is not enough space it will make an attempt to
5255 * flush out space to make room. It will do this by flushing delalloc if
5256 * possible or committing the transaction. If flush is 0 then no attempts to
5257 * regain reservations will be made and this will fail if there is not enough
5258 * space already.
5259 */
5260static int reserve_metadata_bytes(struct btrfs_root *root,
5261 struct btrfs_block_rsv *block_rsv,
5262 u64 orig_bytes,
5263 enum btrfs_reserve_flush_enum flush)
5264{
5265 struct btrfs_fs_info *fs_info = root->fs_info;
5266 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5267 int ret;
5268 bool system_chunk = (root == fs_info->chunk_root);
5269
5270 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
5271 orig_bytes, flush, system_chunk);
5272 if (ret == -ENOSPC &&
5273 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5274 if (block_rsv != global_rsv &&
5275 !block_rsv_use_bytes(global_rsv, orig_bytes))
5276 ret = 0;
5277 }
5278 if (ret == -ENOSPC) {
5279 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
5280 block_rsv->space_info->flags,
5281 orig_bytes, 1);
5282
5283 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
5284 dump_space_info(fs_info, block_rsv->space_info,
5285 orig_bytes, 0);
5286 }
5287 return ret;
5288}
5289
5290static struct btrfs_block_rsv *get_block_rsv(
5291 const struct btrfs_trans_handle *trans,
5292 const struct btrfs_root *root)
5293{
5294 struct btrfs_fs_info *fs_info = root->fs_info;
5295 struct btrfs_block_rsv *block_rsv = NULL;
5296
5297 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5298 (root == fs_info->csum_root && trans->adding_csums) ||
5299 (root == fs_info->uuid_root))
5300 block_rsv = trans->block_rsv;
5301
5302 if (!block_rsv)
5303 block_rsv = root->block_rsv;
5304
5305 if (!block_rsv)
5306 block_rsv = &fs_info->empty_block_rsv;
5307
5308 return block_rsv;
5309}
5310
5311static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5312 u64 num_bytes)
5313{
5314 int ret = -ENOSPC;
5315 spin_lock(&block_rsv->lock);
5316 if (block_rsv->reserved >= num_bytes) {
5317 block_rsv->reserved -= num_bytes;
5318 if (block_rsv->reserved < block_rsv->size)
5319 block_rsv->full = 0;
5320 ret = 0;
5321 }
5322 spin_unlock(&block_rsv->lock);
5323 return ret;
5324}
5325
5326static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5327 u64 num_bytes, bool update_size)
5328{
5329 spin_lock(&block_rsv->lock);
5330 block_rsv->reserved += num_bytes;
5331 if (update_size)
5332 block_rsv->size += num_bytes;
5333 else if (block_rsv->reserved >= block_rsv->size)
5334 block_rsv->full = 1;
5335 spin_unlock(&block_rsv->lock);
5336}
5337
5338int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5339 struct btrfs_block_rsv *dest, u64 num_bytes,
5340 int min_factor)
5341{
5342 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5343 u64 min_bytes;
5344
5345 if (global_rsv->space_info != dest->space_info)
5346 return -ENOSPC;
5347
5348 spin_lock(&global_rsv->lock);
5349 min_bytes = div_factor(global_rsv->size, min_factor);
5350 if (global_rsv->reserved < min_bytes + num_bytes) {
5351 spin_unlock(&global_rsv->lock);
5352 return -ENOSPC;
5353 }
5354 global_rsv->reserved -= num_bytes;
5355 if (global_rsv->reserved < global_rsv->size)
5356 global_rsv->full = 0;
5357 spin_unlock(&global_rsv->lock);
5358
5359 block_rsv_add_bytes(dest, num_bytes, true);
5360 return 0;
5361}
5362
5363/**
5364 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
5365 * @fs_info - the fs info for our fs.
5366 * @src - the source block rsv to transfer from.
5367 * @num_bytes - the number of bytes to transfer.
5368 *
5369 * This transfers up to the num_bytes amount from the src rsv to the
5370 * delayed_refs_rsv. Any extra bytes are returned to the space info.
5371 */
5372void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
5373 struct btrfs_block_rsv *src,
5374 u64 num_bytes)
5375{
5376 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
5377 u64 to_free = 0;
5378
5379 spin_lock(&src->lock);
5380 src->reserved -= num_bytes;
5381 src->size -= num_bytes;
5382 spin_unlock(&src->lock);
5383
5384 spin_lock(&delayed_refs_rsv->lock);
5385 if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
5386 u64 delta = delayed_refs_rsv->size -
5387 delayed_refs_rsv->reserved;
5388 if (num_bytes > delta) {
5389 to_free = num_bytes - delta;
5390 num_bytes = delta;
5391 }
5392 } else {
5393 to_free = num_bytes;
5394 num_bytes = 0;
5395 }
5396
5397 if (num_bytes)
5398 delayed_refs_rsv->reserved += num_bytes;
5399 if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
5400 delayed_refs_rsv->full = 1;
5401 spin_unlock(&delayed_refs_rsv->lock);
5402
5403 if (num_bytes)
5404 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5405 0, num_bytes, 1);
5406 if (to_free)
5407 space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
5408 to_free);
5409}
5410
5411/**
5412 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
5413 * @fs_info - the fs_info for our fs.
5414 * @flush - control how we can flush for this reservation.
5415 *
5416 * This will refill the delayed block_rsv up to 1 items size worth of space and
5417 * will return -ENOSPC if we can't make the reservation.
5418 */
5419int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
5420 enum btrfs_reserve_flush_enum flush)
5421{
5422 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5423 u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
5424 u64 num_bytes = 0;
5425 int ret = -ENOSPC;
5426
5427 spin_lock(&block_rsv->lock);
5428 if (block_rsv->reserved < block_rsv->size) {
5429 num_bytes = block_rsv->size - block_rsv->reserved;
5430 num_bytes = min(num_bytes, limit);
5431 }
5432 spin_unlock(&block_rsv->lock);
5433
5434 if (!num_bytes)
5435 return 0;
5436
5437 ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
5438 num_bytes, flush);
5439 if (ret)
5440 return ret;
5441 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5442 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5443 0, num_bytes, 1);
5444 return 0;
5445}
5446
5447/*
5448 * This is for space we already have accounted in space_info->bytes_may_use, so
5449 * basically when we're returning space from block_rsv's.
5450 */
5451static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5452 struct btrfs_space_info *space_info,
5453 u64 num_bytes)
5454{
5455 struct reserve_ticket *ticket;
5456 struct list_head *head;
5457 u64 used;
5458 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5459 bool check_overcommit = false;
5460
5461 spin_lock(&space_info->lock);
5462 head = &space_info->priority_tickets;
5463
5464 /*
5465 * If we are over our limit then we need to check and see if we can
5466 * overcommit, and if we can't then we just need to free up our space
5467 * and not satisfy any requests.
5468 */
5469 used = btrfs_space_info_used(space_info, true);
5470 if (used - num_bytes >= space_info->total_bytes)
5471 check_overcommit = true;
5472again:
5473 while (!list_empty(head) && num_bytes) {
5474 ticket = list_first_entry(head, struct reserve_ticket,
5475 list);
5476 /*
5477 * We use 0 bytes because this space is already reserved, so
5478 * adding the ticket space would be a double count.
5479 */
5480 if (check_overcommit &&
5481 !can_overcommit(fs_info, space_info, 0, flush, false))
5482 break;
5483 if (num_bytes >= ticket->bytes) {
5484 list_del_init(&ticket->list);
5485 num_bytes -= ticket->bytes;
5486 ticket->bytes = 0;
5487 space_info->tickets_id++;
5488 wake_up(&ticket->wait);
5489 } else {
5490 ticket->bytes -= num_bytes;
5491 num_bytes = 0;
5492 }
5493 }
5494
5495 if (num_bytes && head == &space_info->priority_tickets) {
5496 head = &space_info->tickets;
5497 flush = BTRFS_RESERVE_FLUSH_ALL;
5498 goto again;
5499 }
5500 update_bytes_may_use(space_info, -num_bytes);
5501 trace_btrfs_space_reservation(fs_info, "space_info",
5502 space_info->flags, num_bytes, 0);
5503 spin_unlock(&space_info->lock);
5504}
5505
5506/*
5507 * This is for newly allocated space that isn't accounted in
5508 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
5509 * we use this helper.
5510 */
5511static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5512 struct btrfs_space_info *space_info,
5513 u64 num_bytes)
5514{
5515 struct reserve_ticket *ticket;
5516 struct list_head *head = &space_info->priority_tickets;
5517
5518again:
5519 while (!list_empty(head) && num_bytes) {
5520 ticket = list_first_entry(head, struct reserve_ticket,
5521 list);
5522 if (num_bytes >= ticket->bytes) {
5523 trace_btrfs_space_reservation(fs_info, "space_info",
5524 space_info->flags,
5525 ticket->bytes, 1);
5526 list_del_init(&ticket->list);
5527 num_bytes -= ticket->bytes;
5528 update_bytes_may_use(space_info, ticket->bytes);
5529 ticket->bytes = 0;
5530 space_info->tickets_id++;
5531 wake_up(&ticket->wait);
5532 } else {
5533 trace_btrfs_space_reservation(fs_info, "space_info",
5534 space_info->flags,
5535 num_bytes, 1);
5536 update_bytes_may_use(space_info, num_bytes);
5537 ticket->bytes -= num_bytes;
5538 num_bytes = 0;
5539 }
5540 }
5541
5542 if (num_bytes && head == &space_info->priority_tickets) {
5543 head = &space_info->tickets;
5544 goto again;
5545 }
5546}
5547
5548static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5549 struct btrfs_block_rsv *block_rsv,
5550 struct btrfs_block_rsv *dest, u64 num_bytes,
5551 u64 *qgroup_to_release_ret)
5552{
5553 struct btrfs_space_info *space_info = block_rsv->space_info;
5554 u64 qgroup_to_release = 0;
5555 u64 ret;
5556
5557 spin_lock(&block_rsv->lock);
5558 if (num_bytes == (u64)-1) {
5559 num_bytes = block_rsv->size;
5560 qgroup_to_release = block_rsv->qgroup_rsv_size;
5561 }
5562 block_rsv->size -= num_bytes;
5563 if (block_rsv->reserved >= block_rsv->size) {
5564 num_bytes = block_rsv->reserved - block_rsv->size;
5565 block_rsv->reserved = block_rsv->size;
5566 block_rsv->full = 1;
5567 } else {
5568 num_bytes = 0;
5569 }
5570 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5571 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5572 block_rsv->qgroup_rsv_size;
5573 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5574 } else {
5575 qgroup_to_release = 0;
5576 }
5577 spin_unlock(&block_rsv->lock);
5578
5579 ret = num_bytes;
5580 if (num_bytes > 0) {
5581 if (dest) {
5582 spin_lock(&dest->lock);
5583 if (!dest->full) {
5584 u64 bytes_to_add;
5585
5586 bytes_to_add = dest->size - dest->reserved;
5587 bytes_to_add = min(num_bytes, bytes_to_add);
5588 dest->reserved += bytes_to_add;
5589 if (dest->reserved >= dest->size)
5590 dest->full = 1;
5591 num_bytes -= bytes_to_add;
5592 }
5593 spin_unlock(&dest->lock);
5594 }
5595 if (num_bytes)
5596 space_info_add_old_bytes(fs_info, space_info,
5597 num_bytes);
5598 }
5599 if (qgroup_to_release_ret)
5600 *qgroup_to_release_ret = qgroup_to_release;
5601 return ret;
5602}
5603
5604int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5605 struct btrfs_block_rsv *dst, u64 num_bytes,
5606 bool update_size)
5607{
5608 int ret;
5609
5610 ret = block_rsv_use_bytes(src, num_bytes);
5611 if (ret)
5612 return ret;
5613
5614 block_rsv_add_bytes(dst, num_bytes, update_size);
5615 return 0;
5616}
5617
5618void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5619{
5620 memset(rsv, 0, sizeof(*rsv));
5621 spin_lock_init(&rsv->lock);
5622 rsv->type = type;
5623}
5624
5625void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
5626 struct btrfs_block_rsv *rsv,
5627 unsigned short type)
5628{
5629 btrfs_init_block_rsv(rsv, type);
5630 rsv->space_info = __find_space_info(fs_info,
5631 BTRFS_BLOCK_GROUP_METADATA);
5632}
5633
5634struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
5635 unsigned short type)
5636{
5637 struct btrfs_block_rsv *block_rsv;
5638
5639 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5640 if (!block_rsv)
5641 return NULL;
5642
5643 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
5644 return block_rsv;
5645}
5646
5647void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
5648 struct btrfs_block_rsv *rsv)
5649{
5650 if (!rsv)
5651 return;
5652 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5653 kfree(rsv);
5654}
5655
5656int btrfs_block_rsv_add(struct btrfs_root *root,
5657 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5658 enum btrfs_reserve_flush_enum flush)
5659{
5660 int ret;
5661
5662 if (num_bytes == 0)
5663 return 0;
5664
5665 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5666 if (!ret)
5667 block_rsv_add_bytes(block_rsv, num_bytes, true);
5668
5669 return ret;
5670}
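/*
 * Minimal usage sketch (assumed caller, for illustration only): take a
 * fixed-size reservation with btrfs_block_rsv_add() and hand the whole
 * thing back with btrfs_block_rsv_release() when the work is done.
 */
static int example_take_and_drop(struct btrfs_root *root,
				 struct btrfs_block_rsv *rsv, u64 bytes)
{
	int ret;

	ret = btrfs_block_rsv_add(root, rsv, bytes, BTRFS_RESERVE_FLUSH_ALL);
	if (ret)
		return ret;

	/* ... modify metadata covered by the reservation ... */

	/* (u64)-1 releases everything reserved above rsv->size */
	btrfs_block_rsv_release(root->fs_info, rsv, (u64)-1);
	return 0;
}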
5671
5672int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
5673{
5674 u64 num_bytes = 0;
5675 int ret = -ENOSPC;
5676
5677 if (!block_rsv)
5678 return 0;
5679
5680 spin_lock(&block_rsv->lock);
5681 num_bytes = div_factor(block_rsv->size, min_factor);
5682 if (block_rsv->reserved >= num_bytes)
5683 ret = 0;
5684 spin_unlock(&block_rsv->lock);
5685
5686 return ret;
5687}
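/*
 * Sketch of the min_factor semantics (assumed caller, illustration only):
 * with min_factor = 5 the check passes only if at least half of ->size,
 * i.e. div_factor(size, 5), is currently reserved.
 */
static bool example_global_rsv_half_full(struct btrfs_fs_info *fs_info)
{
	return btrfs_block_rsv_check(&fs_info->global_block_rsv, 5) == 0;
}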
5688
5689int btrfs_block_rsv_refill(struct btrfs_root *root,
5690 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5691 enum btrfs_reserve_flush_enum flush)
5692{
5693 u64 num_bytes = 0;
5694 int ret = -ENOSPC;
5695
5696 if (!block_rsv)
5697 return 0;
5698
5699 spin_lock(&block_rsv->lock);
5700 num_bytes = min_reserved;
5701 if (block_rsv->reserved >= num_bytes)
5702 ret = 0;
5703 else
5704 num_bytes -= block_rsv->reserved;
5705 spin_unlock(&block_rsv->lock);
5706
5707 if (!ret)
5708 return 0;
5709
5710 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5711 if (!ret) {
5712 block_rsv_add_bytes(block_rsv, num_bytes, false);
5713 return 0;
5714 }
5715
5716 return ret;
5717}
5718
5719static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5720 struct btrfs_block_rsv *block_rsv,
5721 u64 num_bytes, u64 *qgroup_to_release)
5722{
5723 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5724 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5725 struct btrfs_block_rsv *target = delayed_rsv;
5726
5727 if (target->full || target == block_rsv)
5728 target = global_rsv;
5729
5730 if (block_rsv->space_info != target->space_info)
5731 target = NULL;
5732
5733 return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
5734 qgroup_to_release);
5735}
5736
5737void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5738 struct btrfs_block_rsv *block_rsv,
5739 u64 num_bytes)
5740{
5741 __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
5742}
5743
5744/**
5745 * btrfs_inode_rsv_release - release any excess reservation.
5746 * @inode - the inode we need to release from.
5747 * @qgroup_free - free or convert qgroup meta.
5748 * Unlike normal operation, the qgroup meta reservation needs to know whether
5749 * we are freeing the qgroup reservation or converting it to per-trans. Normally
5750 * @qgroup_free is true for error handling, and false for normal release.
5751 *
5752 * This is the same as btrfs_block_rsv_release, except that it handles the
5753 * tracepoint for the reservation.
5754 */
5755static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
5756{
5757 struct btrfs_fs_info *fs_info = inode->root->fs_info;
5758 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5759 u64 released = 0;
5760 u64 qgroup_to_release = 0;
5761
5762 /*
5763 * Since we statically set block_rsv->size, we just say we are
5764 * releasing 0 bytes, and then whatever is reserved beyond that size
5765 * gets freed.
5766 */
5767 released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
5768 &qgroup_to_release);
5769 if (released > 0)
5770 trace_btrfs_space_reservation(fs_info, "delalloc",
5771 btrfs_ino(inode), released, 0);
5772 if (qgroup_free)
5773 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
5774 else
5775 btrfs_qgroup_convert_reserved_meta(inode->root,
5776 qgroup_to_release);
5777}
5778
5779/**
5780 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
5781 * @fs_info - the fs_info for our fs.
5782 * @nr - the number of items to drop.
5783 *
5784 * This drops the delayed ref head's count from the delayed refs rsv and frees
5785 * any excess reservation we had.
5786 */
5787void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
5788{
5789 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5790 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5791 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
5792 u64 released = 0;
5793
5794 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
5795 num_bytes, NULL);
5796 if (released)
5797 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5798 0, released, 0);
5799}
5800
5801static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5802{
5803 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5804 struct btrfs_space_info *sinfo = block_rsv->space_info;
5805 u64 num_bytes;
5806
5807 /*
5808 * The global block rsv is based on the size of the extent tree, the
5809 * checksum tree and the root tree. If the fs is empty we want to set
5810 * it to a minimal amount for safety.
5811 */
5812 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5813 btrfs_root_used(&fs_info->csum_root->root_item) +
5814 btrfs_root_used(&fs_info->tree_root->root_item);
5815 num_bytes = max_t(u64, num_bytes, SZ_16M);
5816
5817 spin_lock(&sinfo->lock);
5818 spin_lock(&block_rsv->lock);
5819
5820 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5821
5822 if (block_rsv->reserved < block_rsv->size) {
5823 num_bytes = btrfs_space_info_used(sinfo, true);
5824 if (sinfo->total_bytes > num_bytes) {
5825 num_bytes = sinfo->total_bytes - num_bytes;
5826 num_bytes = min(num_bytes,
5827 block_rsv->size - block_rsv->reserved);
5828 block_rsv->reserved += num_bytes;
5829 update_bytes_may_use(sinfo, num_bytes);
5830 trace_btrfs_space_reservation(fs_info, "space_info",
5831 sinfo->flags, num_bytes,
5832 1);
5833 }
5834 } else if (block_rsv->reserved > block_rsv->size) {
5835 num_bytes = block_rsv->reserved - block_rsv->size;
5836 update_bytes_may_use(sinfo, -num_bytes);
5837 trace_btrfs_space_reservation(fs_info, "space_info",
5838 sinfo->flags, num_bytes, 0);
5839 block_rsv->reserved = block_rsv->size;
5840 }
5841
5842 if (block_rsv->reserved == block_rsv->size)
5843 block_rsv->full = 1;
5844 else
5845 block_rsv->full = 0;
5846
5847 spin_unlock(&block_rsv->lock);
5848 spin_unlock(&sinfo->lock);
5849}
5850
5851static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5852{
5853 struct btrfs_space_info *space_info;
5854
5855 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5856 fs_info->chunk_block_rsv.space_info = space_info;
5857
5858 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5859 fs_info->global_block_rsv.space_info = space_info;
5860 fs_info->trans_block_rsv.space_info = space_info;
5861 fs_info->empty_block_rsv.space_info = space_info;
5862 fs_info->delayed_block_rsv.space_info = space_info;
5863 fs_info->delayed_refs_rsv.space_info = space_info;
5864
5865 fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
5866 fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
5867 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5868 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5869 if (fs_info->quota_root)
5870 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5871 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5872
5873 update_global_block_rsv(fs_info);
5874}
5875
5876static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5877{
5878 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5879 (u64)-1, NULL);
5880 WARN_ON(fs_info->trans_block_rsv.size > 0);
5881 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5882 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5883 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5884 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5885 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5886 WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
5887 WARN_ON(fs_info->delayed_refs_rsv.size > 0);
5888}
5889
5890/*
5891 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
5892 * @trans - the trans that may have generated delayed refs
5893 *
5894 * This is to be called anytime we may have adjusted trans->delayed_ref_updates;
5895 * it calculates the additional size and adds it to the delayed_refs_rsv.
5896 */
5897void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
5898{
5899 struct btrfs_fs_info *fs_info = trans->fs_info;
5900 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5901 u64 num_bytes;
5902
5903 if (!trans->delayed_ref_updates)
5904 return;
5905
5906 num_bytes = btrfs_calc_trans_metadata_size(fs_info,
5907 trans->delayed_ref_updates);
5908 spin_lock(&delayed_rsv->lock);
5909 delayed_rsv->size += num_bytes;
5910 delayed_rsv->full = 0;
5911 spin_unlock(&delayed_rsv->lock);
5912 trans->delayed_ref_updates = 0;
5913}
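/*
 * Usage sketch (assumed caller, illustration only): code that queues new
 * delayed ref heads bumps trans->delayed_ref_updates and lets this helper
 * grow the reservation; btrfs_delayed_refs_rsv_release() shrinks it again
 * once the ref heads have been run.
 */
static void example_account_new_ref_heads(struct btrfs_trans_handle *trans,
					  int nr_ref_heads)
{
	trans->delayed_ref_updates += nr_ref_heads;
	btrfs_update_delayed_refs_rsv(trans);
}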
5914
5915/*
5916 * To be called after all the new block groups attached to the transaction
5917 * handle have been created (btrfs_create_pending_block_groups()).
5918 */
5919void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5920{
5921 struct btrfs_fs_info *fs_info = trans->fs_info;
5922
5923 if (!trans->chunk_bytes_reserved)
5924 return;
5925
5926 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5927
5928 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5929 trans->chunk_bytes_reserved, NULL);
5930 trans->chunk_bytes_reserved = 0;
5931}
5932
5933/*
5934 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
5935 * root: the root of the parent directory
5936 * rsv: block reservation
5937 * items: the number of items that we need to reserve space for
5938 * use_global_rsv: allow fallback to the global block reservation
5939 *
5940 * This function reserves the space needed for snapshot/subvolume
5941 * creation and deletion. Those operations differ from common
5942 * file/directory operations: they modify two fs/file trees as well as
5943 * the root tree, and the number of items that qgroups reserve differs
5944 * from the free space reservation. So we cannot use the space
5945 * reservation mechanism in start_transaction().
5946 */
5947int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5948 struct btrfs_block_rsv *rsv, int items,
5949 bool use_global_rsv)
5950{
5951 u64 qgroup_num_bytes = 0;
5952 u64 num_bytes;
5953 int ret;
5954 struct btrfs_fs_info *fs_info = root->fs_info;
5955 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5956
5957 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
5958 /* One for parent inode, two for dir entries */
5959 qgroup_num_bytes = 3 * fs_info->nodesize;
5960 ret = btrfs_qgroup_reserve_meta_prealloc(root,
5961 qgroup_num_bytes, true);
5962 if (ret)
5963 return ret;
5964 }
5965
5966 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
5967 rsv->space_info = __find_space_info(fs_info,
5968 BTRFS_BLOCK_GROUP_METADATA);
5969 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5970 BTRFS_RESERVE_FLUSH_ALL);
5971
5972 if (ret == -ENOSPC && use_global_rsv)
5973 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
5974
5975 if (ret && qgroup_num_bytes)
5976 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5977
5978 return ret;
5979}
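/*
 * Usage sketch (illustration only; the item count is an assumed example
 * value, not taken from the snapshot code): reserve into a temporary rsv,
 * allow falling back to the global rsv, and release when finished.
 */
static int example_reserve_for_subvol_op(struct btrfs_root *parent_root,
					 struct btrfs_block_rsv *rsv)
{
	int ret;

	btrfs_init_block_rsv(rsv, BTRFS_BLOCK_RSV_TEMP);
	ret = btrfs_subvolume_reserve_metadata(parent_root, rsv, 8, true);
	if (ret)
		return ret;

	/* ... create or delete the subvolume/snapshot ... */

	btrfs_subvolume_release_metadata(parent_root->fs_info, rsv);
	return 0;
}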
5980
5981void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
5982 struct btrfs_block_rsv *rsv)
5983{
5984 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5985}
5986
5987static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
5988 struct btrfs_inode *inode)
5989{
5990 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5991 u64 reserve_size = 0;
5992 u64 qgroup_rsv_size = 0;
5993 u64 csum_leaves;
5994 unsigned outstanding_extents;
5995
5996 lockdep_assert_held(&inode->lock);
5997 outstanding_extents = inode->outstanding_extents;
5998 if (outstanding_extents)
5999 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
6000 outstanding_extents + 1);
6001 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
6002 inode->csum_bytes);
6003 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6004 csum_leaves);
6005 /*
6006 * For qgroup rsv, the calculation is very simple:
6007 * account one nodesize for each outstanding extent
6008 *
6009 * This overestimates in most cases.
6010 */
6011 qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
6012
6013 spin_lock(&block_rsv->lock);
6014 block_rsv->size = reserve_size;
6015 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
6016 spin_unlock(&block_rsv->lock);
6017}
6018
6019static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
6020 u64 num_bytes, u64 *meta_reserve,
6021 u64 *qgroup_reserve)
6022{
6023 u64 nr_extents = count_max_extents(num_bytes);
6024 u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
6025
6026 /* We add one for the inode update at finish ordered time */
6027 *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
6028 nr_extents + csum_leaves + 1);
6029 *qgroup_reserve = nr_extents * fs_info->nodesize;
6030}
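/*
 * Worked example (numbers assumed for illustration): a 256 MiB delalloc
 * reservation counts as two extents if extents are capped at 128 MiB, so
 * *qgroup_reserve = 2 * nodesize and *meta_reserve covers
 * 2 + csum_leaves + 1 metadata items, where csum_leaves is however many
 * leaves the checksums for 256 MiB of data need.
 */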
6031
6032int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6033{
6034 struct btrfs_root *root = inode->root;
6035 struct btrfs_fs_info *fs_info = root->fs_info;
6036 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6037 u64 meta_reserve, qgroup_reserve;
6038 unsigned nr_extents;
6039 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6040 int ret = 0;
6041 bool delalloc_lock = true;
6042
6043 /* If we are a free space inode we need to not flush since we will be in
6044 * the middle of a transaction commit. We also don't need the delalloc
6045 * mutex since we won't race with anybody. We need this mostly to make
6046 * lockdep shut its filthy mouth.
6047 *
6048 * If we have a transaction open (can happen if we call truncate_block
6049 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
6050 */
6051 if (btrfs_is_free_space_inode(inode)) {
6052 flush = BTRFS_RESERVE_NO_FLUSH;
6053 delalloc_lock = false;
6054 } else {
6055 if (current->journal_info)
6056 flush = BTRFS_RESERVE_FLUSH_LIMIT;
6057
6058 if (btrfs_transaction_in_commit(fs_info))
6059 schedule_timeout(1);
6060 }
6061
6062 if (delalloc_lock)
6063 mutex_lock(&inode->delalloc_mutex);
6064
6065 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6066
6067 /*
6068 * We always want to do it this way, every other way is wrong and ends
6069 * in tears. Pre-reserving the amount we are going to add will always
6070 * be the right way, because otherwise if we have enough parallelism we
6071 * could end up with thousands of inodes all holding little bits of
6072 * reservations they were able to make previously and the only way to
6073 * reclaim that space is to ENOSPC out the operations and clear
6074 * everything out and try again, which is bad. This way we just
6075 * over-reserve slightly, and clean up the mess when we are done.
6076 */
6077 calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
6078 &qgroup_reserve);
6079 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
6080 if (ret)
6081 goto out_fail;
6082 ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
6083 if (ret)
6084 goto out_qgroup;
6085
6086 /*
6087 * Now we need to update our outstanding extents and csum bytes _first_
6088 * and then add the reservation to the block_rsv. This keeps us from
6089 * racing with an ordered completion or some such that would think it
6090 * needs to free the reservation we just made.
6091 */
6092 spin_lock(&inode->lock);
6093 nr_extents = count_max_extents(num_bytes);
6094 btrfs_mod_outstanding_extents(inode, nr_extents);
6095 inode->csum_bytes += num_bytes;
6096 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6097 spin_unlock(&inode->lock);
6098
6099 /* Now we can safely add our space to our block rsv */
6100 block_rsv_add_bytes(block_rsv, meta_reserve, false);
6101 trace_btrfs_space_reservation(root->fs_info, "delalloc",
6102 btrfs_ino(inode), meta_reserve, 1);
6103
6104 spin_lock(&block_rsv->lock);
6105 block_rsv->qgroup_rsv_reserved += qgroup_reserve;
6106 spin_unlock(&block_rsv->lock);
6107
6108 if (delalloc_lock)
6109 mutex_unlock(&inode->delalloc_mutex);
6110 return 0;
6111out_qgroup:
6112 btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
6113out_fail:
6114 btrfs_inode_rsv_release(inode, true);
6115 if (delalloc_lock)
6116 mutex_unlock(&inode->delalloc_mutex);
6117 return ret;
6118}
6119
6120/**
6121 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
6122 * @inode: the inode to release the reservation for.
6123 * @num_bytes: the number of bytes we are releasing.
6124 * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
6125 *
6126 * This will release the metadata reservation for an inode. This can be called
6127 * once we complete IO for a given set of bytes to release their metadata
6128 * reservations, or on error for the same reason.
6129 */
6130void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6131 bool qgroup_free)
6132{
6133 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6134
6135 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6136 spin_lock(&inode->lock);
6137 inode->csum_bytes -= num_bytes;
6138 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6139 spin_unlock(&inode->lock);
6140
6141 if (btrfs_is_testing(fs_info))
6142 return;
6143
6144 btrfs_inode_rsv_release(inode, qgroup_free);
6145}
6146
6147/**
6148 * btrfs_delalloc_release_extents - release our outstanding_extents
6149 * @inode: the inode to balance the reservation for.
6150 * @num_bytes: the number of bytes we originally reserved the space with
6151 * @qgroup_free: do we need to free qgroup meta reservation or convert them.
6152 *
6153 * When we reserve space we increase outstanding_extents for the extents we may
6154 * add. Once we've set the range as delalloc or created our ordered extents we
6155 * have outstanding_extents to track the real usage, so we use this to free our
6156 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
6157 * with btrfs_delalloc_reserve_metadata.
6158 */
6159void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6160 bool qgroup_free)
6161{
6162 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6163 unsigned num_extents;
6164
6165 spin_lock(&inode->lock);
6166 num_extents = count_max_extents(num_bytes);
6167 btrfs_mod_outstanding_extents(inode, -num_extents);
6168 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6169 spin_unlock(&inode->lock);
6170
6171 if (btrfs_is_testing(fs_info))
6172 return;
6173
6174 btrfs_inode_rsv_release(inode, qgroup_free);
6175}
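/*
 * Pairing sketch (assumed write path, illustration only): the temporary
 * outstanding_extents taken by btrfs_delalloc_reserve_metadata() are
 * always dropped again with btrfs_delalloc_release_extents() once the
 * delalloc range or ordered extent is set up; the byte reservation itself
 * stays until the ordered extent completes.
 */
static int example_prepare_buffered_write(struct btrfs_inode *inode,
					  u64 num_bytes)
{
	int ret;

	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
	if (ret)
		return ret;

	/* ... mark the range delalloc / create the ordered extent ... */

	btrfs_delalloc_release_extents(inode, num_bytes, false);
	return 0;
}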
6176
6177/**
6178 * btrfs_delalloc_reserve_space - reserve data and metadata space for
6179 * delalloc
6180 * @inode: inode we're writing to
6181 * @start: start of the range we are writing to
6182 * @len: the length of the range we are writing to
6183 * @reserved: mandatory parameter, record actually reserved qgroup ranges of
6184 * current reservation.
6185 *
6186 * This will do the following things
6187 *
6188 * o reserve space in data space info for num bytes
6189 * and reserve the precise corresponding qgroup space
6190 * (Done in check_data_free_space)
6191 *
6192 * o reserve space for metadata space, based on the number of outstanding
6193 * extents and how much csums will be needed
6194 * also reserve metadata space in a per root over-reserve method.
6195 * o add to the inode's delalloc_bytes
6196 * o add it to the fs_info's delalloc inodes list.
6197 * (Above 3 all done in delalloc_reserve_metadata)
6198 *
6199 * Return 0 for success
6200 * Return <0 for error (-ENOSPC or -EDQUOT)
6201 */
6202int btrfs_delalloc_reserve_space(struct inode *inode,
6203 struct extent_changeset **reserved, u64 start, u64 len)
6204{
6205 int ret;
6206
6207 ret = btrfs_check_data_free_space(inode, reserved, start, len);
6208 if (ret < 0)
6209 return ret;
6210 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
6211 if (ret < 0)
6212 btrfs_free_reserved_data_space(inode, *reserved, start, len);
6213 return ret;
6214}
6215
6216/**
6217 * btrfs_delalloc_release_space - release data and metadata space for delalloc
6218 * @inode: inode we're releasing space for
6219 * @start: start position of the space already reserved
6220 * @len: the length of the space already reserved
6221 * @qgroup_free: free the qgroup reservation or convert it to per-trans
6222 *
6223 * This function will release the metadata space that was not used and will
6224 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
6225 * list if there are no delalloc bytes left.
6226 * Also it will handle the qgroup reserved space.
6227 */
6228void btrfs_delalloc_release_space(struct inode *inode,
6229 struct extent_changeset *reserved,
6230 u64 start, u64 len, bool qgroup_free)
6231{
6232 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
6233 btrfs_free_reserved_data_space(inode, reserved, start, len);
6234}
6235
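/*
 * Usage sketch (assumed caller; example_copy_data() is a hypothetical
 * placeholder): reserve data + metadata before dirtying pages and give
 * everything back on failure.
 */
static int example_reserve_for_write(struct inode *inode, u64 start, u64 len)
{
	struct extent_changeset *reserved = NULL;
	int ret;

	ret = btrfs_delalloc_reserve_space(inode, &reserved, start, len);
	if (ret < 0)
		return ret;

	ret = example_copy_data(inode, start, len);	/* hypothetical step */
	if (ret < 0)
		btrfs_delalloc_release_space(inode, reserved, start, len, true);

	extent_changeset_free(reserved);
	return ret;
}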
6236static int update_block_group(struct btrfs_trans_handle *trans, 4108static int update_block_group(struct btrfs_trans_handle *trans,
6237 u64 bytenr, u64 num_bytes, int alloc) 4109 u64 bytenr, u64 num_bytes, int alloc)
6238{ 4110{
@@ -6296,7 +4168,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6296 old_val -= num_bytes; 4168 old_val -= num_bytes;
6297 btrfs_set_block_group_used(&cache->item, old_val); 4169 btrfs_set_block_group_used(&cache->item, old_val);
6298 cache->pinned += num_bytes; 4170 cache->pinned += num_bytes;
6299 update_bytes_pinned(cache->space_info, num_bytes); 4171 btrfs_space_info_update_bytes_pinned(info,
4172 cache->space_info, num_bytes);
6300 cache->space_info->bytes_used -= num_bytes; 4173 cache->space_info->bytes_used -= num_bytes;
6301 cache->space_info->disk_used -= num_bytes * factor; 4174 cache->space_info->disk_used -= num_bytes * factor;
6302 spin_unlock(&cache->lock); 4175 spin_unlock(&cache->lock);
@@ -6371,7 +4244,8 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache,
6371 spin_lock(&cache->space_info->lock); 4244 spin_lock(&cache->space_info->lock);
6372 spin_lock(&cache->lock); 4245 spin_lock(&cache->lock);
6373 cache->pinned += num_bytes; 4246 cache->pinned += num_bytes;
6374 update_bytes_pinned(cache->space_info, num_bytes); 4247 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
4248 num_bytes);
6375 if (reserved) { 4249 if (reserved) {
6376 cache->reserved -= num_bytes; 4250 cache->reserved -= num_bytes;
6377 cache->space_info->bytes_reserved -= num_bytes; 4251 cache->space_info->bytes_reserved -= num_bytes;
@@ -6580,7 +4454,8 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6580 } else { 4454 } else {
6581 cache->reserved += num_bytes; 4455 cache->reserved += num_bytes;
6582 space_info->bytes_reserved += num_bytes; 4456 space_info->bytes_reserved += num_bytes;
6583 update_bytes_may_use(space_info, -ram_bytes); 4457 btrfs_space_info_update_bytes_may_use(cache->fs_info,
4458 space_info, -ram_bytes);
6584 if (delalloc) 4459 if (delalloc)
6585 cache->delalloc_bytes += num_bytes; 4460 cache->delalloc_bytes += num_bytes;
6586 } 4461 }
@@ -6646,7 +4521,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
6646 4521
6647 up_write(&fs_info->commit_root_sem); 4522 up_write(&fs_info->commit_root_sem);
6648 4523
6649 update_global_block_rsv(fs_info); 4524 btrfs_update_global_block_rsv(fs_info);
6650} 4525}
6651 4526
6652/* 4527/*
@@ -6736,7 +4611,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6736 spin_lock(&space_info->lock); 4611 spin_lock(&space_info->lock);
6737 spin_lock(&cache->lock); 4612 spin_lock(&cache->lock);
6738 cache->pinned -= len; 4613 cache->pinned -= len;
6739 update_bytes_pinned(space_info, -len); 4614 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
6740 4615
6741 trace_btrfs_space_reservation(fs_info, "pinned", 4616 trace_btrfs_space_reservation(fs_info, "pinned",
6742 space_info->flags, len, 0); 4617 space_info->flags, len, 0);
@@ -6757,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6757 to_add = min(len, global_rsv->size - 4632 to_add = min(len, global_rsv->size -
6758 global_rsv->reserved); 4633 global_rsv->reserved);
6759 global_rsv->reserved += to_add; 4634 global_rsv->reserved += to_add;
6760 update_bytes_may_use(space_info, to_add); 4635 btrfs_space_info_update_bytes_may_use(fs_info,
4636 space_info, to_add);
6761 if (global_rsv->reserved >= global_rsv->size) 4637 if (global_rsv->reserved >= global_rsv->size)
6762 global_rsv->full = 1; 4638 global_rsv->full = 1;
6763 trace_btrfs_space_reservation(fs_info, 4639 trace_btrfs_space_reservation(fs_info,
@@ -6769,8 +4645,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6769 spin_unlock(&global_rsv->lock); 4645 spin_unlock(&global_rsv->lock);
6770 /* Add to any tickets we may have */ 4646 /* Add to any tickets we may have */
6771 if (len) 4647 if (len)
6772 space_info_add_new_bytes(fs_info, space_info, 4648 btrfs_space_info_add_new_bytes(fs_info,
6773 len); 4649 space_info, len);
6774 } 4650 }
6775 spin_unlock(&space_info->lock); 4651 spin_unlock(&space_info->lock);
6776 } 4652 }
@@ -7191,7 +5067,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7191 } 5067 }
7192out: 5068out:
7193 if (pin) 5069 if (pin)
7194 add_pinned_bytes(fs_info, &generic_ref, 1); 5070 add_pinned_bytes(fs_info, &generic_ref);
7195 5071
7196 if (last_ref) { 5072 if (last_ref) {
7197 /* 5073 /*
@@ -7239,7 +5115,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
7239 btrfs_ref_tree_mod(fs_info, ref); 5115 btrfs_ref_tree_mod(fs_info, ref);
7240 5116
7241 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) 5117 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
7242 add_pinned_bytes(fs_info, ref, 1); 5118 add_pinned_bytes(fs_info, ref);
7243 5119
7244 return ret; 5120 return ret;
7245} 5121}
@@ -7292,10 +5168,10 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7292} 5168}
7293 5169
7294enum btrfs_loop_type { 5170enum btrfs_loop_type {
7295 LOOP_CACHING_NOWAIT = 0, 5171 LOOP_CACHING_NOWAIT,
7296 LOOP_CACHING_WAIT = 1, 5172 LOOP_CACHING_WAIT,
7297 LOOP_ALLOC_CHUNK = 2, 5173 LOOP_ALLOC_CHUNK,
7298 LOOP_NO_EMPTY_SIZE = 3, 5174 LOOP_NO_EMPTY_SIZE,
7299}; 5175};
7300 5176
7301static inline void 5177static inline void
@@ -7661,8 +5537,8 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
7661 return ret; 5537 return ret;
7662 } 5538 }
7663 5539
7664 ret = do_chunk_alloc(trans, ffe_ctl->flags, 5540 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
7665 CHUNK_ALLOC_FORCE); 5541 CHUNK_ALLOC_FORCE);
7666 5542
7667 /* 5543 /*
7668 * If we can't allocate a new chunk we've already looped 5544 * If we can't allocate a new chunk we've already looped
@@ -7758,7 +5634,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
7758 5634
7759 trace_find_free_extent(fs_info, num_bytes, empty_size, flags); 5635 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
7760 5636
7761 space_info = __find_space_info(fs_info, flags); 5637 space_info = btrfs_find_space_info(fs_info, flags);
7762 if (!space_info) { 5638 if (!space_info) {
7763 btrfs_err(fs_info, "No space info for %llu", flags); 5639 btrfs_err(fs_info, "No space info for %llu", flags);
7764 return -ENOSPC; 5640 return -ENOSPC;
@@ -7863,9 +5739,8 @@ search:
7863 */ 5739 */
7864 if (!block_group_bits(block_group, flags)) { 5740 if (!block_group_bits(block_group, flags)) {
7865 u64 extra = BTRFS_BLOCK_GROUP_DUP | 5741 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7866 BTRFS_BLOCK_GROUP_RAID1 | 5742 BTRFS_BLOCK_GROUP_RAID1_MASK |
7867 BTRFS_BLOCK_GROUP_RAID5 | 5743 BTRFS_BLOCK_GROUP_RAID56_MASK |
7868 BTRFS_BLOCK_GROUP_RAID6 |
7869 BTRFS_BLOCK_GROUP_RAID10; 5744 BTRFS_BLOCK_GROUP_RAID10;
7870 5745
7871 /* 5746 /*
@@ -7984,60 +5859,6 @@ loop:
7984 return ret; 5859 return ret;
7985} 5860}
7986 5861
7987#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
7988do { \
7989 struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
7990 spin_lock(&__rsv->lock); \
7991 btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
7992 __rsv->size, __rsv->reserved); \
7993 spin_unlock(&__rsv->lock); \
7994} while (0)
7995
7996static void dump_space_info(struct btrfs_fs_info *fs_info,
7997 struct btrfs_space_info *info, u64 bytes,
7998 int dump_block_groups)
7999{
8000 struct btrfs_block_group_cache *cache;
8001 int index = 0;
8002
8003 spin_lock(&info->lock);
8004 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
8005 info->flags,
8006 info->total_bytes - btrfs_space_info_used(info, true),
8007 info->full ? "" : "not ");
8008 btrfs_info(fs_info,
8009 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
8010 info->total_bytes, info->bytes_used, info->bytes_pinned,
8011 info->bytes_reserved, info->bytes_may_use,
8012 info->bytes_readonly);
8013 spin_unlock(&info->lock);
8014
8015 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
8016 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
8017 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
8018 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
8019 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
8020
8021 if (!dump_block_groups)
8022 return;
8023
8024 down_read(&info->groups_sem);
8025again:
8026 list_for_each_entry(cache, &info->block_groups[index], list) {
8027 spin_lock(&cache->lock);
8028 btrfs_info(fs_info,
8029 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
8030 cache->key.objectid, cache->key.offset,
8031 btrfs_block_group_used(&cache->item), cache->pinned,
8032 cache->reserved, cache->ro ? "[readonly]" : "");
8033 btrfs_dump_free_space(cache, bytes);
8034 spin_unlock(&cache->lock);
8035 }
8036 if (++index < BTRFS_NR_RAID_TYPES)
8037 goto again;
8038 up_read(&info->groups_sem);
8039}
8040
8041/* 5862/*
8042 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a 5863 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
8043 * hole that is at least as big as @num_bytes. 5864 * hole that is at least as big as @num_bytes.
@@ -8113,12 +5934,13 @@ again:
8113 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 5934 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8114 struct btrfs_space_info *sinfo; 5935 struct btrfs_space_info *sinfo;
8115 5936
8116 sinfo = __find_space_info(fs_info, flags); 5937 sinfo = btrfs_find_space_info(fs_info, flags);
8117 btrfs_err(fs_info, 5938 btrfs_err(fs_info,
8118 "allocation failed flags %llu, wanted %llu", 5939 "allocation failed flags %llu, wanted %llu",
8119 flags, num_bytes); 5940 flags, num_bytes);
8120 if (sinfo) 5941 if (sinfo)
8121 dump_space_info(fs_info, sinfo, num_bytes, 1); 5942 btrfs_dump_space_info(fs_info, sinfo,
5943 num_bytes, 1);
8122 } 5944 }
8123 } 5945 }
8124 5946
@@ -8456,73 +6278,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8456 return buf; 6278 return buf;
8457} 6279}
8458 6280
8459static struct btrfs_block_rsv *
8460use_block_rsv(struct btrfs_trans_handle *trans,
8461 struct btrfs_root *root, u32 blocksize)
8462{
8463 struct btrfs_fs_info *fs_info = root->fs_info;
8464 struct btrfs_block_rsv *block_rsv;
8465 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
8466 int ret;
8467 bool global_updated = false;
8468
8469 block_rsv = get_block_rsv(trans, root);
8470
8471 if (unlikely(block_rsv->size == 0))
8472 goto try_reserve;
8473again:
8474 ret = block_rsv_use_bytes(block_rsv, blocksize);
8475 if (!ret)
8476 return block_rsv;
8477
8478 if (block_rsv->failfast)
8479 return ERR_PTR(ret);
8480
8481 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8482 global_updated = true;
8483 update_global_block_rsv(fs_info);
8484 goto again;
8485 }
8486
8487 /*
8488 * The global reserve still exists to save us from ourselves, so don't
8489 * warn_on if we are short on our delayed refs reserve.
8490 */
8491 if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
8492 btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8493 static DEFINE_RATELIMIT_STATE(_rs,
8494 DEFAULT_RATELIMIT_INTERVAL * 10,
8495 /*DEFAULT_RATELIMIT_BURST*/ 1);
8496 if (__ratelimit(&_rs))
8497 WARN(1, KERN_DEBUG
8498 "BTRFS: block rsv returned %d\n", ret);
8499 }
8500try_reserve:
8501 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8502 BTRFS_RESERVE_NO_FLUSH);
8503 if (!ret)
8504 return block_rsv;
8505 /*
8506 * If we couldn't reserve metadata bytes try and use some from
8507 * the global reserve if its space type is the same as the global
8508 * reservation.
8509 */
8510 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8511 block_rsv->space_info == global_rsv->space_info) {
8512 ret = block_rsv_use_bytes(global_rsv, blocksize);
8513 if (!ret)
8514 return global_rsv;
8515 }
8516 return ERR_PTR(ret);
8517}
8518
8519static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8520 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8521{
8522 block_rsv_add_bytes(block_rsv, blocksize, false);
8523 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
8524}
8525
8526/* 6281/*
8527 * finds a free extent and does all the dirty work required for allocation 6282 * finds a free extent and does all the dirty work required for allocation
8528 * returns the tree buffer or an ERR_PTR on error. 6283 * returns the tree buffer or an ERR_PTR on error.
@@ -8555,7 +6310,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8555 } 6310 }
8556#endif 6311#endif
8557 6312
8558 block_rsv = use_block_rsv(trans, root, blocksize); 6313 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
8559 if (IS_ERR(block_rsv)) 6314 if (IS_ERR(block_rsv))
8560 return ERR_CAST(block_rsv); 6315 return ERR_CAST(block_rsv);
8561 6316
@@ -8613,7 +6368,7 @@ out_free_buf:
8613out_free_reserved: 6368out_free_reserved:
8614 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); 6369 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
8615out_unuse: 6370out_unuse:
8616 unuse_block_rsv(fs_info, block_rsv, blocksize); 6371 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
8617 return ERR_PTR(ret); 6372 return ERR_PTR(ret);
8618} 6373}
8619 6374
@@ -9552,9 +7307,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9552 7307
9553 num_devices = fs_info->fs_devices->rw_devices; 7308 num_devices = fs_info->fs_devices->rw_devices;
9554 7309
9555 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7310 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
9556 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | 7311 BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
9557 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9558 7312
9559 if (num_devices == 1) { 7313 if (num_devices == 1) {
9560 stripped |= BTRFS_BLOCK_GROUP_DUP; 7314 stripped |= BTRFS_BLOCK_GROUP_DUP;
@@ -9565,7 +7319,7 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9565 return stripped; 7319 return stripped;
9566 7320
9567 /* turn mirroring into duplication */ 7321 /* turn mirroring into duplication */
9568 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 7322 if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
9569 BTRFS_BLOCK_GROUP_RAID10)) 7323 BTRFS_BLOCK_GROUP_RAID10))
9570 return stripped | BTRFS_BLOCK_GROUP_DUP; 7324 return stripped | BTRFS_BLOCK_GROUP_DUP;
9571 } else { 7325 } else {
@@ -9636,7 +7390,7 @@ out:
9636 btrfs_info(cache->fs_info, 7390 btrfs_info(cache->fs_info,
9637 "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu", 7391 "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
9638 sinfo_used, num_bytes, min_allocable_bytes); 7392 sinfo_used, num_bytes, min_allocable_bytes);
9639 dump_space_info(cache->fs_info, cache->space_info, 0, 0); 7393 btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
9640 } 7394 }
9641 return ret; 7395 return ret;
9642} 7396}
@@ -9678,8 +7432,7 @@ again:
9678 */ 7432 */
9679 alloc_flags = update_block_group_flags(fs_info, cache->flags); 7433 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9680 if (alloc_flags != cache->flags) { 7434 if (alloc_flags != cache->flags) {
9681 ret = do_chunk_alloc(trans, alloc_flags, 7435 ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9682 CHUNK_ALLOC_FORCE);
9683 /* 7436 /*
9684 * ENOSPC is allowed here, we may have enough space 7437 * ENOSPC is allowed here, we may have enough space
9685 * already allocated at the new raid level to 7438 * already allocated at the new raid level to
@@ -9695,7 +7448,7 @@ again:
9695 if (!ret) 7448 if (!ret)
9696 goto out; 7449 goto out;
9697 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); 7450 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9698 ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); 7451 ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9699 if (ret < 0) 7452 if (ret < 0)
9700 goto out; 7453 goto out;
9701 ret = inc_block_group_ro(cache, 0); 7454 ret = inc_block_group_ro(cache, 0);
@@ -9716,7 +7469,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
9716{ 7469{
9717 u64 alloc_flags = get_alloc_profile(trans->fs_info, type); 7470 u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
9718 7471
9719 return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); 7472 return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9720} 7473}
9721 7474
9722/* 7475/*
@@ -9949,7 +7702,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
9949 struct extent_map_tree *em_tree; 7702 struct extent_map_tree *em_tree;
9950 struct extent_map *em; 7703 struct extent_map *em;
9951 7704
9952 em_tree = &root->fs_info->mapping_tree.map_tree; 7705 em_tree = &root->fs_info->mapping_tree;
9953 read_lock(&em_tree->lock); 7706 read_lock(&em_tree->lock);
9954 em = lookup_extent_mapping(em_tree, found_key.objectid, 7707 em = lookup_extent_mapping(em_tree, found_key.objectid,
9955 found_key.offset); 7708 found_key.offset);
@@ -10102,7 +7855,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
10102 */ 7855 */
10103 synchronize_rcu(); 7856 synchronize_rcu();
10104 7857
10105 release_global_block_rsv(info); 7858 btrfs_release_global_block_rsv(info);
10106 7859
10107 while (!list_empty(&info->space_info)) { 7860 while (!list_empty(&info->space_info)) {
10108 int i; 7861 int i;
@@ -10118,7 +7871,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
10118 if (WARN_ON(space_info->bytes_pinned > 0 || 7871 if (WARN_ON(space_info->bytes_pinned > 0 ||
10119 space_info->bytes_reserved > 0 || 7872 space_info->bytes_reserved > 0 ||
10120 space_info->bytes_may_use > 0)) 7873 space_info->bytes_may_use > 0))
10121 dump_space_info(info, space_info, 0, 0); 7874 btrfs_dump_space_info(info, space_info, 0, 0);
10122 list_del(&space_info->list); 7875 list_del(&space_info->list);
10123 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 7876 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
10124 struct kobject *kobj; 7877 struct kobject *kobj;
@@ -10141,7 +7894,6 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10141 struct btrfs_space_info *space_info; 7894 struct btrfs_space_info *space_info;
10142 struct raid_kobject *rkobj; 7895 struct raid_kobject *rkobj;
10143 LIST_HEAD(list); 7896 LIST_HEAD(list);
10144 int index;
10145 int ret = 0; 7897 int ret = 0;
10146 7898
10147 spin_lock(&fs_info->pending_raid_kobjs_lock); 7899 spin_lock(&fs_info->pending_raid_kobjs_lock);
@@ -10149,11 +7901,10 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10149 spin_unlock(&fs_info->pending_raid_kobjs_lock); 7901 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10150 7902
10151 list_for_each_entry(rkobj, &list, list) { 7903 list_for_each_entry(rkobj, &list, list) {
10152 space_info = __find_space_info(fs_info, rkobj->flags); 7904 space_info = btrfs_find_space_info(fs_info, rkobj->flags);
10153 index = btrfs_bg_flags_to_raid_index(rkobj->flags);
10154 7905
10155 ret = kobject_add(&rkobj->kobj, &space_info->kobj, 7906 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
10156 "%s", get_raid_name(index)); 7907 "%s", btrfs_bg_type_to_raid_name(rkobj->flags));
10157 if (ret) { 7908 if (ret) {
10158 kobject_put(&rkobj->kobj); 7909 kobject_put(&rkobj->kobj);
10159 break; 7910 break;
@@ -10243,21 +7994,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10243 */ 7994 */
10244static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) 7995static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
10245{ 7996{
10246 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 7997 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
10247 struct extent_map *em; 7998 struct extent_map *em;
10248 struct btrfs_block_group_cache *bg; 7999 struct btrfs_block_group_cache *bg;
10249 u64 start = 0; 8000 u64 start = 0;
10250 int ret = 0; 8001 int ret = 0;
10251 8002
10252 while (1) { 8003 while (1) {
10253 read_lock(&map_tree->map_tree.lock); 8004 read_lock(&map_tree->lock);
10254 /* 8005 /*
10255 * lookup_extent_mapping will return the first extent map 8006 * lookup_extent_mapping will return the first extent map
10256 * intersecting the range, so setting @len to 1 is enough to 8007 * intersecting the range, so setting @len to 1 is enough to
10257 * get the first chunk. 8008 * get the first chunk.
10258 */ 8009 */
10259 em = lookup_extent_mapping(&map_tree->map_tree, start, 1); 8010 em = lookup_extent_mapping(map_tree, start, 1);
10260 read_unlock(&map_tree->map_tree.lock); 8011 read_unlock(&map_tree->lock);
10261 if (!em) 8012 if (!em)
10262 break; 8013 break;
10263 8014
@@ -10417,9 +8168,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10417 } 8168 }
10418 8169
10419 trace_btrfs_add_block_group(info, cache, 0); 8170 trace_btrfs_add_block_group(info, cache, 0);
10420 update_space_info(info, cache->flags, found_key.offset, 8171 btrfs_update_space_info(info, cache->flags, found_key.offset,
10421 btrfs_block_group_used(&cache->item), 8172 btrfs_block_group_used(&cache->item),
10422 cache->bytes_super, &space_info); 8173 cache->bytes_super, &space_info);
10423 8174
10424 cache->space_info = space_info; 8175 cache->space_info = space_info;
10425 8176
@@ -10437,9 +8188,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10437 list_for_each_entry_rcu(space_info, &info->space_info, list) { 8188 list_for_each_entry_rcu(space_info, &info->space_info, list) {
10438 if (!(get_alloc_profile(info, space_info->flags) & 8189 if (!(get_alloc_profile(info, space_info->flags) &
10439 (BTRFS_BLOCK_GROUP_RAID10 | 8190 (BTRFS_BLOCK_GROUP_RAID10 |
10440 BTRFS_BLOCK_GROUP_RAID1 | 8191 BTRFS_BLOCK_GROUP_RAID1_MASK |
10441 BTRFS_BLOCK_GROUP_RAID5 | 8192 BTRFS_BLOCK_GROUP_RAID56_MASK |
10442 BTRFS_BLOCK_GROUP_RAID6 |
10443 BTRFS_BLOCK_GROUP_DUP))) 8193 BTRFS_BLOCK_GROUP_DUP)))
10444 continue; 8194 continue;
10445 /* 8195 /*
@@ -10457,7 +8207,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10457 } 8207 }
10458 8208
10459 btrfs_add_raid_kobjects(info); 8209 btrfs_add_raid_kobjects(info);
10460 init_global_block_rsv(info); 8210 btrfs_init_global_block_rsv(info);
10461 ret = check_chunk_block_group_mappings(info); 8211 ret = check_chunk_block_group_mappings(info);
10462error: 8212error:
10463 btrfs_free_path(path); 8213 btrfs_free_path(path);
@@ -10554,7 +8304,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10554 * assigned to our block group. We want our bg to be added to the rbtree 8304 * assigned to our block group. We want our bg to be added to the rbtree
10555 * with its ->space_info set. 8305 * with its ->space_info set.
10556 */ 8306 */
10557 cache->space_info = __find_space_info(fs_info, cache->flags); 8307 cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
10558 ASSERT(cache->space_info); 8308 ASSERT(cache->space_info);
10559 8309
10560 ret = btrfs_add_block_group_cache(fs_info, cache); 8310 ret = btrfs_add_block_group_cache(fs_info, cache);
@@ -10569,9 +8319,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10569 * the rbtree, update the space info's counters. 8319 * the rbtree, update the space info's counters.
10570 */ 8320 */
10571 trace_btrfs_add_block_group(fs_info, cache, 1); 8321 trace_btrfs_add_block_group(fs_info, cache, 1);
10572 update_space_info(fs_info, cache->flags, size, bytes_used, 8322 btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
10573 cache->bytes_super, &cache->space_info); 8323 cache->bytes_super, &cache->space_info);
10574 update_global_block_rsv(fs_info); 8324 btrfs_update_global_block_rsv(fs_info);
10575 8325
10576 link_block_group(cache); 8326 link_block_group(cache);
10577 8327
@@ -10598,6 +8348,35 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10598 write_sequnlock(&fs_info->profiles_lock); 8348 write_sequnlock(&fs_info->profiles_lock);
10599} 8349}
10600 8350
8351/*
8352 * Clear incompat bits for the following feature(s):
8353 *
8354 * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
8355 * in the whole filesystem
8356 */
8357static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
8358{
8359 if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {
8360 struct list_head *head = &fs_info->space_info;
8361 struct btrfs_space_info *sinfo;
8362
8363 list_for_each_entry_rcu(sinfo, head, list) {
8364 bool found = false;
8365
8366 down_read(&sinfo->groups_sem);
8367 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
8368 found = true;
8369 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
8370 found = true;
8371 up_read(&sinfo->groups_sem);
8372
8373 if (found)
8374 return;
8375 }
8376 btrfs_clear_fs_incompat(fs_info, RAID56);
8377 }
8378}
8379
10601int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 8380int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10602 u64 group_start, struct extent_map *em) 8381 u64 group_start, struct extent_map *em)
10603{ 8382{
@@ -10744,6 +8523,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10744 clear_avail_alloc_bits(fs_info, block_group->flags); 8523 clear_avail_alloc_bits(fs_info, block_group->flags);
10745 } 8524 }
10746 up_write(&block_group->space_info->groups_sem); 8525 up_write(&block_group->space_info->groups_sem);
8526 clear_incompat_bg_bits(fs_info, block_group->flags);
10747 if (kobj) { 8527 if (kobj) {
10748 kobject_del(kobj); 8528 kobject_del(kobj);
10749 kobject_put(kobj); 8529 kobject_put(kobj);
@@ -10853,7 +8633,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10853 if (remove_em) { 8633 if (remove_em) {
10854 struct extent_map_tree *em_tree; 8634 struct extent_map_tree *em_tree;
10855 8635
10856 em_tree = &fs_info->mapping_tree.map_tree; 8636 em_tree = &fs_info->mapping_tree;
10857 write_lock(&em_tree->lock); 8637 write_lock(&em_tree->lock);
10858 remove_extent_mapping(em_tree, em); 8638 remove_extent_mapping(em_tree, em);
10859 write_unlock(&em_tree->lock); 8639 write_unlock(&em_tree->lock);
@@ -10871,7 +8651,7 @@ struct btrfs_trans_handle *
10871btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, 8651btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10872 const u64 chunk_offset) 8652 const u64 chunk_offset)
10873{ 8653{
10874 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 8654 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
10875 struct extent_map *em; 8655 struct extent_map *em;
10876 struct map_lookup *map; 8656 struct map_lookup *map;
10877 unsigned int num_items; 8657 unsigned int num_items;
@@ -11020,7 +8800,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
11020 spin_lock(&space_info->lock); 8800 spin_lock(&space_info->lock);
11021 spin_lock(&block_group->lock); 8801 spin_lock(&block_group->lock);
11022 8802
11023 update_bytes_pinned(space_info, -block_group->pinned); 8803 btrfs_space_info_update_bytes_pinned(fs_info, space_info,
8804 -block_group->pinned);
11024 space_info->bytes_readonly += block_group->pinned; 8805 space_info->bytes_readonly += block_group->pinned;
11025 percpu_counter_add_batch(&space_info->total_bytes_pinned, 8806 percpu_counter_add_batch(&space_info->total_bytes_pinned,
11026 -block_group->pinned, 8807 -block_group->pinned,
@@ -11076,43 +8857,6 @@ next:
11076 spin_unlock(&fs_info->unused_bgs_lock); 8857 spin_unlock(&fs_info->unused_bgs_lock);
11077} 8858}
11078 8859
11079int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
11080{
11081 struct btrfs_super_block *disk_super;
11082 u64 features;
11083 u64 flags;
11084 int mixed = 0;
11085 int ret;
11086
11087 disk_super = fs_info->super_copy;
11088 if (!btrfs_super_root(disk_super))
11089 return -EINVAL;
11090
11091 features = btrfs_super_incompat_flags(disk_super);
11092 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
11093 mixed = 1;
11094
11095 flags = BTRFS_BLOCK_GROUP_SYSTEM;
11096 ret = create_space_info(fs_info, flags);
11097 if (ret)
11098 goto out;
11099
11100 if (mixed) {
11101 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
11102 ret = create_space_info(fs_info, flags);
11103 } else {
11104 flags = BTRFS_BLOCK_GROUP_METADATA;
11105 ret = create_space_info(fs_info, flags);
11106 if (ret)
11107 goto out;
11108
11109 flags = BTRFS_BLOCK_GROUP_DATA;
11110 ret = create_space_info(fs_info, flags);
11111 }
11112out:
11113 return ret;
11114}
11115
11116int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, 8860int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
11117 u64 start, u64 end) 8861 u64 start, u64 end)
11118{ 8862{
@@ -11171,12 +8915,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
11171 find_first_clear_extent_bit(&device->alloc_state, start, 8915 find_first_clear_extent_bit(&device->alloc_state, start,
11172 &start, &end, 8916 &start, &end,
11173 CHUNK_TRIMMED | CHUNK_ALLOCATED); 8917 CHUNK_TRIMMED | CHUNK_ALLOCATED);
8918
8919 /* Ensure we skip the reserved area in the first 1M */
8920 start = max_t(u64, start, SZ_1M);
8921
11174 /* 8922 /*
11175 * If find_first_clear_extent_bit find a range that spans the 8923 * If find_first_clear_extent_bit find a range that spans the
11176 * end of the device it will set end to -1, in this case it's up 8924 * end of the device it will set end to -1, in this case it's up
11177 * to the caller to trim the value to the size of the device. 8925 * to the caller to trim the value to the size of the device.
11178 */ 8926 */
11179 end = min(end, device->total_bytes - 1); 8927 end = min(end, device->total_bytes - 1);
8928
11180 len = end - start + 1; 8929 len = end - start + 1;
11181 8930
11182 /* We didn't find any extents */ 8931 /* We didn't find any extents */