Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 2503 |
1 file changed, 126 insertions, 2377 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5faf057f6f37..d3b58e388535 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -28,46 +28,12 @@
28 | #include "sysfs.h" | 28 | #include "sysfs.h" |
29 | #include "qgroup.h" | 29 | #include "qgroup.h" |
30 | #include "ref-verify.h" | 30 | #include "ref-verify.h" |
31 | #include "space-info.h" | ||
32 | #include "block-rsv.h" | ||
33 | #include "delalloc-space.h" | ||
31 | 34 | ||
32 | #undef SCRAMBLE_DELAYED_REFS | 35 | #undef SCRAMBLE_DELAYED_REFS |
33 | 36 | ||
34 | /* | ||
35 | * control flags for do_chunk_alloc's force field | ||
36 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | ||
37 | * if we really need one. | ||
38 | * | ||
39 | * CHUNK_ALLOC_LIMITED means to only try and allocate one | ||
40 | * if we have very few chunks already allocated. This is | ||
41 | * used as part of the clustering code to help make sure | ||
42 | * we have a good pool of storage to cluster in, without | ||
43 | * filling the FS with empty chunks | ||
44 | * | ||
45 | * CHUNK_ALLOC_FORCE means it must try to allocate one | ||
46 | * | ||
47 | */ | ||
48 | enum { | ||
49 | CHUNK_ALLOC_NO_FORCE = 0, | ||
50 | CHUNK_ALLOC_LIMITED = 1, | ||
51 | CHUNK_ALLOC_FORCE = 2, | ||
52 | }; | ||
53 | |||
54 | /* | ||
55 | * Declare a helper function to detect underflow of various space info members | ||
56 | */ | ||
57 | #define DECLARE_SPACE_INFO_UPDATE(name) \ | ||
58 | static inline void update_##name(struct btrfs_space_info *sinfo, \ | ||
59 | s64 bytes) \ | ||
60 | { \ | ||
61 | if (bytes < 0 && sinfo->name < -bytes) { \ | ||
62 | WARN_ON(1); \ | ||
63 | sinfo->name = 0; \ | ||
64 | return; \ | ||
65 | } \ | ||
66 | sinfo->name += bytes; \ | ||
67 | } | ||
68 | |||
69 | DECLARE_SPACE_INFO_UPDATE(bytes_may_use); | ||
70 | DECLARE_SPACE_INFO_UPDATE(bytes_pinned); | ||
71 | 37 | ||
72 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 38 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
73 | struct btrfs_delayed_ref_node *node, u64 parent, | 39 | struct btrfs_delayed_ref_node *node, u64 parent, |
@@ -84,21 +50,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
84 | static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | 50 | static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, |
85 | struct btrfs_delayed_ref_node *node, | 51 | struct btrfs_delayed_ref_node *node, |
86 | struct btrfs_delayed_extent_op *extent_op); | 52 | struct btrfs_delayed_extent_op *extent_op); |
87 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, | ||
88 | int force); | ||
89 | static int find_next_key(struct btrfs_path *path, int level, | 53 | static int find_next_key(struct btrfs_path *path, int level, |
90 | struct btrfs_key *key); | 54 | struct btrfs_key *key); |
91 | static void dump_space_info(struct btrfs_fs_info *fs_info, | ||
92 | struct btrfs_space_info *info, u64 bytes, | ||
93 | int dump_block_groups); | ||
94 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
95 | u64 num_bytes); | ||
96 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | ||
97 | struct btrfs_space_info *space_info, | ||
98 | u64 num_bytes); | ||
99 | static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | ||
100 | struct btrfs_space_info *space_info, | ||
101 | u64 num_bytes); | ||
102 | 55 | ||
103 | static noinline int | 56 | static noinline int |
104 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 57 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -737,62 +690,39 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
737 | return block_group_cache_tree_search(info, bytenr, 1); | 690 | return block_group_cache_tree_search(info, bytenr, 1); |
738 | } | 691 | } |
739 | 692 | ||
740 | static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | 693 | static u64 generic_ref_to_space_flags(struct btrfs_ref *ref) |
741 | u64 flags) | ||
742 | { | 694 | { |
743 | struct list_head *head = &info->space_info; | 695 | if (ref->type == BTRFS_REF_METADATA) { |
744 | struct btrfs_space_info *found; | 696 | if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) |
745 | 697 | return BTRFS_BLOCK_GROUP_SYSTEM; | |
746 | flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; | 698 | else |
747 | 699 | return BTRFS_BLOCK_GROUP_METADATA; | |
748 | rcu_read_lock(); | ||
749 | list_for_each_entry_rcu(found, head, list) { | ||
750 | if (found->flags & flags) { | ||
751 | rcu_read_unlock(); | ||
752 | return found; | ||
753 | } | ||
754 | } | 700 | } |
755 | rcu_read_unlock(); | 701 | return BTRFS_BLOCK_GROUP_DATA; |
756 | return NULL; | ||
757 | } | 702 | } |
758 | 703 | ||
759 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, | 704 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, |
760 | struct btrfs_ref *ref, int sign) | 705 | struct btrfs_ref *ref) |
761 | { | 706 | { |
762 | struct btrfs_space_info *space_info; | 707 | struct btrfs_space_info *space_info; |
763 | s64 num_bytes; | 708 | u64 flags = generic_ref_to_space_flags(ref); |
764 | u64 flags; | ||
765 | |||
766 | ASSERT(sign == 1 || sign == -1); | ||
767 | num_bytes = sign * ref->len; | ||
768 | if (ref->type == BTRFS_REF_METADATA) { | ||
769 | if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) | ||
770 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
771 | else | ||
772 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
773 | } else { | ||
774 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
775 | } | ||
776 | 709 | ||
777 | space_info = __find_space_info(fs_info, flags); | 710 | space_info = btrfs_find_space_info(fs_info, flags); |
778 | ASSERT(space_info); | 711 | ASSERT(space_info); |
779 | percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, | 712 | percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len, |
780 | BTRFS_TOTAL_BYTES_PINNED_BATCH); | 713 | BTRFS_TOTAL_BYTES_PINNED_BATCH); |
781 | } | 714 | } |
782 | 715 | ||
783 | /* | 716 | static void sub_pinned_bytes(struct btrfs_fs_info *fs_info, |
784 | * after adding space to the filesystem, we need to clear the full flags | 717 | struct btrfs_ref *ref) |
785 | * on all the space infos. | ||
786 | */ | ||
787 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | ||
788 | { | 718 | { |
789 | struct list_head *head = &info->space_info; | 719 | struct btrfs_space_info *space_info; |
790 | struct btrfs_space_info *found; | 720 | u64 flags = generic_ref_to_space_flags(ref); |
791 | 721 | ||
792 | rcu_read_lock(); | 722 | space_info = btrfs_find_space_info(fs_info, flags); |
793 | list_for_each_entry_rcu(found, head, list) | 723 | ASSERT(space_info); |
794 | found->full = 0; | 724 | percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len, |
795 | rcu_read_unlock(); | 725 | BTRFS_TOTAL_BYTES_PINNED_BATCH); |
796 | } | 726 | } |
797 | 727 | ||
798 | /* simple helper to search for an existing data extent at a given offset */ | 728 | /* simple helper to search for an existing data extent at a given offset */ |
@@ -1121,11 +1051,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1121 | __le64 lenum; | 1051 | __le64 lenum; |
1122 | 1052 | ||
1123 | lenum = cpu_to_le64(root_objectid); | 1053 | lenum = cpu_to_le64(root_objectid); |
1124 | high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); | 1054 | high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); |
1125 | lenum = cpu_to_le64(owner); | 1055 | lenum = cpu_to_le64(owner); |
1126 | low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); | 1056 | low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); |
1127 | lenum = cpu_to_le64(offset); | 1057 | lenum = cpu_to_le64(offset); |
1128 | low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); | 1058 | low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); |
1129 | 1059 | ||
1130 | return ((u64)high_crc << 31) ^ (u64)low_crc; | 1060 | return ((u64)high_crc << 31) ^ (u64)low_crc; |
1131 | } | 1061 | } |
@@ -2065,7 +1995,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2065 | btrfs_ref_tree_mod(fs_info, generic_ref); | 1995 | btrfs_ref_tree_mod(fs_info, generic_ref); |
2066 | 1996 | ||
2067 | if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) | 1997 | if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) |
2068 | add_pinned_bytes(fs_info, generic_ref, -1); | 1998 | sub_pinned_bytes(fs_info, generic_ref); |
2069 | 1999 | ||
2070 | return ret; | 2000 | return ret; |
2071 | } | 2001 | } |
@@ -2462,7 +2392,7 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2462 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | 2392 | flags = BTRFS_BLOCK_GROUP_SYSTEM; |
2463 | else | 2393 | else |
2464 | flags = BTRFS_BLOCK_GROUP_METADATA; | 2394 | flags = BTRFS_BLOCK_GROUP_METADATA; |
2465 | space_info = __find_space_info(fs_info, flags); | 2395 | space_info = btrfs_find_space_info(fs_info, flags); |
2466 | ASSERT(space_info); | 2396 | ASSERT(space_info); |
2467 | percpu_counter_add_batch(&space_info->total_bytes_pinned, | 2397 | percpu_counter_add_batch(&space_info->total_bytes_pinned, |
2468 | -head->num_bytes, | 2398 | -head->num_bytes, |
@@ -2824,49 +2754,6 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2824 | return num_csums; | 2754 | return num_csums; |
2825 | } | 2755 | } |
2826 | 2756 | ||
2827 | bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info) | ||
2828 | { | ||
2829 | struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; | ||
2830 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
2831 | bool ret = false; | ||
2832 | u64 reserved; | ||
2833 | |||
2834 | spin_lock(&global_rsv->lock); | ||
2835 | reserved = global_rsv->reserved; | ||
2836 | spin_unlock(&global_rsv->lock); | ||
2837 | |||
2838 | /* | ||
2839 | * Since the global reserve is just kind of magic we don't really want | ||
2840 | * to rely on it to save our bacon, so if our size is more than the | ||
2841 | * delayed_refs_rsv and the global rsv then it's time to think about | ||
2842 | * bailing. | ||
2843 | */ | ||
2844 | spin_lock(&delayed_refs_rsv->lock); | ||
2845 | reserved += delayed_refs_rsv->reserved; | ||
2846 | if (delayed_refs_rsv->size >= reserved) | ||
2847 | ret = true; | ||
2848 | spin_unlock(&delayed_refs_rsv->lock); | ||
2849 | return ret; | ||
2850 | } | ||
2851 | |||
2852 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans) | ||
2853 | { | ||
2854 | u64 num_entries = | ||
2855 | atomic_read(&trans->transaction->delayed_refs.num_entries); | ||
2856 | u64 avg_runtime; | ||
2857 | u64 val; | ||
2858 | |||
2859 | smp_mb(); | ||
2860 | avg_runtime = trans->fs_info->avg_delayed_ref_runtime; | ||
2861 | val = num_entries * avg_runtime; | ||
2862 | if (val >= NSEC_PER_SEC) | ||
2863 | return 1; | ||
2864 | if (val >= NSEC_PER_SEC / 2) | ||
2865 | return 2; | ||
2866 | |||
2867 | return btrfs_check_space_for_delayed_refs(trans->fs_info); | ||
2868 | } | ||
2869 | |||
2870 | /* | 2757 | /* |
2871 | * this starts processing the delayed reference count updates and | 2758 | * this starts processing the delayed reference count updates and |
2872 | * extent insertions we have queued up so far. count can be | 2759 | * extent insertions we have queued up so far. count can be |
@@ -3834,93 +3721,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3834 | wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); | 3721 | wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); |
3835 | } | 3722 | } |
3836 | 3723 | ||
3837 | static const char *alloc_name(u64 flags) | ||
3838 | { | ||
3839 | switch (flags) { | ||
3840 | case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA: | ||
3841 | return "mixed"; | ||
3842 | case BTRFS_BLOCK_GROUP_METADATA: | ||
3843 | return "metadata"; | ||
3844 | case BTRFS_BLOCK_GROUP_DATA: | ||
3845 | return "data"; | ||
3846 | case BTRFS_BLOCK_GROUP_SYSTEM: | ||
3847 | return "system"; | ||
3848 | default: | ||
3849 | WARN_ON(1); | ||
3850 | return "invalid-combination"; | ||
3851 | }; | ||
3852 | } | ||
3853 | |||
3854 | static int create_space_info(struct btrfs_fs_info *info, u64 flags) | ||
3855 | { | ||
3856 | |||
3857 | struct btrfs_space_info *space_info; | ||
3858 | int i; | ||
3859 | int ret; | ||
3860 | |||
3861 | space_info = kzalloc(sizeof(*space_info), GFP_NOFS); | ||
3862 | if (!space_info) | ||
3863 | return -ENOMEM; | ||
3864 | |||
3865 | ret = percpu_counter_init(&space_info->total_bytes_pinned, 0, | ||
3866 | GFP_KERNEL); | ||
3867 | if (ret) { | ||
3868 | kfree(space_info); | ||
3869 | return ret; | ||
3870 | } | ||
3871 | |||
3872 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
3873 | INIT_LIST_HEAD(&space_info->block_groups[i]); | ||
3874 | init_rwsem(&space_info->groups_sem); | ||
3875 | spin_lock_init(&space_info->lock); | ||
3876 | space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; | ||
3877 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | ||
3878 | init_waitqueue_head(&space_info->wait); | ||
3879 | INIT_LIST_HEAD(&space_info->ro_bgs); | ||
3880 | INIT_LIST_HEAD(&space_info->tickets); | ||
3881 | INIT_LIST_HEAD(&space_info->priority_tickets); | ||
3882 | |||
3883 | ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, | ||
3884 | info->space_info_kobj, "%s", | ||
3885 | alloc_name(space_info->flags)); | ||
3886 | if (ret) { | ||
3887 | kobject_put(&space_info->kobj); | ||
3888 | return ret; | ||
3889 | } | ||
3890 | |||
3891 | list_add_rcu(&space_info->list, &info->space_info); | ||
3892 | if (flags & BTRFS_BLOCK_GROUP_DATA) | ||
3893 | info->data_sinfo = space_info; | ||
3894 | |||
3895 | return ret; | ||
3896 | } | ||
3897 | |||
3898 | static void update_space_info(struct btrfs_fs_info *info, u64 flags, | ||
3899 | u64 total_bytes, u64 bytes_used, | ||
3900 | u64 bytes_readonly, | ||
3901 | struct btrfs_space_info **space_info) | ||
3902 | { | ||
3903 | struct btrfs_space_info *found; | ||
3904 | int factor; | ||
3905 | |||
3906 | factor = btrfs_bg_type_to_factor(flags); | ||
3907 | |||
3908 | found = __find_space_info(info, flags); | ||
3909 | ASSERT(found); | ||
3910 | spin_lock(&found->lock); | ||
3911 | found->total_bytes += total_bytes; | ||
3912 | found->disk_total += total_bytes * factor; | ||
3913 | found->bytes_used += bytes_used; | ||
3914 | found->disk_used += bytes_used * factor; | ||
3915 | found->bytes_readonly += bytes_readonly; | ||
3916 | if (total_bytes > 0) | ||
3917 | found->full = 0; | ||
3918 | space_info_add_new_bytes(info, found, total_bytes - | ||
3919 | bytes_used - bytes_readonly); | ||
3920 | spin_unlock(&found->lock); | ||
3921 | *space_info = found; | ||
3922 | } | ||
3923 | |||
3924 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | 3724 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) |
3925 | { | 3725 | { |
3926 | u64 extra_flags = chunk_to_extended(flags) & | 3726 | u64 extra_flags = chunk_to_extended(flags) & |
@@ -4068,215 +3868,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4068 | return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | 3868 | return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
4069 | } | 3869 | } |
4070 | 3870 | ||
4071 | static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, | ||
4072 | bool may_use_included) | ||
4073 | { | ||
4074 | ASSERT(s_info); | ||
4075 | return s_info->bytes_used + s_info->bytes_reserved + | ||
4076 | s_info->bytes_pinned + s_info->bytes_readonly + | ||
4077 | (may_use_included ? s_info->bytes_may_use : 0); | ||
4078 | } | ||
4079 | |||
4080 | int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) | ||
4081 | { | ||
4082 | struct btrfs_root *root = inode->root; | ||
4083 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
4084 | struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; | ||
4085 | u64 used; | ||
4086 | int ret = 0; | ||
4087 | int need_commit = 2; | ||
4088 | int have_pinned_space; | ||
4089 | |||
4090 | /* make sure bytes are sectorsize aligned */ | ||
4091 | bytes = ALIGN(bytes, fs_info->sectorsize); | ||
4092 | |||
4093 | if (btrfs_is_free_space_inode(inode)) { | ||
4094 | need_commit = 0; | ||
4095 | ASSERT(current->journal_info); | ||
4096 | } | ||
4097 | |||
4098 | again: | ||
4099 | /* make sure we have enough space to handle the data first */ | ||
4100 | spin_lock(&data_sinfo->lock); | ||
4101 | used = btrfs_space_info_used(data_sinfo, true); | ||
4102 | |||
4103 | if (used + bytes > data_sinfo->total_bytes) { | ||
4104 | struct btrfs_trans_handle *trans; | ||
4105 | |||
4106 | /* | ||
4107 | * if we don't have enough free bytes in this space then we need | ||
4108 | * to alloc a new chunk. | ||
4109 | */ | ||
4110 | if (!data_sinfo->full) { | ||
4111 | u64 alloc_target; | ||
4112 | |||
4113 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; | ||
4114 | spin_unlock(&data_sinfo->lock); | ||
4115 | |||
4116 | alloc_target = btrfs_data_alloc_profile(fs_info); | ||
4117 | /* | ||
4118 | * It is ugly that we don't call nolock join | ||
4119 | * transaction for the free space inode case here. | ||
4120 | * But it is safe because we only do the data space | ||
4121 | * reservation for the free space cache in the | ||
4122 | * transaction context, the common join transaction | ||
4123 | * just increase the counter of the current transaction | ||
4124 | * handler, doesn't try to acquire the trans_lock of | ||
4125 | * the fs. | ||
4126 | */ | ||
4127 | trans = btrfs_join_transaction(root); | ||
4128 | if (IS_ERR(trans)) | ||
4129 | return PTR_ERR(trans); | ||
4130 | |||
4131 | ret = do_chunk_alloc(trans, alloc_target, | ||
4132 | CHUNK_ALLOC_NO_FORCE); | ||
4133 | btrfs_end_transaction(trans); | ||
4134 | if (ret < 0) { | ||
4135 | if (ret != -ENOSPC) | ||
4136 | return ret; | ||
4137 | else { | ||
4138 | have_pinned_space = 1; | ||
4139 | goto commit_trans; | ||
4140 | } | ||
4141 | } | ||
4142 | |||
4143 | goto again; | ||
4144 | } | ||
4145 | |||
4146 | /* | ||
4147 | * If we don't have enough pinned space to deal with this | ||
4148 | * allocation, and no removed chunk in current transaction, | ||
4149 | * don't bother committing the transaction. | ||
4150 | */ | ||
4151 | have_pinned_space = __percpu_counter_compare( | ||
4152 | &data_sinfo->total_bytes_pinned, | ||
4153 | used + bytes - data_sinfo->total_bytes, | ||
4154 | BTRFS_TOTAL_BYTES_PINNED_BATCH); | ||
4155 | spin_unlock(&data_sinfo->lock); | ||
4156 | |||
4157 | /* commit the current transaction and try again */ | ||
4158 | commit_trans: | ||
4159 | if (need_commit) { | ||
4160 | need_commit--; | ||
4161 | |||
4162 | if (need_commit > 0) { | ||
4163 | btrfs_start_delalloc_roots(fs_info, -1); | ||
4164 | btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, | ||
4165 | (u64)-1); | ||
4166 | } | ||
4167 | |||
4168 | trans = btrfs_join_transaction(root); | ||
4169 | if (IS_ERR(trans)) | ||
4170 | return PTR_ERR(trans); | ||
4171 | if (have_pinned_space >= 0 || | ||
4172 | test_bit(BTRFS_TRANS_HAVE_FREE_BGS, | ||
4173 | &trans->transaction->flags) || | ||
4174 | need_commit > 0) { | ||
4175 | ret = btrfs_commit_transaction(trans); | ||
4176 | if (ret) | ||
4177 | return ret; | ||
4178 | /* | ||
4179 | * The cleaner kthread might still be doing iput | ||
4180 | * operations. Wait for it to finish so that | ||
4181 | * more space is released. We don't need to | ||
4182 | * explicitly run the delayed iputs here because | ||
4183 | * the commit_transaction would have woken up | ||
4184 | * the cleaner. | ||
4185 | */ | ||
4186 | ret = btrfs_wait_on_delayed_iputs(fs_info); | ||
4187 | if (ret) | ||
4188 | return ret; | ||
4189 | goto again; | ||
4190 | } else { | ||
4191 | btrfs_end_transaction(trans); | ||
4192 | } | ||
4193 | } | ||
4194 | |||
4195 | trace_btrfs_space_reservation(fs_info, | ||
4196 | "space_info:enospc", | ||
4197 | data_sinfo->flags, bytes, 1); | ||
4198 | return -ENOSPC; | ||
4199 | } | ||
4200 | update_bytes_may_use(data_sinfo, bytes); | ||
4201 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
4202 | data_sinfo->flags, bytes, 1); | ||
4203 | spin_unlock(&data_sinfo->lock); | ||
4204 | |||
4205 | return 0; | ||
4206 | } | ||
4207 | |||
4208 | int btrfs_check_data_free_space(struct inode *inode, | ||
4209 | struct extent_changeset **reserved, u64 start, u64 len) | ||
4210 | { | ||
4211 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | ||
4212 | int ret; | ||
4213 | |||
4214 | /* align the range */ | ||
4215 | len = round_up(start + len, fs_info->sectorsize) - | ||
4216 | round_down(start, fs_info->sectorsize); | ||
4217 | start = round_down(start, fs_info->sectorsize); | ||
4218 | |||
4219 | ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len); | ||
4220 | if (ret < 0) | ||
4221 | return ret; | ||
4222 | |||
4223 | /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ | ||
4224 | ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); | ||
4225 | if (ret < 0) | ||
4226 | btrfs_free_reserved_data_space_noquota(inode, start, len); | ||
4227 | else | ||
4228 | ret = 0; | ||
4229 | return ret; | ||
4230 | } | ||
4231 | |||
4232 | /* | ||
4233 | * Called if we need to clear a data reservation for this inode | ||
4234 | * Normally in a error case. | ||
4235 | * | ||
4236 | * This one will *NOT* use accurate qgroup reserved space API, just for case | ||
4237 | * which we can't sleep and is sure it won't affect qgroup reserved space. | ||
4238 | * Like clear_bit_hook(). | ||
4239 | */ | ||
4240 | void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, | ||
4241 | u64 len) | ||
4242 | { | ||
4243 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | ||
4244 | struct btrfs_space_info *data_sinfo; | ||
4245 | |||
4246 | /* Make sure the range is aligned to sectorsize */ | ||
4247 | len = round_up(start + len, fs_info->sectorsize) - | ||
4248 | round_down(start, fs_info->sectorsize); | ||
4249 | start = round_down(start, fs_info->sectorsize); | ||
4250 | |||
4251 | data_sinfo = fs_info->data_sinfo; | ||
4252 | spin_lock(&data_sinfo->lock); | ||
4253 | update_bytes_may_use(data_sinfo, -len); | ||
4254 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
4255 | data_sinfo->flags, len, 0); | ||
4256 | spin_unlock(&data_sinfo->lock); | ||
4257 | } | ||
4258 | |||
4259 | /* | ||
4260 | * Called if we need to clear a data reservation for this inode | ||
4261 | * Normally in a error case. | ||
4262 | * | ||
4263 | * This one will handle the per-inode data rsv map for accurate reserved | ||
4264 | * space framework. | ||
4265 | */ | ||
4266 | void btrfs_free_reserved_data_space(struct inode *inode, | ||
4267 | struct extent_changeset *reserved, u64 start, u64 len) | ||
4268 | { | ||
4269 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4270 | |||
4271 | /* Make sure the range is aligned to sectorsize */ | ||
4272 | len = round_up(start + len, root->fs_info->sectorsize) - | ||
4273 | round_down(start, root->fs_info->sectorsize); | ||
4274 | start = round_down(start, root->fs_info->sectorsize); | ||
4275 | |||
4276 | btrfs_free_reserved_data_space_noquota(inode, start, len); | ||
4277 | btrfs_qgroup_free_data(inode, reserved, start, len); | ||
4278 | } | ||
4279 | |||
4280 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 3871 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
4281 | { | 3872 | { |
4282 | struct list_head *head = &info->space_info; | 3873 | struct list_head *head = &info->space_info; |
@@ -4290,11 +3881,6 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
4290 | rcu_read_unlock(); | 3881 | rcu_read_unlock(); |
4291 | } | 3882 | } |
4292 | 3883 | ||
4293 | static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) | ||
4294 | { | ||
4295 | return (global->size << 1); | ||
4296 | } | ||
4297 | |||
4298 | static int should_alloc_chunk(struct btrfs_fs_info *fs_info, | 3884 | static int should_alloc_chunk(struct btrfs_fs_info *fs_info, |
4299 | struct btrfs_space_info *sinfo, int force) | 3885 | struct btrfs_space_info *sinfo, int force) |
4300 | { | 3886 | { |
@@ -4325,15 +3911,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4325 | { | 3911 | { |
4326 | u64 num_dev; | 3912 | u64 num_dev; |
4327 | 3913 | ||
4328 | if (type & (BTRFS_BLOCK_GROUP_RAID10 | | 3914 | num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max; |
4329 | BTRFS_BLOCK_GROUP_RAID0 | | 3915 | if (!num_dev) |
4330 | BTRFS_BLOCK_GROUP_RAID5 | | ||
4331 | BTRFS_BLOCK_GROUP_RAID6)) | ||
4332 | num_dev = fs_info->fs_devices->rw_devices; | 3916 | num_dev = fs_info->fs_devices->rw_devices; |
4333 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
4334 | num_dev = 2; | ||
4335 | else | ||
4336 | num_dev = 1; /* DUP or single */ | ||
4337 | 3917 | ||
4338 | return num_dev; | 3918 | return num_dev; |
4339 | } | 3919 | } |
@@ -4358,7 +3938,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4358 | */ | 3938 | */ |
4359 | lockdep_assert_held(&fs_info->chunk_mutex); | 3939 | lockdep_assert_held(&fs_info->chunk_mutex); |
4360 | 3940 | ||
4361 | info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | 3941 | info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
4362 | spin_lock(&info->lock); | 3942 | spin_lock(&info->lock); |
4363 | left = info->total_bytes - btrfs_space_info_used(info, true); | 3943 | left = info->total_bytes - btrfs_space_info_used(info, true); |
4364 | spin_unlock(&info->lock); | 3944 | spin_unlock(&info->lock); |
@@ -4372,7 +3952,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4372 | if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { | 3952 | if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { |
4373 | btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", | 3953 | btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", |
4374 | left, thresh, type); | 3954 | left, thresh, type); |
4375 | dump_space_info(fs_info, info, 0, 0); | 3955 | btrfs_dump_space_info(fs_info, info, 0, 0); |
4376 | } | 3956 | } |
4377 | 3957 | ||
4378 | if (left < thresh) { | 3958 | if (left < thresh) { |
@@ -4405,8 +3985,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4405 | * - return 1 if it successfully allocates a chunk, | 3985 | * - return 1 if it successfully allocates a chunk, |
4406 | * - return errors including -ENOSPC otherwise. | 3986 | * - return errors including -ENOSPC otherwise. |
4407 | */ | 3987 | */ |
4408 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, | 3988 | int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, |
4409 | int force) | 3989 | enum btrfs_chunk_alloc_enum force) |
4410 | { | 3990 | { |
4411 | struct btrfs_fs_info *fs_info = trans->fs_info; | 3991 | struct btrfs_fs_info *fs_info = trans->fs_info; |
4412 | struct btrfs_space_info *space_info; | 3992 | struct btrfs_space_info *space_info; |
@@ -4418,7 +3998,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4418 | if (trans->allocating_chunk) | 3998 | if (trans->allocating_chunk) |
4419 | return -ENOSPC; | 3999 | return -ENOSPC; |
4420 | 4000 | ||
4421 | space_info = __find_space_info(fs_info, flags); | 4001 | space_info = btrfs_find_space_info(fs_info, flags); |
4422 | ASSERT(space_info); | 4002 | ASSERT(space_info); |
4423 | 4003 | ||
4424 | do { | 4004 | do { |
@@ -4525,1714 +4105,6 @@ out:
4525 | return ret; | 4105 | return ret; |
4526 | } | 4106 | } |
4527 | 4107 | ||
4528 | static int can_overcommit(struct btrfs_fs_info *fs_info, | ||
4529 | struct btrfs_space_info *space_info, u64 bytes, | ||
4530 | enum btrfs_reserve_flush_enum flush, | ||
4531 | bool system_chunk) | ||
4532 | { | ||
4533 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
4534 | u64 profile; | ||
4535 | u64 space_size; | ||
4536 | u64 avail; | ||
4537 | u64 used; | ||
4538 | int factor; | ||
4539 | |||
4540 | /* Don't overcommit when in mixed mode. */ | ||
4541 | if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) | ||
4542 | return 0; | ||
4543 | |||
4544 | if (system_chunk) | ||
4545 | profile = btrfs_system_alloc_profile(fs_info); | ||
4546 | else | ||
4547 | profile = btrfs_metadata_alloc_profile(fs_info); | ||
4548 | |||
4549 | used = btrfs_space_info_used(space_info, false); | ||
4550 | |||
4551 | /* | ||
4552 | * We only want to allow over committing if we have lots of actual space | ||
4553 | * free, but if we don't have enough space to handle the global reserve | ||
4554 | * space then we could end up having a real enospc problem when trying | ||
4555 | * to allocate a chunk or some other such important allocation. | ||
4556 | */ | ||
4557 | spin_lock(&global_rsv->lock); | ||
4558 | space_size = calc_global_rsv_need_space(global_rsv); | ||
4559 | spin_unlock(&global_rsv->lock); | ||
4560 | if (used + space_size >= space_info->total_bytes) | ||
4561 | return 0; | ||
4562 | |||
4563 | used += space_info->bytes_may_use; | ||
4564 | |||
4565 | avail = atomic64_read(&fs_info->free_chunk_space); | ||
4566 | |||
4567 | /* | ||
4568 | * If we have dup, raid1 or raid10 then only half of the free | ||
4569 | * space is actually usable. For raid56, the space info used | ||
4570 | * doesn't include the parity drive, so we don't have to | ||
4571 | * change the math | ||
4572 | */ | ||
4573 | factor = btrfs_bg_type_to_factor(profile); | ||
4574 | avail = div_u64(avail, factor); | ||
4575 | |||
4576 | /* | ||
4577 | * If we aren't flushing all things, let us overcommit up to | ||
4578 | * 1/2th of the space. If we can flush, don't let us overcommit | ||
4579 | * too much, let it overcommit up to 1/8 of the space. | ||
4580 | */ | ||
4581 | if (flush == BTRFS_RESERVE_FLUSH_ALL) | ||
4582 | avail >>= 3; | ||
4583 | else | ||
4584 | avail >>= 1; | ||
4585 | |||
4586 | if (used + bytes < space_info->total_bytes + avail) | ||
4587 | return 1; | ||
4588 | return 0; | ||
4589 | } | ||
4590 | |||
4591 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info, | ||
4592 | unsigned long nr_pages, int nr_items) | ||
4593 | { | ||
4594 | struct super_block *sb = fs_info->sb; | ||
4595 | |||
4596 | if (down_read_trylock(&sb->s_umount)) { | ||
4597 | writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); | ||
4598 | up_read(&sb->s_umount); | ||
4599 | } else { | ||
4600 | /* | ||
4601 | * We needn't worry the filesystem going from r/w to r/o though | ||
4602 | * we don't acquire ->s_umount mutex, because the filesystem | ||
4603 | * should guarantee the delalloc inodes list be empty after | ||
4604 | * the filesystem is readonly(all dirty pages are written to | ||
4605 | * the disk). | ||
4606 | */ | ||
4607 | btrfs_start_delalloc_roots(fs_info, nr_items); | ||
4608 | if (!current->journal_info) | ||
4609 | btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1); | ||
4610 | } | ||
4611 | } | ||
4612 | |||
4613 | static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, | ||
4614 | u64 to_reclaim) | ||
4615 | { | ||
4616 | u64 bytes; | ||
4617 | u64 nr; | ||
4618 | |||
4619 | bytes = btrfs_calc_trans_metadata_size(fs_info, 1); | ||
4620 | nr = div64_u64(to_reclaim, bytes); | ||
4621 | if (!nr) | ||
4622 | nr = 1; | ||
4623 | return nr; | ||
4624 | } | ||
4625 | |||
4626 | #define EXTENT_SIZE_PER_ITEM SZ_256K | ||
4627 | |||
4628 | /* | ||
4629 | * shrink metadata reservation for delalloc | ||
4630 | */ | ||
4631 | static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, | ||
4632 | u64 orig, bool wait_ordered) | ||
4633 | { | ||
4634 | struct btrfs_space_info *space_info; | ||
4635 | struct btrfs_trans_handle *trans; | ||
4636 | u64 delalloc_bytes; | ||
4637 | u64 dio_bytes; | ||
4638 | u64 async_pages; | ||
4639 | u64 items; | ||
4640 | long time_left; | ||
4641 | unsigned long nr_pages; | ||
4642 | int loops; | ||
4643 | |||
4644 | /* Calc the number of the pages we need flush for space reservation */ | ||
4645 | items = calc_reclaim_items_nr(fs_info, to_reclaim); | ||
4646 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; | ||
4647 | |||
4648 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
4649 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
4650 | |||
4651 | delalloc_bytes = percpu_counter_sum_positive( | ||
4652 | &fs_info->delalloc_bytes); | ||
4653 | dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); | ||
4654 | if (delalloc_bytes == 0 && dio_bytes == 0) { | ||
4655 | if (trans) | ||
4656 | return; | ||
4657 | if (wait_ordered) | ||
4658 | btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); | ||
4659 | return; | ||
4660 | } | ||
4661 | |||
4662 | /* | ||
4663 | * If we are doing more ordered than delalloc we need to just wait on | ||
4664 | * ordered extents, otherwise we'll waste time trying to flush delalloc | ||
4665 | * that likely won't give us the space back we need. | ||
4666 | */ | ||
4667 | if (dio_bytes > delalloc_bytes) | ||
4668 | wait_ordered = true; | ||
4669 | |||
4670 | loops = 0; | ||
4671 | while ((delalloc_bytes || dio_bytes) && loops < 3) { | ||
4672 | nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; | ||
4673 | |||
4674 | /* | ||
4675 | * Triggers inode writeback for up to nr_pages. This will invoke | ||
4676 | * ->writepages callback and trigger delalloc filling | ||
4677 | * (btrfs_run_delalloc_range()). | ||
4678 | */ | ||
4679 | btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items); | ||
4680 | |||
4681 | /* | ||
4682 | * We need to wait for the compressed pages to start before | ||
4683 | * we continue. | ||
4684 | */ | ||
4685 | async_pages = atomic_read(&fs_info->async_delalloc_pages); | ||
4686 | if (!async_pages) | ||
4687 | goto skip_async; | ||
4688 | |||
4689 | /* | ||
4690 | * Calculate how many compressed pages we want to be written | ||
4691 | * before we continue. I.e if there are more async pages than we | ||
4692 | * require wait_event will wait until nr_pages are written. | ||
4693 | */ | ||
4694 | if (async_pages <= nr_pages) | ||
4695 | async_pages = 0; | ||
4696 | else | ||
4697 | async_pages -= nr_pages; | ||
4698 | |||
4699 | wait_event(fs_info->async_submit_wait, | ||
4700 | atomic_read(&fs_info->async_delalloc_pages) <= | ||
4701 | (int)async_pages); | ||
4702 | skip_async: | ||
4703 | spin_lock(&space_info->lock); | ||
4704 | if (list_empty(&space_info->tickets) && | ||
4705 | list_empty(&space_info->priority_tickets)) { | ||
4706 | spin_unlock(&space_info->lock); | ||
4707 | break; | ||
4708 | } | ||
4709 | spin_unlock(&space_info->lock); | ||
4710 | |||
4711 | loops++; | ||
4712 | if (wait_ordered && !trans) { | ||
4713 | btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); | ||
4714 | } else { | ||
4715 | time_left = schedule_timeout_killable(1); | ||
4716 | if (time_left) | ||
4717 | break; | ||
4718 | } | ||
4719 | delalloc_bytes = percpu_counter_sum_positive( | ||
4720 | &fs_info->delalloc_bytes); | ||
4721 | dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); | ||
4722 | } | ||
4723 | } | ||
4724 | |||
4725 | struct reserve_ticket { | ||
4726 | u64 orig_bytes; | ||
4727 | u64 bytes; | ||
4728 | int error; | ||
4729 | struct list_head list; | ||
4730 | wait_queue_head_t wait; | ||
4731 | }; | ||
4732 | |||
4733 | /** | ||
4734 | * maybe_commit_transaction - possibly commit the transaction if its ok to | ||
4735 | * @root - the root we're allocating for | ||
4736 | * @bytes - the number of bytes we want to reserve | ||
4737 | * @force - force the commit | ||
4738 | * | ||
4739 | * This will check to make sure that committing the transaction will actually | ||
4740 | * get us somewhere and then commit the transaction if it does. Otherwise it | ||
4741 | * will return -ENOSPC. | ||
4742 | */ | ||
4743 | static int may_commit_transaction(struct btrfs_fs_info *fs_info, | ||
4744 | struct btrfs_space_info *space_info) | ||
4745 | { | ||
4746 | struct reserve_ticket *ticket = NULL; | ||
4747 | struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; | ||
4748 | struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; | ||
4749 | struct btrfs_trans_handle *trans; | ||
4750 | u64 bytes_needed; | ||
4751 | u64 reclaim_bytes = 0; | ||
4752 | |||
4753 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
4754 | if (trans) | ||
4755 | return -EAGAIN; | ||
4756 | |||
4757 | spin_lock(&space_info->lock); | ||
4758 | if (!list_empty(&space_info->priority_tickets)) | ||
4759 | ticket = list_first_entry(&space_info->priority_tickets, | ||
4760 | struct reserve_ticket, list); | ||
4761 | else if (!list_empty(&space_info->tickets)) | ||
4762 | ticket = list_first_entry(&space_info->tickets, | ||
4763 | struct reserve_ticket, list); | ||
4764 | bytes_needed = (ticket) ? ticket->bytes : 0; | ||
4765 | spin_unlock(&space_info->lock); | ||
4766 | |||
4767 | if (!bytes_needed) | ||
4768 | return 0; | ||
4769 | |||
4770 | trans = btrfs_join_transaction(fs_info->extent_root); | ||
4771 | if (IS_ERR(trans)) | ||
4772 | return PTR_ERR(trans); | ||
4773 | |||
4774 | /* | ||
4775 | * See if there is enough pinned space to make this reservation, or if | ||
4776 | * we have block groups that are going to be freed, allowing us to | ||
4777 | * possibly do a chunk allocation the next loop through. | ||
4778 | */ | ||
4779 | if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || | ||
4780 | __percpu_counter_compare(&space_info->total_bytes_pinned, | ||
4781 | bytes_needed, | ||
4782 | BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) | ||
4783 | goto commit; | ||
4784 | |||
4785 | /* | ||
4786 | * See if there is some space in the delayed insertion reservation for | ||
4787 | * this reservation. | ||
4788 | */ | ||
4789 | if (space_info != delayed_rsv->space_info) | ||
4790 | goto enospc; | ||
4791 | |||
4792 | spin_lock(&delayed_rsv->lock); | ||
4793 | reclaim_bytes += delayed_rsv->reserved; | ||
4794 | spin_unlock(&delayed_rsv->lock); | ||
4795 | |||
4796 | spin_lock(&delayed_refs_rsv->lock); | ||
4797 | reclaim_bytes += delayed_refs_rsv->reserved; | ||
4798 | spin_unlock(&delayed_refs_rsv->lock); | ||
4799 | if (reclaim_bytes >= bytes_needed) | ||
4800 | goto commit; | ||
4801 | bytes_needed -= reclaim_bytes; | ||
4802 | |||
4803 | if (__percpu_counter_compare(&space_info->total_bytes_pinned, | ||
4804 | bytes_needed, | ||
4805 | BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) | ||
4806 | goto enospc; | ||
4807 | |||
4808 | commit: | ||
4809 | return btrfs_commit_transaction(trans); | ||
4810 | enospc: | ||
4811 | btrfs_end_transaction(trans); | ||
4812 | return -ENOSPC; | ||
4813 | } | ||
4814 | |||
4815 | /* | ||
4816 | * Try to flush some data based on policy set by @state. This is only advisory | ||
4817 | * and may fail for various reasons. The caller is supposed to examine the | ||
4818 | * state of @space_info to detect the outcome. | ||
4819 | */ | ||
4820 | static void flush_space(struct btrfs_fs_info *fs_info, | ||
4821 | struct btrfs_space_info *space_info, u64 num_bytes, | ||
4822 | int state) | ||
4823 | { | ||
4824 | struct btrfs_root *root = fs_info->extent_root; | ||
4825 | struct btrfs_trans_handle *trans; | ||
4826 | int nr; | ||
4827 | int ret = 0; | ||
4828 | |||
4829 | switch (state) { | ||
4830 | case FLUSH_DELAYED_ITEMS_NR: | ||
4831 | case FLUSH_DELAYED_ITEMS: | ||
4832 | if (state == FLUSH_DELAYED_ITEMS_NR) | ||
4833 | nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2; | ||
4834 | else | ||
4835 | nr = -1; | ||
4836 | |||
4837 | trans = btrfs_join_transaction(root); | ||
4838 | if (IS_ERR(trans)) { | ||
4839 | ret = PTR_ERR(trans); | ||
4840 | break; | ||
4841 | } | ||
4842 | ret = btrfs_run_delayed_items_nr(trans, nr); | ||
4843 | btrfs_end_transaction(trans); | ||
4844 | break; | ||
4845 | case FLUSH_DELALLOC: | ||
4846 | case FLUSH_DELALLOC_WAIT: | ||
4847 | shrink_delalloc(fs_info, num_bytes * 2, num_bytes, | ||
4848 | state == FLUSH_DELALLOC_WAIT); | ||
4849 | break; | ||
4850 | case FLUSH_DELAYED_REFS_NR: | ||
4851 | case FLUSH_DELAYED_REFS: | ||
4852 | trans = btrfs_join_transaction(root); | ||
4853 | if (IS_ERR(trans)) { | ||
4854 | ret = PTR_ERR(trans); | ||
4855 | break; | ||
4856 | } | ||
4857 | if (state == FLUSH_DELAYED_REFS_NR) | ||
4858 | nr = calc_reclaim_items_nr(fs_info, num_bytes); | ||
4859 | else | ||
4860 | nr = 0; | ||
4861 | btrfs_run_delayed_refs(trans, nr); | ||
4862 | btrfs_end_transaction(trans); | ||
4863 | break; | ||
4864 | case ALLOC_CHUNK: | ||
4865 | case ALLOC_CHUNK_FORCE: | ||
4866 | trans = btrfs_join_transaction(root); | ||
4867 | if (IS_ERR(trans)) { | ||
4868 | ret = PTR_ERR(trans); | ||
4869 | break; | ||
4870 | } | ||
4871 | ret = do_chunk_alloc(trans, | ||
4872 | btrfs_metadata_alloc_profile(fs_info), | ||
4873 | (state == ALLOC_CHUNK) ? | ||
4874 | CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE); | ||
4875 | btrfs_end_transaction(trans); | ||
4876 | if (ret > 0 || ret == -ENOSPC) | ||
4877 | ret = 0; | ||
4878 | break; | ||
4879 | case COMMIT_TRANS: | ||
4880 | /* | ||
4881 | * If we have pending delayed iputs then we could free up a | ||
4882 | * bunch of pinned space, so make sure we run the iputs before | ||
4883 | * we do our pinned bytes check below. | ||
4884 | */ | ||
4885 | btrfs_run_delayed_iputs(fs_info); | ||
4886 | btrfs_wait_on_delayed_iputs(fs_info); | ||
4887 | |||
4888 | ret = may_commit_transaction(fs_info, space_info); | ||
4889 | break; | ||
4890 | default: | ||
4891 | ret = -ENOSPC; | ||
4892 | break; | ||
4893 | } | ||
4894 | |||
4895 | trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, | ||
4896 | ret); | ||
4897 | return; | ||
4898 | } | ||
4899 | |||
4900 | static inline u64 | ||
4901 | btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, | ||
4902 | struct btrfs_space_info *space_info, | ||
4903 | bool system_chunk) | ||
4904 | { | ||
4905 | struct reserve_ticket *ticket; | ||
4906 | u64 used; | ||
4907 | u64 expected; | ||
4908 | u64 to_reclaim = 0; | ||
4909 | |||
4910 | list_for_each_entry(ticket, &space_info->tickets, list) | ||
4911 | to_reclaim += ticket->bytes; | ||
4912 | list_for_each_entry(ticket, &space_info->priority_tickets, list) | ||
4913 | to_reclaim += ticket->bytes; | ||
4914 | if (to_reclaim) | ||
4915 | return to_reclaim; | ||
4916 | |||
4917 | to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); | ||
4918 | if (can_overcommit(fs_info, space_info, to_reclaim, | ||
4919 | BTRFS_RESERVE_FLUSH_ALL, system_chunk)) | ||
4920 | return 0; | ||
4921 | |||
4922 | used = btrfs_space_info_used(space_info, true); | ||
4923 | |||
4924 | if (can_overcommit(fs_info, space_info, SZ_1M, | ||
4925 | BTRFS_RESERVE_FLUSH_ALL, system_chunk)) | ||
4926 | expected = div_factor_fine(space_info->total_bytes, 95); | ||
4927 | else | ||
4928 | expected = div_factor_fine(space_info->total_bytes, 90); | ||
4929 | |||
4930 | if (used > expected) | ||
4931 | to_reclaim = used - expected; | ||
4932 | else | ||
4933 | to_reclaim = 0; | ||
4934 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + | ||
4935 | space_info->bytes_reserved); | ||
4936 | return to_reclaim; | ||
4937 | } | ||
4938 | |||
4939 | static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, | ||
4940 | struct btrfs_space_info *space_info, | ||
4941 | u64 used, bool system_chunk) | ||
4942 | { | ||
4943 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); | ||
4944 | |||
4945 | /* If we're just plain full then async reclaim just slows us down. */ | ||
4946 | if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) | ||
4947 | return 0; | ||
4948 | |||
4949 | if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, | ||
4950 | system_chunk)) | ||
4951 | return 0; | ||
4952 | |||
4953 | return (used >= thresh && !btrfs_fs_closing(fs_info) && | ||
4954 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | ||
4955 | } | ||
4956 | |||
4957 | static bool wake_all_tickets(struct list_head *head) | ||
4958 | { | ||
4959 | struct reserve_ticket *ticket; | ||
4960 | |||
4961 | while (!list_empty(head)) { | ||
4962 | ticket = list_first_entry(head, struct reserve_ticket, list); | ||
4963 | list_del_init(&ticket->list); | ||
4964 | ticket->error = -ENOSPC; | ||
4965 | wake_up(&ticket->wait); | ||
4966 | if (ticket->bytes != ticket->orig_bytes) | ||
4967 | return true; | ||
4968 | } | ||
4969 | return false; | ||
4970 | } | ||
4971 | |||
4972 | /* | ||
4973 | * This is for normal flushers, we can wait all goddamned day if we want to. We | ||
4974 | * will loop and continuously try to flush as long as we are making progress. | ||
4975 | * We count progress as clearing off tickets each time we have to loop. | ||
4976 | */ | ||
4977 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | ||
4978 | { | ||
4979 | struct btrfs_fs_info *fs_info; | ||
4980 | struct btrfs_space_info *space_info; | ||
4981 | u64 to_reclaim; | ||
4982 | int flush_state; | ||
4983 | int commit_cycles = 0; | ||
4984 | u64 last_tickets_id; | ||
4985 | |||
4986 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | ||
4987 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
4988 | |||
4989 | spin_lock(&space_info->lock); | ||
4990 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, | ||
4991 | false); | ||
4992 | if (!to_reclaim) { | ||
4993 | space_info->flush = 0; | ||
4994 | spin_unlock(&space_info->lock); | ||
4995 | return; | ||
4996 | } | ||
4997 | last_tickets_id = space_info->tickets_id; | ||
4998 | spin_unlock(&space_info->lock); | ||
4999 | |||
5000 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5001 | do { | ||
5002 | flush_space(fs_info, space_info, to_reclaim, flush_state); | ||
5003 | spin_lock(&space_info->lock); | ||
5004 | if (list_empty(&space_info->tickets)) { | ||
5005 | space_info->flush = 0; | ||
5006 | spin_unlock(&space_info->lock); | ||
5007 | return; | ||
5008 | } | ||
5009 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, | ||
5010 | space_info, | ||
5011 | false); | ||
5012 | if (last_tickets_id == space_info->tickets_id) { | ||
5013 | flush_state++; | ||
5014 | } else { | ||
5015 | last_tickets_id = space_info->tickets_id; | ||
5016 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5017 | if (commit_cycles) | ||
5018 | commit_cycles--; | ||
5019 | } | ||
5020 | |||
5021 | /* | ||
5022 | * We don't want to force a chunk allocation until we've tried | ||
5023 | * pretty hard to reclaim space. Think of the case where we | ||
5024 | * freed up a bunch of space and so have a lot of pinned space | ||
5025 | * to reclaim. We would rather use that than possibly create a | ||
5026 | * underutilized metadata chunk. So if this is our first run | ||
5027 | * through the flushing state machine skip ALLOC_CHUNK_FORCE and | ||
5028 | * commit the transaction. If nothing has changed the next go | ||
5029 | * around then we can force a chunk allocation. | ||
5030 | */ | ||
5031 | if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles) | ||
5032 | flush_state++; | ||
5033 | |||
5034 | if (flush_state > COMMIT_TRANS) { | ||
5035 | commit_cycles++; | ||
5036 | if (commit_cycles > 2) { | ||
5037 | if (wake_all_tickets(&space_info->tickets)) { | ||
5038 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5039 | commit_cycles--; | ||
5040 | } else { | ||
5041 | space_info->flush = 0; | ||
5042 | } | ||
5043 | } else { | ||
5044 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
5045 | } | ||
5046 | } | ||
5047 | spin_unlock(&space_info->lock); | ||
5048 | } while (flush_state <= COMMIT_TRANS); | ||
5049 | } | ||
5050 | |||
5051 | void btrfs_init_async_reclaim_work(struct work_struct *work) | ||
5052 | { | ||
5053 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | ||
5054 | } | ||
5055 | |||
5056 | static const enum btrfs_flush_state priority_flush_states[] = { | ||
5057 | FLUSH_DELAYED_ITEMS_NR, | ||
5058 | FLUSH_DELAYED_ITEMS, | ||
5059 | ALLOC_CHUNK, | ||
5060 | }; | ||
5061 | |||
5062 | static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, | ||
5063 | struct btrfs_space_info *space_info, | ||
5064 | struct reserve_ticket *ticket) | ||
5065 | { | ||
5066 | u64 to_reclaim; | ||
5067 | int flush_state; | ||
5068 | |||
5069 | spin_lock(&space_info->lock); | ||
5070 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, | ||
5071 | false); | ||
5072 | if (!to_reclaim) { | ||
5073 | spin_unlock(&space_info->lock); | ||
5074 | return; | ||
5075 | } | ||
5076 | spin_unlock(&space_info->lock); | ||
5077 | |||
5078 | flush_state = 0; | ||
5079 | do { | ||
5080 | flush_space(fs_info, space_info, to_reclaim, | ||
5081 | priority_flush_states[flush_state]); | ||
5082 | flush_state++; | ||
5083 | spin_lock(&space_info->lock); | ||
5084 | if (ticket->bytes == 0) { | ||
5085 | spin_unlock(&space_info->lock); | ||
5086 | return; | ||
5087 | } | ||
5088 | spin_unlock(&space_info->lock); | ||
5089 | } while (flush_state < ARRAY_SIZE(priority_flush_states)); | ||
5090 | } | ||
5091 | |||
5092 | static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, | ||
5093 | struct btrfs_space_info *space_info, | ||
5094 | struct reserve_ticket *ticket) | ||
5095 | |||
5096 | { | ||
5097 | DEFINE_WAIT(wait); | ||
5098 | u64 reclaim_bytes = 0; | ||
5099 | int ret = 0; | ||
5100 | |||
5101 | spin_lock(&space_info->lock); | ||
5102 | while (ticket->bytes > 0 && ticket->error == 0) { | ||
5103 | ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); | ||
5104 | if (ret) { | ||
5105 | ret = -EINTR; | ||
5106 | break; | ||
5107 | } | ||
5108 | spin_unlock(&space_info->lock); | ||
5109 | |||
5110 | schedule(); | ||
5111 | |||
5112 | finish_wait(&ticket->wait, &wait); | ||
5113 | spin_lock(&space_info->lock); | ||
5114 | } | ||
5115 | if (!ret) | ||
5116 | ret = ticket->error; | ||
5117 | if (!list_empty(&ticket->list)) | ||
5118 | list_del_init(&ticket->list); | ||
5119 | if (ticket->bytes && ticket->bytes < ticket->orig_bytes) | ||
5120 | reclaim_bytes = ticket->orig_bytes - ticket->bytes; | ||
5121 | spin_unlock(&space_info->lock); | ||
5122 | |||
5123 | if (reclaim_bytes) | ||
5124 | space_info_add_old_bytes(fs_info, space_info, reclaim_bytes); | ||
5125 | return ret; | ||
5126 | } | ||
5127 | |||
5128 | /** | ||
5129 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
5130 | * @root - the root we're allocating for | ||
5131 | * @space_info - the space info we want to allocate from | ||
5132 | * @orig_bytes - the number of bytes we want | ||
5133 | * @flush - whether or not we can flush to make our reservation | ||
5134 | * | ||
5135 | * This will reserve orig_bytes number of bytes from the space info associated | ||
5136 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
5137 | * flush out space to make room. It will do this by flushing delalloc if | ||
5138 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
5139 | * regain reservations will be made and this will fail if there is not enough | ||
5140 | * space already. | ||
5141 | */ | ||
5142 | static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, | ||
5143 | struct btrfs_space_info *space_info, | ||
5144 | u64 orig_bytes, | ||
5145 | enum btrfs_reserve_flush_enum flush, | ||
5146 | bool system_chunk) | ||
5147 | { | ||
5148 | struct reserve_ticket ticket; | ||
5149 | u64 used; | ||
5150 | u64 reclaim_bytes = 0; | ||
5151 | int ret = 0; | ||
5152 | |||
5153 | ASSERT(orig_bytes); | ||
5154 | ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); | ||
5155 | |||
5156 | spin_lock(&space_info->lock); | ||
5157 | ret = -ENOSPC; | ||
5158 | used = btrfs_space_info_used(space_info, true); | ||
5159 | |||
5160 | /* | ||
5161 | * If we have enough space then hooray, make our reservation and carry | ||
5162 | * on. If not see if we can overcommit, and if we can, hooray carry on. | ||
5163 | * If not things get more complicated. | ||
5164 | */ | ||
5165 | if (used + orig_bytes <= space_info->total_bytes) { | ||
5166 | update_bytes_may_use(space_info, orig_bytes); | ||
5167 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5168 | space_info->flags, orig_bytes, 1); | ||
5169 | ret = 0; | ||
5170 | } else if (can_overcommit(fs_info, space_info, orig_bytes, flush, | ||
5171 | system_chunk)) { | ||
5172 | update_bytes_may_use(space_info, orig_bytes); | ||
5173 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5174 | space_info->flags, orig_bytes, 1); | ||
5175 | ret = 0; | ||
5176 | } | ||
5177 | |||
5178 | /* | ||
5179 | * If we couldn't make a reservation then setup our reservation ticket | ||
5180 | * and kick the async worker if it's not already running. | ||
5181 | * | ||
5182 | * If we are a priority flusher then we just need to add our ticket to | ||
5183 | * the list and we will do our own flushing further down. | ||
5184 | */ | ||
5185 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { | ||
5186 | ticket.orig_bytes = orig_bytes; | ||
5187 | ticket.bytes = orig_bytes; | ||
5188 | ticket.error = 0; | ||
5189 | init_waitqueue_head(&ticket.wait); | ||
5190 | if (flush == BTRFS_RESERVE_FLUSH_ALL) { | ||
5191 | list_add_tail(&ticket.list, &space_info->tickets); | ||
5192 | if (!space_info->flush) { | ||
5193 | space_info->flush = 1; | ||
5194 | trace_btrfs_trigger_flush(fs_info, | ||
5195 | space_info->flags, | ||
5196 | orig_bytes, flush, | ||
5197 | "enospc"); | ||
5198 | queue_work(system_unbound_wq, | ||
5199 | &fs_info->async_reclaim_work); | ||
5200 | } | ||
5201 | } else { | ||
5202 | list_add_tail(&ticket.list, | ||
5203 | &space_info->priority_tickets); | ||
5204 | } | ||
5205 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
5206 | used += orig_bytes; | ||
5207 | /* | ||
5208 | * We will do the space reservation dance during log replay, | ||
5209 | * which means we won't have fs_info->fs_root set, so don't do | ||
5210 | * the async reclaim as we will panic. | ||
5211 | */ | ||
5212 | if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && | ||
5213 | need_do_async_reclaim(fs_info, space_info, | ||
5214 | used, system_chunk) && | ||
5215 | !work_busy(&fs_info->async_reclaim_work)) { | ||
5216 | trace_btrfs_trigger_flush(fs_info, space_info->flags, | ||
5217 | orig_bytes, flush, "preempt"); | ||
5218 | queue_work(system_unbound_wq, | ||
5219 | &fs_info->async_reclaim_work); | ||
5220 | } | ||
5221 | } | ||
5222 | spin_unlock(&space_info->lock); | ||
5223 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) | ||
5224 | return ret; | ||
5225 | |||
5226 | if (flush == BTRFS_RESERVE_FLUSH_ALL) | ||
5227 | return wait_reserve_ticket(fs_info, space_info, &ticket); | ||
5228 | |||
5229 | ret = 0; | ||
5230 | priority_reclaim_metadata_space(fs_info, space_info, &ticket); | ||
5231 | spin_lock(&space_info->lock); | ||
5232 | if (ticket.bytes) { | ||
5233 | if (ticket.bytes < orig_bytes) | ||
5234 | reclaim_bytes = orig_bytes - ticket.bytes; | ||
5235 | list_del_init(&ticket.list); | ||
5236 | ret = -ENOSPC; | ||
5237 | } | ||
5238 | spin_unlock(&space_info->lock); | ||
5239 | |||
5240 | if (reclaim_bytes) | ||
5241 | space_info_add_old_bytes(fs_info, space_info, reclaim_bytes); | ||
5242 | ASSERT(list_empty(&ticket.list)); | ||
5243 | return ret; | ||
5244 | } | ||
5245 | |||
5246 | /** | ||
5247 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
5248 | * @root - the root we're allocating for | ||
5249 | * @block_rsv - the block_rsv we're allocating for | ||
5250 | * @orig_bytes - the number of bytes we want | ||
5251 | * @flush - whether or not we can flush to make our reservation | ||
5252 | * | ||
5253 | * This will reserve orig_bytes number of bytes from the space info associated | ||
5254 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
5255 | * flush out space to make room. It will do this by flushing delalloc if | ||
5256 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
5257 | * regain reservations will be made and this will fail if there is not enough | ||
5258 | * space already. | ||
5259 | */ | ||
5260 | static int reserve_metadata_bytes(struct btrfs_root *root, | ||
5261 | struct btrfs_block_rsv *block_rsv, | ||
5262 | u64 orig_bytes, | ||
5263 | enum btrfs_reserve_flush_enum flush) | ||
5264 | { | ||
5265 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
5266 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
5267 | int ret; | ||
5268 | bool system_chunk = (root == fs_info->chunk_root); | ||
5269 | |||
5270 | ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, | ||
5271 | orig_bytes, flush, system_chunk); | ||
5272 | if (ret == -ENOSPC && | ||
5273 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | ||
5274 | if (block_rsv != global_rsv && | ||
5275 | !block_rsv_use_bytes(global_rsv, orig_bytes)) | ||
5276 | ret = 0; | ||
5277 | } | ||
5278 | if (ret == -ENOSPC) { | ||
5279 | trace_btrfs_space_reservation(fs_info, "space_info:enospc", | ||
5280 | block_rsv->space_info->flags, | ||
5281 | orig_bytes, 1); | ||
5282 | |||
5283 | if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) | ||
5284 | dump_space_info(fs_info, block_rsv->space_info, | ||
5285 | orig_bytes, 0); | ||
5286 | } | ||
5287 | return ret; | ||
5288 | } | ||
5289 | |||
5290 | static struct btrfs_block_rsv *get_block_rsv( | ||
5291 | const struct btrfs_trans_handle *trans, | ||
5292 | const struct btrfs_root *root) | ||
5293 | { | ||
5294 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
5295 | struct btrfs_block_rsv *block_rsv = NULL; | ||
5296 | |||
5297 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | ||
5298 | (root == fs_info->csum_root && trans->adding_csums) || | ||
5299 | (root == fs_info->uuid_root)) | ||
5300 | block_rsv = trans->block_rsv; | ||
5301 | |||
5302 | if (!block_rsv) | ||
5303 | block_rsv = root->block_rsv; | ||
5304 | |||
5305 | if (!block_rsv) | ||
5306 | block_rsv = &fs_info->empty_block_rsv; | ||
5307 | |||
5308 | return block_rsv; | ||
5309 | } | ||
5310 | |||
5311 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
5312 | u64 num_bytes) | ||
5313 | { | ||
5314 | int ret = -ENOSPC; | ||
5315 | spin_lock(&block_rsv->lock); | ||
5316 | if (block_rsv->reserved >= num_bytes) { | ||
5317 | block_rsv->reserved -= num_bytes; | ||
5318 | if (block_rsv->reserved < block_rsv->size) | ||
5319 | block_rsv->full = 0; | ||
5320 | ret = 0; | ||
5321 | } | ||
5322 | spin_unlock(&block_rsv->lock); | ||
5323 | return ret; | ||
5324 | } | ||
5325 | |||
5326 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||
5327 | u64 num_bytes, bool update_size) | ||
5328 | { | ||
5329 | spin_lock(&block_rsv->lock); | ||
5330 | block_rsv->reserved += num_bytes; | ||
5331 | if (update_size) | ||
5332 | block_rsv->size += num_bytes; | ||
5333 | else if (block_rsv->reserved >= block_rsv->size) | ||
5334 | block_rsv->full = 1; | ||
5335 | spin_unlock(&block_rsv->lock); | ||
5336 | } | ||
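/*
 * Editor's sketch of the accounting done by block_rsv_use_bytes() and
 * block_rsv_add_bytes() above: "size" is how much the rsv wants,
 * "reserved" is how much it actually holds, and "full" caches
 * reserved >= size.  Simplified, lock-free userspace model; the toy_*
 * names are invented.
 */
#include <stdbool.h>
#include <stdint.h>

struct toy_rsv {
	uint64_t size;
	uint64_t reserved;
	bool full;
};

/* cf. block_rsv_use_bytes(): consume reserved space, -1 if short */
static int toy_rsv_use(struct toy_rsv *r, uint64_t n)
{
	if (r->reserved < n)
		return -1;
	r->reserved -= n;
	if (r->reserved < r->size)
		r->full = false;
	return 0;
}

/* cf. block_rsv_add_bytes(): add space, optionally growing the target */
static void toy_rsv_add(struct toy_rsv *r, uint64_t n, bool update_size)
{
	r->reserved += n;
	if (update_size)
		r->size += n;
	else if (r->reserved >= r->size)
		r->full = true;
}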
5337 | |||
5338 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||
5339 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
5340 | int min_factor) | ||
5341 | { | ||
5342 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
5343 | u64 min_bytes; | ||
5344 | |||
5345 | if (global_rsv->space_info != dest->space_info) | ||
5346 | return -ENOSPC; | ||
5347 | |||
5348 | spin_lock(&global_rsv->lock); | ||
5349 | min_bytes = div_factor(global_rsv->size, min_factor); | ||
5350 | if (global_rsv->reserved < min_bytes + num_bytes) { | ||
5351 | spin_unlock(&global_rsv->lock); | ||
5352 | return -ENOSPC; | ||
5353 | } | ||
5354 | global_rsv->reserved -= num_bytes; | ||
5355 | if (global_rsv->reserved < global_rsv->size) | ||
5356 | global_rsv->full = 0; | ||
5357 | spin_unlock(&global_rsv->lock); | ||
5358 | |||
5359 | block_rsv_add_bytes(dest, num_bytes, true); | ||
5360 | return 0; | ||
5361 | } | ||
5362 | |||
5363 | /** | ||
5364 | * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv. | ||
5365 | * @fs_info - the fs info for our fs. | ||
5366 | * @src - the source block rsv to transfer from. | ||
5367 | * @num_bytes - the number of bytes to transfer. | ||
5368 | * | ||
5369 | * This transfers up to @num_bytes from the src rsv to the | ||
5370 | * delayed_refs_rsv. Any extra bytes are returned to the space info. | ||
5371 | */ | ||
5372 | void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, | ||
5373 | struct btrfs_block_rsv *src, | ||
5374 | u64 num_bytes) | ||
5375 | { | ||
5376 | struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; | ||
5377 | u64 to_free = 0; | ||
5378 | |||
5379 | spin_lock(&src->lock); | ||
5380 | src->reserved -= num_bytes; | ||
5381 | src->size -= num_bytes; | ||
5382 | spin_unlock(&src->lock); | ||
5383 | |||
5384 | spin_lock(&delayed_refs_rsv->lock); | ||
5385 | if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { | ||
5386 | u64 delta = delayed_refs_rsv->size - | ||
5387 | delayed_refs_rsv->reserved; | ||
5388 | if (num_bytes > delta) { | ||
5389 | to_free = num_bytes - delta; | ||
5390 | num_bytes = delta; | ||
5391 | } | ||
5392 | } else { | ||
5393 | to_free = num_bytes; | ||
5394 | num_bytes = 0; | ||
5395 | } | ||
5396 | |||
5397 | if (num_bytes) | ||
5398 | delayed_refs_rsv->reserved += num_bytes; | ||
5399 | if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size) | ||
5400 | delayed_refs_rsv->full = 1; | ||
5401 | spin_unlock(&delayed_refs_rsv->lock); | ||
5402 | |||
5403 | if (num_bytes) | ||
5404 | trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", | ||
5405 | 0, num_bytes, 1); | ||
5406 | if (to_free) | ||
5407 | space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info, | ||
5408 | to_free); | ||
5409 | } | ||
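/*
 * Editor's sketch of the transfer above: the source gives up @n bytes, the
 * delayed refs rsv keeps only what it is still missing (size - reserved),
 * and the remainder would be handed back to the space_info via
 * space_info_add_old_bytes().  Reuses struct toy_rsv from the earlier
 * sketch; all names are illustrative.
 */
static uint64_t toy_migrate_to_delayed_refs(struct toy_rsv *src,
					    struct toy_rsv *delayed, uint64_t n)
{
	uint64_t to_free = 0;

	src->reserved -= n;
	src->size -= n;

	if (delayed->size > delayed->reserved) {
		uint64_t delta = delayed->size - delayed->reserved;

		if (n > delta) {
			to_free = n - delta;
			n = delta;
		}
	} else {
		to_free = n;
		n = 0;
	}

	delayed->reserved += n;
	if (delayed->reserved >= delayed->size)
		delayed->full = true;

	return to_free;	/* caller returns this to the space_info */
}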
5410 | |||
5411 | /** | ||
5412 | * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage. | ||
5413 | * @fs_info - the fs_info for our fs. | ||
5414 | * @flush - control how we can flush for this reservation. | ||
5415 | * | ||
5416 | * This will refill the delayed refs rsv with up to one item's worth of space and | ||
5417 | * will return -ENOSPC if we can't make the reservation. | ||
5418 | */ | ||
5419 | int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, | ||
5420 | enum btrfs_reserve_flush_enum flush) | ||
5421 | { | ||
5422 | struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; | ||
5423 | u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1); | ||
5424 | u64 num_bytes = 0; | ||
5425 | int ret = -ENOSPC; | ||
5426 | |||
5427 | spin_lock(&block_rsv->lock); | ||
5428 | if (block_rsv->reserved < block_rsv->size) { | ||
5429 | num_bytes = block_rsv->size - block_rsv->reserved; | ||
5430 | num_bytes = min(num_bytes, limit); | ||
5431 | } | ||
5432 | spin_unlock(&block_rsv->lock); | ||
5433 | |||
5434 | if (!num_bytes) | ||
5435 | return 0; | ||
5436 | |||
5437 | ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv, | ||
5438 | num_bytes, flush); | ||
5439 | if (ret) | ||
5440 | return ret; | ||
5441 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
5442 | trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", | ||
5443 | 0, num_bytes, 1); | ||
5444 | return 0; | ||
5445 | } | ||
5446 | |||
5447 | /* | ||
5448 | * This is for space we already have accounted in space_info->bytes_may_use, so | ||
5449 | * basically when we're returning space from block_rsv's. | ||
5450 | */ | ||
5451 | static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, | ||
5452 | struct btrfs_space_info *space_info, | ||
5453 | u64 num_bytes) | ||
5454 | { | ||
5455 | struct reserve_ticket *ticket; | ||
5456 | struct list_head *head; | ||
5457 | u64 used; | ||
5458 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; | ||
5459 | bool check_overcommit = false; | ||
5460 | |||
5461 | spin_lock(&space_info->lock); | ||
5462 | head = &space_info->priority_tickets; | ||
5463 | |||
5464 | /* | ||
5465 | * If we are over our limit then we need to check and see if we can | ||
5466 | * overcommit, and if we can't then we just need to free up our space | ||
5467 | * and not satisfy any requests. | ||
5468 | */ | ||
5469 | used = btrfs_space_info_used(space_info, true); | ||
5470 | if (used - num_bytes >= space_info->total_bytes) | ||
5471 | check_overcommit = true; | ||
5472 | again: | ||
5473 | while (!list_empty(head) && num_bytes) { | ||
5474 | ticket = list_first_entry(head, struct reserve_ticket, | ||
5475 | list); | ||
5476 | /* | ||
5477 | * We use 0 bytes because this space is already reserved, so | ||
5478 | * adding the ticket space would be a double count. | ||
5479 | */ | ||
5480 | if (check_overcommit && | ||
5481 | !can_overcommit(fs_info, space_info, 0, flush, false)) | ||
5482 | break; | ||
5483 | if (num_bytes >= ticket->bytes) { | ||
5484 | list_del_init(&ticket->list); | ||
5485 | num_bytes -= ticket->bytes; | ||
5486 | ticket->bytes = 0; | ||
5487 | space_info->tickets_id++; | ||
5488 | wake_up(&ticket->wait); | ||
5489 | } else { | ||
5490 | ticket->bytes -= num_bytes; | ||
5491 | num_bytes = 0; | ||
5492 | } | ||
5493 | } | ||
5494 | |||
5495 | if (num_bytes && head == &space_info->priority_tickets) { | ||
5496 | head = &space_info->tickets; | ||
5497 | flush = BTRFS_RESERVE_FLUSH_ALL; | ||
5498 | goto again; | ||
5499 | } | ||
5500 | update_bytes_may_use(space_info, -num_bytes); | ||
5501 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5502 | space_info->flags, num_bytes, 0); | ||
5503 | spin_unlock(&space_info->lock); | ||
5504 | } | ||
5505 | |||
5506 | /* | ||
5507 | * This is for newly allocated space that isn't accounted in | ||
5508 | * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent | ||
5509 | * we use this helper. | ||
5510 | */ | ||
5511 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | ||
5512 | struct btrfs_space_info *space_info, | ||
5513 | u64 num_bytes) | ||
5514 | { | ||
5515 | struct reserve_ticket *ticket; | ||
5516 | struct list_head *head = &space_info->priority_tickets; | ||
5517 | |||
5518 | again: | ||
5519 | while (!list_empty(head) && num_bytes) { | ||
5520 | ticket = list_first_entry(head, struct reserve_ticket, | ||
5521 | list); | ||
5522 | if (num_bytes >= ticket->bytes) { | ||
5523 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5524 | space_info->flags, | ||
5525 | ticket->bytes, 1); | ||
5526 | list_del_init(&ticket->list); | ||
5527 | num_bytes -= ticket->bytes; | ||
5528 | update_bytes_may_use(space_info, ticket->bytes); | ||
5529 | ticket->bytes = 0; | ||
5530 | space_info->tickets_id++; | ||
5531 | wake_up(&ticket->wait); | ||
5532 | } else { | ||
5533 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5534 | space_info->flags, | ||
5535 | num_bytes, 1); | ||
5536 | update_bytes_may_use(space_info, num_bytes); | ||
5537 | ticket->bytes -= num_bytes; | ||
5538 | num_bytes = 0; | ||
5539 | } | ||
5540 | } | ||
5541 | |||
5542 | if (num_bytes && head == &space_info->priority_tickets) { | ||
5543 | head = &space_info->tickets; | ||
5544 | goto again; | ||
5545 | } | ||
5546 | } | ||
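/*
 * Editor's sketch of how the two helpers above hand freed space to
 * waiters: walk the priority tickets first, then the normal ones, giving
 * each ticket as much as it still needs and waking it once it hits zero.
 * A plain array stands in for the kernel's list_heads; names are invented.
 * space_info_add_old_bytes() then releases the unconsumed remainder from
 * bytes_may_use (that space was already accounted there), while
 * space_info_add_new_bytes() charges bytes_may_use only for what the
 * tickets actually consumed.
 */
#include <stddef.h>
#include <stdint.h>

struct toy_ticket {
	uint64_t bytes;		/* still-missing part of the reservation */
};

static uint64_t toy_satisfy(struct toy_ticket *t, size_t nr, uint64_t avail)
{
	for (size_t i = 0; i < nr && avail; i++) {
		if (avail >= t[i].bytes) {
			avail -= t[i].bytes;
			t[i].bytes = 0;	/* fully satisfied: wake the waiter */
		} else {
			t[i].bytes -= avail;
			avail = 0;
		}
	}
	return avail;	/* whatever is left after all tickets */
}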
5547 | |||
5548 | static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | ||
5549 | struct btrfs_block_rsv *block_rsv, | ||
5550 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
5551 | u64 *qgroup_to_release_ret) | ||
5552 | { | ||
5553 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
5554 | u64 qgroup_to_release = 0; | ||
5555 | u64 ret; | ||
5556 | |||
5557 | spin_lock(&block_rsv->lock); | ||
5558 | if (num_bytes == (u64)-1) { | ||
5559 | num_bytes = block_rsv->size; | ||
5560 | qgroup_to_release = block_rsv->qgroup_rsv_size; | ||
5561 | } | ||
5562 | block_rsv->size -= num_bytes; | ||
5563 | if (block_rsv->reserved >= block_rsv->size) { | ||
5564 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
5565 | block_rsv->reserved = block_rsv->size; | ||
5566 | block_rsv->full = 1; | ||
5567 | } else { | ||
5568 | num_bytes = 0; | ||
5569 | } | ||
5570 | if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { | ||
5571 | qgroup_to_release = block_rsv->qgroup_rsv_reserved - | ||
5572 | block_rsv->qgroup_rsv_size; | ||
5573 | block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; | ||
5574 | } else { | ||
5575 | qgroup_to_release = 0; | ||
5576 | } | ||
5577 | spin_unlock(&block_rsv->lock); | ||
5578 | |||
5579 | ret = num_bytes; | ||
5580 | if (num_bytes > 0) { | ||
5581 | if (dest) { | ||
5582 | spin_lock(&dest->lock); | ||
5583 | if (!dest->full) { | ||
5584 | u64 bytes_to_add; | ||
5585 | |||
5586 | bytes_to_add = dest->size - dest->reserved; | ||
5587 | bytes_to_add = min(num_bytes, bytes_to_add); | ||
5588 | dest->reserved += bytes_to_add; | ||
5589 | if (dest->reserved >= dest->size) | ||
5590 | dest->full = 1; | ||
5591 | num_bytes -= bytes_to_add; | ||
5592 | } | ||
5593 | spin_unlock(&dest->lock); | ||
5594 | } | ||
5595 | if (num_bytes) | ||
5596 | space_info_add_old_bytes(fs_info, space_info, | ||
5597 | num_bytes); | ||
5598 | } | ||
5599 | if (qgroup_to_release_ret) | ||
5600 | *qgroup_to_release_ret = qgroup_to_release; | ||
5601 | return ret; | ||
5602 | } | ||
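/*
 * Editor's sketch of block_rsv_release_bytes() above: after shrinking the
 * rsv's size, anything still reserved beyond the new size is excess; it
 * first tops up @dest (if given and not full) and the rest goes back to
 * the space_info.  Reuses struct toy_rsv from the earlier sketch; qgroup
 * accounting is omitted, and unlike the kernel version this returns only
 * what is left over for the space_info.
 */
static uint64_t toy_rsv_release(struct toy_rsv *rsv, struct toy_rsv *dest,
				uint64_t n)
{
	uint64_t excess = 0;

	if (n == (uint64_t)-1)
		n = rsv->size;
	rsv->size -= n;

	if (rsv->reserved > rsv->size) {
		excess = rsv->reserved - rsv->size;
		rsv->reserved = rsv->size;
		rsv->full = true;
	}

	if (excess && dest && !dest->full) {
		uint64_t add = dest->size - dest->reserved;

		if (add > excess)
			add = excess;
		dest->reserved += add;
		dest->full = dest->reserved >= dest->size;
		excess -= add;
	}
	return excess;	/* handed to space_info_add_old_bytes() */
}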
5603 | |||
5604 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, | ||
5605 | struct btrfs_block_rsv *dst, u64 num_bytes, | ||
5606 | bool update_size) | ||
5607 | { | ||
5608 | int ret; | ||
5609 | |||
5610 | ret = block_rsv_use_bytes(src, num_bytes); | ||
5611 | if (ret) | ||
5612 | return ret; | ||
5613 | |||
5614 | block_rsv_add_bytes(dst, num_bytes, update_size); | ||
5615 | return 0; | ||
5616 | } | ||
5617 | |||
5618 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) | ||
5619 | { | ||
5620 | memset(rsv, 0, sizeof(*rsv)); | ||
5621 | spin_lock_init(&rsv->lock); | ||
5622 | rsv->type = type; | ||
5623 | } | ||
5624 | |||
5625 | void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, | ||
5626 | struct btrfs_block_rsv *rsv, | ||
5627 | unsigned short type) | ||
5628 | { | ||
5629 | btrfs_init_block_rsv(rsv, type); | ||
5630 | rsv->space_info = __find_space_info(fs_info, | ||
5631 | BTRFS_BLOCK_GROUP_METADATA); | ||
5632 | } | ||
5633 | |||
5634 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, | ||
5635 | unsigned short type) | ||
5636 | { | ||
5637 | struct btrfs_block_rsv *block_rsv; | ||
5638 | |||
5639 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | ||
5640 | if (!block_rsv) | ||
5641 | return NULL; | ||
5642 | |||
5643 | btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); | ||
5644 | return block_rsv; | ||
5645 | } | ||
5646 | |||
5647 | void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, | ||
5648 | struct btrfs_block_rsv *rsv) | ||
5649 | { | ||
5650 | if (!rsv) | ||
5651 | return; | ||
5652 | btrfs_block_rsv_release(fs_info, rsv, (u64)-1); | ||
5653 | kfree(rsv); | ||
5654 | } | ||
5655 | |||
5656 | int btrfs_block_rsv_add(struct btrfs_root *root, | ||
5657 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, | ||
5658 | enum btrfs_reserve_flush_enum flush) | ||
5659 | { | ||
5660 | int ret; | ||
5661 | |||
5662 | if (num_bytes == 0) | ||
5663 | return 0; | ||
5664 | |||
5665 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); | ||
5666 | if (!ret) | ||
5667 | block_rsv_add_bytes(block_rsv, num_bytes, true); | ||
5668 | |||
5669 | return ret; | ||
5670 | } | ||
5671 | |||
5672 | int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor) | ||
5673 | { | ||
5674 | u64 num_bytes = 0; | ||
5675 | int ret = -ENOSPC; | ||
5676 | |||
5677 | if (!block_rsv) | ||
5678 | return 0; | ||
5679 | |||
5680 | spin_lock(&block_rsv->lock); | ||
5681 | num_bytes = div_factor(block_rsv->size, min_factor); | ||
5682 | if (block_rsv->reserved >= num_bytes) | ||
5683 | ret = 0; | ||
5684 | spin_unlock(&block_rsv->lock); | ||
5685 | |||
5686 | return ret; | ||
5687 | } | ||
5688 | |||
5689 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
5690 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, | ||
5691 | enum btrfs_reserve_flush_enum flush) | ||
5692 | { | ||
5693 | u64 num_bytes = 0; | ||
5694 | int ret = -ENOSPC; | ||
5695 | |||
5696 | if (!block_rsv) | ||
5697 | return 0; | ||
5698 | |||
5699 | spin_lock(&block_rsv->lock); | ||
5700 | num_bytes = min_reserved; | ||
5701 | if (block_rsv->reserved >= num_bytes) | ||
5702 | ret = 0; | ||
5703 | else | ||
5704 | num_bytes -= block_rsv->reserved; | ||
5705 | spin_unlock(&block_rsv->lock); | ||
5706 | |||
5707 | if (!ret) | ||
5708 | return 0; | ||
5709 | |||
5710 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); | ||
5711 | if (!ret) { | ||
5712 | block_rsv_add_bytes(block_rsv, num_bytes, false); | ||
5713 | return 0; | ||
5714 | } | ||
5715 | |||
5716 | return ret; | ||
5717 | } | ||
5718 | |||
5719 | static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, | ||
5720 | struct btrfs_block_rsv *block_rsv, | ||
5721 | u64 num_bytes, u64 *qgroup_to_release) | ||
5722 | { | ||
5723 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
5724 | struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; | ||
5725 | struct btrfs_block_rsv *target = delayed_rsv; | ||
5726 | |||
5727 | if (target->full || target == block_rsv) | ||
5728 | target = global_rsv; | ||
5729 | |||
5730 | if (block_rsv->space_info != target->space_info) | ||
5731 | target = NULL; | ||
5732 | |||
5733 | return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes, | ||
5734 | qgroup_to_release); | ||
5735 | } | ||
5736 | |||
5737 | void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, | ||
5738 | struct btrfs_block_rsv *block_rsv, | ||
5739 | u64 num_bytes) | ||
5740 | { | ||
5741 | __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL); | ||
5742 | } | ||
5743 | |||
5744 | /** | ||
5745 | * btrfs_inode_rsv_release - release any excessive reservation. | ||
5746 | * @inode - the inode we need to release from. | ||
5747 | * @qgroup_free - free or convert qgroup meta. | ||
5748 | * Unlike normal operation, qgroup meta reservation needs to know if we are | ||
5749 | * freeing qgroup reservation or just converting it into per-trans. Normally | ||
5750 | * @qgroup_free is true for error handling, and false for normal release. | ||
5751 | * | ||
5752 | * This is the same as btrfs_block_rsv_release, except that it handles the | ||
5753 | * tracepoint for the reservation. | ||
5754 | */ | ||
5755 | static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) | ||
5756 | { | ||
5757 | struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||
5758 | struct btrfs_block_rsv *block_rsv = &inode->block_rsv; | ||
5759 | u64 released = 0; | ||
5760 | u64 qgroup_to_release = 0; | ||
5761 | |||
5762 | /* | ||
5763 | * Since we statically set the block_rsv->size we just want to say we | ||
5764 | * are releasing 0 bytes, and then whatever is reserved over that size | ||
5765 | * gets freed. | ||
5766 | */ | ||
5767 | released = __btrfs_block_rsv_release(fs_info, block_rsv, 0, | ||
5768 | &qgroup_to_release); | ||
5769 | if (released > 0) | ||
5770 | trace_btrfs_space_reservation(fs_info, "delalloc", | ||
5771 | btrfs_ino(inode), released, 0); | ||
5772 | if (qgroup_free) | ||
5773 | btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); | ||
5774 | else | ||
5775 | btrfs_qgroup_convert_reserved_meta(inode->root, | ||
5776 | qgroup_to_release); | ||
5777 | } | ||
5778 | |||
5779 | /** | ||
5780 | * btrfs_delayed_refs_rsv_release - release a ref head's reservation. | ||
5781 | * @fs_info - the fs_info for our fs. | ||
5782 | * @nr - the number of items to drop. | ||
5783 | * | ||
5784 | * This drops the delayed ref head's count from the delayed refs rsv and frees | ||
5785 | * any excess reservation we had. | ||
5786 | */ | ||
5787 | void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) | ||
5788 | { | ||
5789 | struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; | ||
5790 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
5791 | u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr); | ||
5792 | u64 released = 0; | ||
5793 | |||
5794 | released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, | ||
5795 | num_bytes, NULL); | ||
5796 | if (released) | ||
5797 | trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", | ||
5798 | 0, released, 0); | ||
5799 | } | ||
5800 | |||
5801 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
5802 | { | ||
5803 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||
5804 | struct btrfs_space_info *sinfo = block_rsv->space_info; | ||
5805 | u64 num_bytes; | ||
5806 | |||
5807 | /* | ||
5808 | * The global block rsv is based on the size of the extent tree, the | ||
5809 | * checksum tree and the root tree. If the fs is empty we want to set | ||
5810 | * it to a minimal amount for safety. | ||
5811 | */ | ||
5812 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) + | ||
5813 | btrfs_root_used(&fs_info->csum_root->root_item) + | ||
5814 | btrfs_root_used(&fs_info->tree_root->root_item); | ||
5815 | num_bytes = max_t(u64, num_bytes, SZ_16M); | ||
5816 | |||
5817 | spin_lock(&sinfo->lock); | ||
5818 | spin_lock(&block_rsv->lock); | ||
5819 | |||
5820 | block_rsv->size = min_t(u64, num_bytes, SZ_512M); | ||
5821 | |||
5822 | if (block_rsv->reserved < block_rsv->size) { | ||
5823 | num_bytes = btrfs_space_info_used(sinfo, true); | ||
5824 | if (sinfo->total_bytes > num_bytes) { | ||
5825 | num_bytes = sinfo->total_bytes - num_bytes; | ||
5826 | num_bytes = min(num_bytes, | ||
5827 | block_rsv->size - block_rsv->reserved); | ||
5828 | block_rsv->reserved += num_bytes; | ||
5829 | update_bytes_may_use(sinfo, num_bytes); | ||
5830 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5831 | sinfo->flags, num_bytes, | ||
5832 | 1); | ||
5833 | } | ||
5834 | } else if (block_rsv->reserved > block_rsv->size) { | ||
5835 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
5836 | update_bytes_may_use(sinfo, -num_bytes); | ||
5837 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
5838 | sinfo->flags, num_bytes, 0); | ||
5839 | block_rsv->reserved = block_rsv->size; | ||
5840 | } | ||
5841 | |||
5842 | if (block_rsv->reserved == block_rsv->size) | ||
5843 | block_rsv->full = 1; | ||
5844 | else | ||
5845 | block_rsv->full = 0; | ||
5846 | |||
5847 | spin_unlock(&block_rsv->lock); | ||
5848 | spin_unlock(&sinfo->lock); | ||
5849 | } | ||
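/*
 * Editor's sketch of the sizing rule in update_global_block_rsv() above:
 * the target is the combined usage of the extent, csum and root trees,
 * clamped to the [16M, 512M] window; the rsv is then topped up from
 * whatever the metadata space_info still has free, or trimmed if it is
 * over target.  Constant and function names here are illustrative.
 */
#include <stdint.h>

#define TOY_SZ_16M  (16ULL * 1024 * 1024)
#define TOY_SZ_512M (512ULL * 1024 * 1024)

static uint64_t toy_global_rsv_target(uint64_t extent_root_used,
				      uint64_t csum_root_used,
				      uint64_t tree_root_used)
{
	uint64_t want = extent_root_used + csum_root_used + tree_root_used;

	if (want < TOY_SZ_16M)
		want = TOY_SZ_16M;
	if (want > TOY_SZ_512M)
		want = TOY_SZ_512M;
	return want;
}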
5850 | |||
5851 | static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
5852 | { | ||
5853 | struct btrfs_space_info *space_info; | ||
5854 | |||
5855 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | ||
5856 | fs_info->chunk_block_rsv.space_info = space_info; | ||
5857 | |||
5858 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
5859 | fs_info->global_block_rsv.space_info = space_info; | ||
5860 | fs_info->trans_block_rsv.space_info = space_info; | ||
5861 | fs_info->empty_block_rsv.space_info = space_info; | ||
5862 | fs_info->delayed_block_rsv.space_info = space_info; | ||
5863 | fs_info->delayed_refs_rsv.space_info = space_info; | ||
5864 | |||
5865 | fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv; | ||
5866 | fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv; | ||
5867 | fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||
5868 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | ||
5869 | if (fs_info->quota_root) | ||
5870 | fs_info->quota_root->block_rsv = &fs_info->global_block_rsv; | ||
5871 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | ||
5872 | |||
5873 | update_global_block_rsv(fs_info); | ||
5874 | } | ||
5875 | |||
5876 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
5877 | { | ||
5878 | block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, | ||
5879 | (u64)-1, NULL); | ||
5880 | WARN_ON(fs_info->trans_block_rsv.size > 0); | ||
5881 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | ||
5882 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | ||
5883 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | ||
5884 | WARN_ON(fs_info->delayed_block_rsv.size > 0); | ||
5885 | WARN_ON(fs_info->delayed_block_rsv.reserved > 0); | ||
5886 | WARN_ON(fs_info->delayed_refs_rsv.reserved > 0); | ||
5887 | WARN_ON(fs_info->delayed_refs_rsv.size > 0); | ||
5888 | } | ||
5889 | |||
5890 | /* | ||
5891 | * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv | ||
5892 | * @trans - the trans that may have generated delayed refs | ||
5893 | * | ||
5894 | * This is to be called any time we may have adjusted trans->delayed_ref_updates; | ||
5895 | * it will calculate the additional size and add it to the delayed_refs_rsv. | ||
5896 | */ | ||
5897 | void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) | ||
5898 | { | ||
5899 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
5900 | struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; | ||
5901 | u64 num_bytes; | ||
5902 | |||
5903 | if (!trans->delayed_ref_updates) | ||
5904 | return; | ||
5905 | |||
5906 | num_bytes = btrfs_calc_trans_metadata_size(fs_info, | ||
5907 | trans->delayed_ref_updates); | ||
5908 | spin_lock(&delayed_rsv->lock); | ||
5909 | delayed_rsv->size += num_bytes; | ||
5910 | delayed_rsv->full = 0; | ||
5911 | spin_unlock(&delayed_rsv->lock); | ||
5912 | trans->delayed_ref_updates = 0; | ||
5913 | } | ||
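/*
 * Editor's sketch of the bookkeeping above: every pending delayed ref
 * update makes the delayed_refs_rsv want more space.  The per-item cost is
 * assumed to be btrfs_calc_trans_metadata_size()'s nodesize *
 * BTRFS_MAX_LEVEL * 2 (the formula in ctree.h around this kernel
 * version); verify against the tree you are reading.  Names are invented.
 */
#include <stdint.h>

#define TOY_MAX_LEVEL 8

static uint64_t toy_delayed_refs_growth(uint32_t nodesize,
					uint64_t delayed_ref_updates)
{
	/* cost of one item, with room to COW a full path down the tree */
	uint64_t per_item = (uint64_t)nodesize * TOY_MAX_LEVEL * 2;

	return per_item * delayed_ref_updates;
}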
5914 | |||
5915 | /* | ||
5916 | * To be called after all the new block groups attached to the transaction | ||
5917 | * handle have been created (btrfs_create_pending_block_groups()). | ||
5918 | */ | ||
5919 | void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) | ||
5920 | { | ||
5921 | struct btrfs_fs_info *fs_info = trans->fs_info; | ||
5922 | |||
5923 | if (!trans->chunk_bytes_reserved) | ||
5924 | return; | ||
5925 | |||
5926 | WARN_ON_ONCE(!list_empty(&trans->new_bgs)); | ||
5927 | |||
5928 | block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, | ||
5929 | trans->chunk_bytes_reserved, NULL); | ||
5930 | trans->chunk_bytes_reserved = 0; | ||
5931 | } | ||
5932 | |||
5933 | /* | ||
5934 | * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation | ||
5935 | * root: the root of the parent directory | ||
5936 | * rsv: block reservation | ||
5937 | * items: the number of items that we need to reserve for | ||
5938 | * use_global_rsv: allow fallback to the global block reservation | ||
5939 | * | ||
5940 | * This function is used to reserve the space for snapshot/subvolume | ||
5941 | * creation and deletion. Those operations differ from common | ||
5942 | * file/directory operations: they change two fs/file trees and the | ||
5943 | * root tree, and the number of items the qgroup reserves differs | ||
5944 | * from the free space reservation. So we can not use the space | ||
5945 | * reservation mechanism in start_transaction(). | ||
5946 | */ | ||
5947 | int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | ||
5948 | struct btrfs_block_rsv *rsv, int items, | ||
5949 | bool use_global_rsv) | ||
5950 | { | ||
5951 | u64 qgroup_num_bytes = 0; | ||
5952 | u64 num_bytes; | ||
5953 | int ret; | ||
5954 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
5955 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
5956 | |||
5957 | if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { | ||
5958 | /* One for parent inode, two for dir entries */ | ||
5959 | qgroup_num_bytes = 3 * fs_info->nodesize; | ||
5960 | ret = btrfs_qgroup_reserve_meta_prealloc(root, | ||
5961 | qgroup_num_bytes, true); | ||
5962 | if (ret) | ||
5963 | return ret; | ||
5964 | } | ||
5965 | |||
5966 | num_bytes = btrfs_calc_trans_metadata_size(fs_info, items); | ||
5967 | rsv->space_info = __find_space_info(fs_info, | ||
5968 | BTRFS_BLOCK_GROUP_METADATA); | ||
5969 | ret = btrfs_block_rsv_add(root, rsv, num_bytes, | ||
5970 | BTRFS_RESERVE_FLUSH_ALL); | ||
5971 | |||
5972 | if (ret == -ENOSPC && use_global_rsv) | ||
5973 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true); | ||
5974 | |||
5975 | if (ret && qgroup_num_bytes) | ||
5976 | btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); | ||
5977 | |||
5978 | return ret; | ||
5979 | } | ||
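/*
 * Editor's sketch of the amounts chosen above for snapshot/subvolume
 * creation and deletion: three nodesize units of qgroup space (parent
 * inode plus two dir items) and @items transaction items of metadata,
 * with btrfs_block_rsv_migrate() from the global rsv as the fallback when
 * flushing cannot find the space.  The per-item cost uses the same
 * assumption as the previous sketch; names are illustrative.
 */
static void toy_subvol_reserve_sizes(uint32_t nodesize, unsigned int items,
				     uint64_t *qgroup_bytes,
				     uint64_t *meta_bytes)
{
	*qgroup_bytes = 3ULL * nodesize;
	*meta_bytes = (uint64_t)nodesize * TOY_MAX_LEVEL * 2 * items;
}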
5980 | |||
5981 | void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, | ||
5982 | struct btrfs_block_rsv *rsv) | ||
5983 | { | ||
5984 | btrfs_block_rsv_release(fs_info, rsv, (u64)-1); | ||
5985 | } | ||
5986 | |||
5987 | static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, | ||
5988 | struct btrfs_inode *inode) | ||
5989 | { | ||
5990 | struct btrfs_block_rsv *block_rsv = &inode->block_rsv; | ||
5991 | u64 reserve_size = 0; | ||
5992 | u64 qgroup_rsv_size = 0; | ||
5993 | u64 csum_leaves; | ||
5994 | unsigned outstanding_extents; | ||
5995 | |||
5996 | lockdep_assert_held(&inode->lock); | ||
5997 | outstanding_extents = inode->outstanding_extents; | ||
5998 | if (outstanding_extents) | ||
5999 | reserve_size = btrfs_calc_trans_metadata_size(fs_info, | ||
6000 | outstanding_extents + 1); | ||
6001 | csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, | ||
6002 | inode->csum_bytes); | ||
6003 | reserve_size += btrfs_calc_trans_metadata_size(fs_info, | ||
6004 | csum_leaves); | ||
6005 | /* | ||
6006 | * For qgroup rsv, the calculation is very simple: | ||
6007 | * account one nodesize for each outstanding extent | ||
6008 | * | ||
6009 | * This is overestimating in most cases. | ||
6010 | */ | ||
6011 | qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; | ||
6012 | |||
6013 | spin_lock(&block_rsv->lock); | ||
6014 | block_rsv->size = reserve_size; | ||
6015 | block_rsv->qgroup_rsv_size = qgroup_rsv_size; | ||
6016 | spin_unlock(&block_rsv->lock); | ||
6017 | } | ||
6018 | |||
6019 | static void calc_inode_reservations(struct btrfs_fs_info *fs_info, | ||
6020 | u64 num_bytes, u64 *meta_reserve, | ||
6021 | u64 *qgroup_reserve) | ||
6022 | { | ||
6023 | u64 nr_extents = count_max_extents(num_bytes); | ||
6024 | u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes); | ||
6025 | |||
6026 | /* We add one for the inode update at finish ordered time */ | ||
6027 | *meta_reserve = btrfs_calc_trans_metadata_size(fs_info, | ||
6028 | nr_extents + csum_leaves + 1); | ||
6029 | *qgroup_reserve = nr_extents * fs_info->nodesize; | ||
6030 | } | ||
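/*
 * Editor's sketch of the over-reserve computed by the two helpers above.
 * Assumptions, since the helpers live elsewhere: count_max_extents()
 * splits the range into BTRFS_MAX_EXTENT_SIZE (128M) pieces, and
 * btrfs_csum_bytes_to_leaves() works out how many csum leaves the range
 * can need (taken as an input here rather than recomputed).  The extra
 * "+ 1" item is the inode update at ordered-extent completion.  Values
 * are illustrative, not the exact kernel formulas.
 */
#include <stdint.h>

#define TOY_MAX_EXTENT_SIZE (128ULL * 1024 * 1024)

static void toy_inode_reservations(uint32_t nodesize, uint64_t num_bytes,
				   uint64_t csum_leaves,
				   uint64_t *meta_items, uint64_t *qgroup_bytes)
{
	uint64_t nr_extents = (num_bytes + TOY_MAX_EXTENT_SIZE - 1) /
			      TOY_MAX_EXTENT_SIZE;

	/* one item per extent, one per csum leaf, plus the inode item */
	*meta_items = nr_extents + csum_leaves + 1;
	/* qgroups charge one nodesize per outstanding extent */
	*qgroup_bytes = nr_extents * nodesize;
}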
6031 | |||
6032 | int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) | ||
6033 | { | ||
6034 | struct btrfs_root *root = inode->root; | ||
6035 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
6036 | struct btrfs_block_rsv *block_rsv = &inode->block_rsv; | ||
6037 | u64 meta_reserve, qgroup_reserve; | ||
6038 | unsigned nr_extents; | ||
6039 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; | ||
6040 | int ret = 0; | ||
6041 | bool delalloc_lock = true; | ||
6042 | |||
6043 | /* If we are a free space inode we need to not flush since we will be in | ||
6044 | * the middle of a transaction commit. We also don't need the delalloc | ||
6045 | * mutex since we won't race with anybody. We need this mostly to make | ||
6046 | * lockdep shut its filthy mouth. | ||
6047 | * | ||
6048 | * If we have a transaction open (can happen if we call truncate_block | ||
6049 | * from truncate), then we need FLUSH_LIMIT so we don't deadlock. | ||
6050 | */ | ||
6051 | if (btrfs_is_free_space_inode(inode)) { | ||
6052 | flush = BTRFS_RESERVE_NO_FLUSH; | ||
6053 | delalloc_lock = false; | ||
6054 | } else { | ||
6055 | if (current->journal_info) | ||
6056 | flush = BTRFS_RESERVE_FLUSH_LIMIT; | ||
6057 | |||
6058 | if (btrfs_transaction_in_commit(fs_info)) | ||
6059 | schedule_timeout(1); | ||
6060 | } | ||
6061 | |||
6062 | if (delalloc_lock) | ||
6063 | mutex_lock(&inode->delalloc_mutex); | ||
6064 | |||
6065 | num_bytes = ALIGN(num_bytes, fs_info->sectorsize); | ||
6066 | |||
6067 | /* | ||
6068 | * We always want to do it this way, every other way is wrong and ends | ||
6069 | * in tears. Pre-reserving the amount we are going to add will always | ||
6070 | * be the right way, because otherwise if we have enough parallelism we | ||
6071 | * could end up with thousands of inodes all holding little bits of | ||
6072 | * reservations they were able to make previously and the only way to | ||
6073 | * reclaim that space is to ENOSPC out the operations and clear | ||
6074 | * everything out and try again, which is bad. This way we just | ||
6075 | * over-reserve slightly, and clean up the mess when we are done. | ||
6076 | */ | ||
6077 | calc_inode_reservations(fs_info, num_bytes, &meta_reserve, | ||
6078 | &qgroup_reserve); | ||
6079 | ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); | ||
6080 | if (ret) | ||
6081 | goto out_fail; | ||
6082 | ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); | ||
6083 | if (ret) | ||
6084 | goto out_qgroup; | ||
6085 | |||
6086 | /* | ||
6087 | * Now we need to update our outstanding extents and csum bytes _first_ | ||
6088 | * and then add the reservation to the block_rsv. This keeps us from | ||
6089 | * racing with an ordered completion or some such that would think it | ||
6090 | * needs to free the reservation we just made. | ||
6091 | */ | ||
6092 | spin_lock(&inode->lock); | ||
6093 | nr_extents = count_max_extents(num_bytes); | ||
6094 | btrfs_mod_outstanding_extents(inode, nr_extents); | ||
6095 | inode->csum_bytes += num_bytes; | ||
6096 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); | ||
6097 | spin_unlock(&inode->lock); | ||
6098 | |||
6099 | /* Now we can safely add our space to our block rsv */ | ||
6100 | block_rsv_add_bytes(block_rsv, meta_reserve, false); | ||
6101 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | ||
6102 | btrfs_ino(inode), meta_reserve, 1); | ||
6103 | |||
6104 | spin_lock(&block_rsv->lock); | ||
6105 | block_rsv->qgroup_rsv_reserved += qgroup_reserve; | ||
6106 | spin_unlock(&block_rsv->lock); | ||
6107 | |||
6108 | if (delalloc_lock) | ||
6109 | mutex_unlock(&inode->delalloc_mutex); | ||
6110 | return 0; | ||
6111 | out_qgroup: | ||
6112 | btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); | ||
6113 | out_fail: | ||
6114 | btrfs_inode_rsv_release(inode, true); | ||
6115 | if (delalloc_lock) | ||
6116 | mutex_unlock(&inode->delalloc_mutex); | ||
6117 | return ret; | ||
6118 | } | ||
6119 | |||
6120 | /** | ||
6121 | * btrfs_delalloc_release_metadata - release a metadata reservation for an inode | ||
6122 | * @inode: the inode to release the reservation for. | ||
6123 | * @num_bytes: the number of bytes we are releasing. | ||
6124 | * @qgroup_free: free qgroup reservation or convert it to per-trans reservation | ||
6125 | * | ||
6126 | * This will release the metadata reservation for an inode. This can be called | ||
6127 | * once we complete IO for a given set of bytes to release their metadata | ||
6128 | * reservations, or on error for the same reason. | ||
6129 | */ | ||
6130 | void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, | ||
6131 | bool qgroup_free) | ||
6132 | { | ||
6133 | struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||
6134 | |||
6135 | num_bytes = ALIGN(num_bytes, fs_info->sectorsize); | ||
6136 | spin_lock(&inode->lock); | ||
6137 | inode->csum_bytes -= num_bytes; | ||
6138 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); | ||
6139 | spin_unlock(&inode->lock); | ||
6140 | |||
6141 | if (btrfs_is_testing(fs_info)) | ||
6142 | return; | ||
6143 | |||
6144 | btrfs_inode_rsv_release(inode, qgroup_free); | ||
6145 | } | ||
6146 | |||
6147 | /** | ||
6148 | * btrfs_delalloc_release_extents - release our outstanding_extents | ||
6149 | * @inode: the inode to balance the reservation for. | ||
6150 | * @num_bytes: the number of bytes we originally reserved | ||
6151 | * @qgroup_free: do we need to free qgroup meta reservation or convert them. | ||
6152 | * | ||
6153 | * When we reserve space we increase outstanding_extents for the extents we may | ||
6154 | * add. Once we've set the range as delalloc or created our ordered extents we | ||
6155 | * have outstanding_extents to track the real usage, so we use this to free our | ||
6156 | * temporarily tracked outstanding_extents. This _must_ be used in conjunction | ||
6157 | * with btrfs_delalloc_reserve_metadata. | ||
6158 | */ | ||
6159 | void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, | ||
6160 | bool qgroup_free) | ||
6161 | { | ||
6162 | struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||
6163 | unsigned num_extents; | ||
6164 | |||
6165 | spin_lock(&inode->lock); | ||
6166 | num_extents = count_max_extents(num_bytes); | ||
6167 | btrfs_mod_outstanding_extents(inode, -num_extents); | ||
6168 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); | ||
6169 | spin_unlock(&inode->lock); | ||
6170 | |||
6171 | if (btrfs_is_testing(fs_info)) | ||
6172 | return; | ||
6173 | |||
6174 | btrfs_inode_rsv_release(inode, qgroup_free); | ||
6175 | } | ||
6176 | |||
6177 | /** | ||
6178 | * btrfs_delalloc_reserve_space - reserve data and metadata space for | ||
6179 | * delalloc | ||
6180 | * @inode: inode we're writing to | ||
6181 | * @start: start of the range we are writing to | ||
6182 | * @len: length of the range we are writing to | ||
6183 | * @reserved: mandatory parameter, records the qgroup ranges actually reserved | ||
6184 | * by the current reservation. | ||
6185 | * | ||
6186 | * This will do the following things | ||
6187 | * | ||
6188 | * o reserve space in data space info for num bytes | ||
6189 | * and reserve the corresponding qgroup space | ||
6190 | * (Done in check_data_free_space) | ||
6191 | * | ||
6192 | * o reserve space for metadata space, based on the number of outstanding | ||
6193 | * extents and how much csums will be needed | ||
6194 | * also reserve metadata space using a per-root over-reserve method. | ||
6195 | * o add to the inode's delalloc_bytes | ||
6196 | * o add it to the fs_info's delalloc inodes list. | ||
6197 | * (Above 3 all done in delalloc_reserve_metadata) | ||
6198 | * | ||
6199 | * Return 0 for success | ||
6200 | * Return <0 for error (-ENOSPC or -EDQUOT) | ||
6201 | */ | ||
6202 | int btrfs_delalloc_reserve_space(struct inode *inode, | ||
6203 | struct extent_changeset **reserved, u64 start, u64 len) | ||
6204 | { | ||
6205 | int ret; | ||
6206 | |||
6207 | ret = btrfs_check_data_free_space(inode, reserved, start, len); | ||
6208 | if (ret < 0) | ||
6209 | return ret; | ||
6210 | ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); | ||
6211 | if (ret < 0) | ||
6212 | btrfs_free_reserved_data_space(inode, *reserved, start, len); | ||
6213 | return ret; | ||
6214 | } | ||
6215 | |||
6216 | /** | ||
6217 | * btrfs_delalloc_release_space - release data and metadata space for delalloc | ||
6218 | * @inode: inode we're releasing space for | ||
6219 | * @reserved: qgroup ranges recorded when the space was reserved | ||
6220 | * @start: start position of the space already reserved | ||
6221 | * @len: the length of the space already reserved | ||
6222 | * | ||
6223 | * This function will release the metadata space that was not used and will | ||
6224 | * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes | ||
6225 | * list if there are no delalloc bytes left. | ||
6226 | * Also it will handle the qgroup reserved space. | ||
6227 | */ | ||
6228 | void btrfs_delalloc_release_space(struct inode *inode, | ||
6229 | struct extent_changeset *reserved, | ||
6230 | u64 start, u64 len, bool qgroup_free) | ||
6231 | { | ||
6232 | btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free); | ||
6233 | btrfs_free_reserved_data_space(inode, reserved, start, len); | ||
6234 | } | ||
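/*
 * Editor's note: schematic of how the three delalloc entry points above
 * pair up in a write path; not compilable as-is, with error handling and
 * locking trimmed.  Only functions whose signatures appear in this file
 * are used, and "something_failed" is a placeholder condition.
 *
 *	ret = btrfs_delalloc_reserve_space(inode, &reserved, start, len);
 *	if (ret)
 *		return ret;	// reserve_space already cleaned up after itself
 *
 *	...set the range delalloc or create the ordered extent...
 *
 *	// always drop the temporary outstanding_extents bump
 *	btrfs_delalloc_release_extents(BTRFS_I(inode), len, false);
 *
 *	if (something_failed)
 *		// return both the data and the metadata reservation
 *		btrfs_delalloc_release_space(inode, reserved, start, len, true);
 */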
6235 | |||
6236 | static int update_block_group(struct btrfs_trans_handle *trans, | 4108 | static int update_block_group(struct btrfs_trans_handle *trans, |
6237 | u64 bytenr, u64 num_bytes, int alloc) | 4109 | u64 bytenr, u64 num_bytes, int alloc) |
6238 | { | 4110 | { |
@@ -6296,7 +4168,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
6296 | old_val -= num_bytes; | 4168 | old_val -= num_bytes; |
6297 | btrfs_set_block_group_used(&cache->item, old_val); | 4169 | btrfs_set_block_group_used(&cache->item, old_val); |
6298 | cache->pinned += num_bytes; | 4170 | cache->pinned += num_bytes; |
6299 | update_bytes_pinned(cache->space_info, num_bytes); | 4171 | btrfs_space_info_update_bytes_pinned(info, |
4172 | cache->space_info, num_bytes); | ||
6300 | cache->space_info->bytes_used -= num_bytes; | 4173 | cache->space_info->bytes_used -= num_bytes; |
6301 | cache->space_info->disk_used -= num_bytes * factor; | 4174 | cache->space_info->disk_used -= num_bytes * factor; |
6302 | spin_unlock(&cache->lock); | 4175 | spin_unlock(&cache->lock); |
@@ -6371,7 +4244,8 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache, | |||
6371 | spin_lock(&cache->space_info->lock); | 4244 | spin_lock(&cache->space_info->lock); |
6372 | spin_lock(&cache->lock); | 4245 | spin_lock(&cache->lock); |
6373 | cache->pinned += num_bytes; | 4246 | cache->pinned += num_bytes; |
6374 | update_bytes_pinned(cache->space_info, num_bytes); | 4247 | btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info, |
4248 | num_bytes); | ||
6375 | if (reserved) { | 4249 | if (reserved) { |
6376 | cache->reserved -= num_bytes; | 4250 | cache->reserved -= num_bytes; |
6377 | cache->space_info->bytes_reserved -= num_bytes; | 4251 | cache->space_info->bytes_reserved -= num_bytes; |
@@ -6580,7 +4454,8 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
6580 | } else { | 4454 | } else { |
6581 | cache->reserved += num_bytes; | 4455 | cache->reserved += num_bytes; |
6582 | space_info->bytes_reserved += num_bytes; | 4456 | space_info->bytes_reserved += num_bytes; |
6583 | update_bytes_may_use(space_info, -ram_bytes); | 4457 | btrfs_space_info_update_bytes_may_use(cache->fs_info, |
4458 | space_info, -ram_bytes); | ||
6584 | if (delalloc) | 4459 | if (delalloc) |
6585 | cache->delalloc_bytes += num_bytes; | 4460 | cache->delalloc_bytes += num_bytes; |
6586 | } | 4461 | } |
@@ -6646,7 +4521,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) | |||
6646 | 4521 | ||
6647 | up_write(&fs_info->commit_root_sem); | 4522 | up_write(&fs_info->commit_root_sem); |
6648 | 4523 | ||
6649 | update_global_block_rsv(fs_info); | 4524 | btrfs_update_global_block_rsv(fs_info); |
6650 | } | 4525 | } |
6651 | 4526 | ||
6652 | /* | 4527 | /* |
@@ -6736,7 +4611,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, | |||
6736 | spin_lock(&space_info->lock); | 4611 | spin_lock(&space_info->lock); |
6737 | spin_lock(&cache->lock); | 4612 | spin_lock(&cache->lock); |
6738 | cache->pinned -= len; | 4613 | cache->pinned -= len; |
6739 | update_bytes_pinned(space_info, -len); | 4614 | btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len); |
6740 | 4615 | ||
6741 | trace_btrfs_space_reservation(fs_info, "pinned", | 4616 | trace_btrfs_space_reservation(fs_info, "pinned", |
6742 | space_info->flags, len, 0); | 4617 | space_info->flags, len, 0); |
@@ -6757,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, | |||
6757 | to_add = min(len, global_rsv->size - | 4632 | to_add = min(len, global_rsv->size - |
6758 | global_rsv->reserved); | 4633 | global_rsv->reserved); |
6759 | global_rsv->reserved += to_add; | 4634 | global_rsv->reserved += to_add; |
6760 | update_bytes_may_use(space_info, to_add); | 4635 | btrfs_space_info_update_bytes_may_use(fs_info, |
4636 | space_info, to_add); | ||
6761 | if (global_rsv->reserved >= global_rsv->size) | 4637 | if (global_rsv->reserved >= global_rsv->size) |
6762 | global_rsv->full = 1; | 4638 | global_rsv->full = 1; |
6763 | trace_btrfs_space_reservation(fs_info, | 4639 | trace_btrfs_space_reservation(fs_info, |
@@ -6769,8 +4645,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, | |||
6769 | spin_unlock(&global_rsv->lock); | 4645 | spin_unlock(&global_rsv->lock); |
6770 | /* Add to any tickets we may have */ | 4646 | /* Add to any tickets we may have */ |
6771 | if (len) | 4647 | if (len) |
6772 | space_info_add_new_bytes(fs_info, space_info, | 4648 | btrfs_space_info_add_new_bytes(fs_info, |
6773 | len); | 4649 | space_info, len); |
6774 | } | 4650 | } |
6775 | spin_unlock(&space_info->lock); | 4651 | spin_unlock(&space_info->lock); |
6776 | } | 4652 | } |
@@ -7191,7 +5067,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7191 | } | 5067 | } |
7192 | out: | 5068 | out: |
7193 | if (pin) | 5069 | if (pin) |
7194 | add_pinned_bytes(fs_info, &generic_ref, 1); | 5070 | add_pinned_bytes(fs_info, &generic_ref); |
7195 | 5071 | ||
7196 | if (last_ref) { | 5072 | if (last_ref) { |
7197 | /* | 5073 | /* |
@@ -7239,7 +5115,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) | |||
7239 | btrfs_ref_tree_mod(fs_info, ref); | 5115 | btrfs_ref_tree_mod(fs_info, ref); |
7240 | 5116 | ||
7241 | if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) | 5117 | if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) |
7242 | add_pinned_bytes(fs_info, ref, 1); | 5118 | add_pinned_bytes(fs_info, ref); |
7243 | 5119 | ||
7244 | return ret; | 5120 | return ret; |
7245 | } | 5121 | } |
@@ -7292,10 +5168,10 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
7292 | } | 5168 | } |
7293 | 5169 | ||
7294 | enum btrfs_loop_type { | 5170 | enum btrfs_loop_type { |
7295 | LOOP_CACHING_NOWAIT = 0, | 5171 | LOOP_CACHING_NOWAIT, |
7296 | LOOP_CACHING_WAIT = 1, | 5172 | LOOP_CACHING_WAIT, |
7297 | LOOP_ALLOC_CHUNK = 2, | 5173 | LOOP_ALLOC_CHUNK, |
7298 | LOOP_NO_EMPTY_SIZE = 3, | 5174 | LOOP_NO_EMPTY_SIZE, |
7299 | }; | 5175 | }; |
7300 | 5176 | ||
7301 | static inline void | 5177 | static inline void |
@@ -7661,8 +5537,8 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, | |||
7661 | return ret; | 5537 | return ret; |
7662 | } | 5538 | } |
7663 | 5539 | ||
7664 | ret = do_chunk_alloc(trans, ffe_ctl->flags, | 5540 | ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, |
7665 | CHUNK_ALLOC_FORCE); | 5541 | CHUNK_ALLOC_FORCE); |
7666 | 5542 | ||
7667 | /* | 5543 | /* |
7668 | * If we can't allocate a new chunk we've already looped | 5544 | * If we can't allocate a new chunk we've already looped |
@@ -7758,7 +5634,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, | |||
7758 | 5634 | ||
7759 | trace_find_free_extent(fs_info, num_bytes, empty_size, flags); | 5635 | trace_find_free_extent(fs_info, num_bytes, empty_size, flags); |
7760 | 5636 | ||
7761 | space_info = __find_space_info(fs_info, flags); | 5637 | space_info = btrfs_find_space_info(fs_info, flags); |
7762 | if (!space_info) { | 5638 | if (!space_info) { |
7763 | btrfs_err(fs_info, "No space info for %llu", flags); | 5639 | btrfs_err(fs_info, "No space info for %llu", flags); |
7764 | return -ENOSPC; | 5640 | return -ENOSPC; |
@@ -7863,9 +5739,8 @@ search: | |||
7863 | */ | 5739 | */ |
7864 | if (!block_group_bits(block_group, flags)) { | 5740 | if (!block_group_bits(block_group, flags)) { |
7865 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | 5741 | u64 extra = BTRFS_BLOCK_GROUP_DUP | |
7866 | BTRFS_BLOCK_GROUP_RAID1 | | 5742 | BTRFS_BLOCK_GROUP_RAID1_MASK | |
7867 | BTRFS_BLOCK_GROUP_RAID5 | | 5743 | BTRFS_BLOCK_GROUP_RAID56_MASK | |
7868 | BTRFS_BLOCK_GROUP_RAID6 | | ||
7869 | BTRFS_BLOCK_GROUP_RAID10; | 5744 | BTRFS_BLOCK_GROUP_RAID10; |
7870 | 5745 | ||
7871 | /* | 5746 | /* |
@@ -7984,60 +5859,6 @@ loop: | |||
7984 | return ret; | 5859 | return ret; |
7985 | } | 5860 | } |
7986 | 5861 | ||
7987 | #define DUMP_BLOCK_RSV(fs_info, rsv_name) \ | ||
7988 | do { \ | ||
7989 | struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \ | ||
7990 | spin_lock(&__rsv->lock); \ | ||
7991 | btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \ | ||
7992 | __rsv->size, __rsv->reserved); \ | ||
7993 | spin_unlock(&__rsv->lock); \ | ||
7994 | } while (0) | ||
7995 | |||
7996 | static void dump_space_info(struct btrfs_fs_info *fs_info, | ||
7997 | struct btrfs_space_info *info, u64 bytes, | ||
7998 | int dump_block_groups) | ||
7999 | { | ||
8000 | struct btrfs_block_group_cache *cache; | ||
8001 | int index = 0; | ||
8002 | |||
8003 | spin_lock(&info->lock); | ||
8004 | btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", | ||
8005 | info->flags, | ||
8006 | info->total_bytes - btrfs_space_info_used(info, true), | ||
8007 | info->full ? "" : "not "); | ||
8008 | btrfs_info(fs_info, | ||
8009 | "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", | ||
8010 | info->total_bytes, info->bytes_used, info->bytes_pinned, | ||
8011 | info->bytes_reserved, info->bytes_may_use, | ||
8012 | info->bytes_readonly); | ||
8013 | spin_unlock(&info->lock); | ||
8014 | |||
8015 | DUMP_BLOCK_RSV(fs_info, global_block_rsv); | ||
8016 | DUMP_BLOCK_RSV(fs_info, trans_block_rsv); | ||
8017 | DUMP_BLOCK_RSV(fs_info, chunk_block_rsv); | ||
8018 | DUMP_BLOCK_RSV(fs_info, delayed_block_rsv); | ||
8019 | DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv); | ||
8020 | |||
8021 | if (!dump_block_groups) | ||
8022 | return; | ||
8023 | |||
8024 | down_read(&info->groups_sem); | ||
8025 | again: | ||
8026 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
8027 | spin_lock(&cache->lock); | ||
8028 | btrfs_info(fs_info, | ||
8029 | "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s", | ||
8030 | cache->key.objectid, cache->key.offset, | ||
8031 | btrfs_block_group_used(&cache->item), cache->pinned, | ||
8032 | cache->reserved, cache->ro ? "[readonly]" : ""); | ||
8033 | btrfs_dump_free_space(cache, bytes); | ||
8034 | spin_unlock(&cache->lock); | ||
8035 | } | ||
8036 | if (++index < BTRFS_NR_RAID_TYPES) | ||
8037 | goto again; | ||
8038 | up_read(&info->groups_sem); | ||
8039 | } | ||
8040 | |||
8041 | /* | 5862 | /* |
8042 | * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a | 5863 | * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a |
8043 | * hole that is at least as big as @num_bytes. | 5864 | * hole that is at least as big as @num_bytes. |
@@ -8113,12 +5934,13 @@ again: | |||
8113 | } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { | 5934 | } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { |
8114 | struct btrfs_space_info *sinfo; | 5935 | struct btrfs_space_info *sinfo; |
8115 | 5936 | ||
8116 | sinfo = __find_space_info(fs_info, flags); | 5937 | sinfo = btrfs_find_space_info(fs_info, flags); |
8117 | btrfs_err(fs_info, | 5938 | btrfs_err(fs_info, |
8118 | "allocation failed flags %llu, wanted %llu", | 5939 | "allocation failed flags %llu, wanted %llu", |
8119 | flags, num_bytes); | 5940 | flags, num_bytes); |
8120 | if (sinfo) | 5941 | if (sinfo) |
8121 | dump_space_info(fs_info, sinfo, num_bytes, 1); | 5942 | btrfs_dump_space_info(fs_info, sinfo, |
5943 | num_bytes, 1); | ||
8122 | } | 5944 | } |
8123 | } | 5945 | } |
8124 | 5946 | ||
@@ -8456,73 +6278,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
8456 | return buf; | 6278 | return buf; |
8457 | } | 6279 | } |
8458 | 6280 | ||
8459 | static struct btrfs_block_rsv * | ||
8460 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
8461 | struct btrfs_root *root, u32 blocksize) | ||
8462 | { | ||
8463 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
8464 | struct btrfs_block_rsv *block_rsv; | ||
8465 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
8466 | int ret; | ||
8467 | bool global_updated = false; | ||
8468 | |||
8469 | block_rsv = get_block_rsv(trans, root); | ||
8470 | |||
8471 | if (unlikely(block_rsv->size == 0)) | ||
8472 | goto try_reserve; | ||
8473 | again: | ||
8474 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
8475 | if (!ret) | ||
8476 | return block_rsv; | ||
8477 | |||
8478 | if (block_rsv->failfast) | ||
8479 | return ERR_PTR(ret); | ||
8480 | |||
8481 | if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { | ||
8482 | global_updated = true; | ||
8483 | update_global_block_rsv(fs_info); | ||
8484 | goto again; | ||
8485 | } | ||
8486 | |||
8487 | /* | ||
8488 | * The global reserve still exists to save us from ourselves, so don't | ||
8489 | * warn_on if we are short on our delayed refs reserve. | ||
8490 | */ | ||
8491 | if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS && | ||
8492 | btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { | ||
8493 | static DEFINE_RATELIMIT_STATE(_rs, | ||
8494 | DEFAULT_RATELIMIT_INTERVAL * 10, | ||
8495 | /*DEFAULT_RATELIMIT_BURST*/ 1); | ||
8496 | if (__ratelimit(&_rs)) | ||
8497 | WARN(1, KERN_DEBUG | ||
8498 | "BTRFS: block rsv returned %d\n", ret); | ||
8499 | } | ||
8500 | try_reserve: | ||
8501 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, | ||
8502 | BTRFS_RESERVE_NO_FLUSH); | ||
8503 | if (!ret) | ||
8504 | return block_rsv; | ||
8505 | /* | ||
8506 | * If we couldn't reserve metadata bytes try and use some from | ||
8507 | * the global reserve if its space type is the same as the global | ||
8508 | * reservation. | ||
8509 | */ | ||
8510 | if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && | ||
8511 | block_rsv->space_info == global_rsv->space_info) { | ||
8512 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
8513 | if (!ret) | ||
8514 | return global_rsv; | ||
8515 | } | ||
8516 | return ERR_PTR(ret); | ||
8517 | } | ||
8518 | |||
8519 | static void unuse_block_rsv(struct btrfs_fs_info *fs_info, | ||
8520 | struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
8521 | { | ||
8522 | block_rsv_add_bytes(block_rsv, blocksize, false); | ||
8523 | block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); | ||
8524 | } | ||
8525 | |||
8526 | /* | 6281 | /* |
8527 | * finds a free extent and does all the dirty work required for allocation | 6282 | * finds a free extent and does all the dirty work required for allocation |
8528 | * returns the tree buffer or an ERR_PTR on error. | 6283 | * returns the tree buffer or an ERR_PTR on error. |
@@ -8555,7 +6310,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
8555 | } | 6310 | } |
8556 | #endif | 6311 | #endif |
8557 | 6312 | ||
8558 | block_rsv = use_block_rsv(trans, root, blocksize); | 6313 | block_rsv = btrfs_use_block_rsv(trans, root, blocksize); |
8559 | if (IS_ERR(block_rsv)) | 6314 | if (IS_ERR(block_rsv)) |
8560 | return ERR_CAST(block_rsv); | 6315 | return ERR_CAST(block_rsv); |
8561 | 6316 | ||
@@ -8613,7 +6368,7 @@ out_free_buf: | |||
8613 | out_free_reserved: | 6368 | out_free_reserved: |
8614 | btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); | 6369 | btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); |
8615 | out_unuse: | 6370 | out_unuse: |
8616 | unuse_block_rsv(fs_info, block_rsv, blocksize); | 6371 | btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize); |
8617 | return ERR_PTR(ret); | 6372 | return ERR_PTR(ret); |
8618 | } | 6373 | } |
8619 | 6374 | ||
@@ -9552,9 +7307,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) | |||
9552 | 7307 | ||
9553 | num_devices = fs_info->fs_devices->rw_devices; | 7308 | num_devices = fs_info->fs_devices->rw_devices; |
9554 | 7309 | ||
9555 | stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7310 | stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK | |
9556 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | | 7311 | BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10; |
9557 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | ||
9558 | 7312 | ||
9559 | if (num_devices == 1) { | 7313 | if (num_devices == 1) { |
9560 | stripped |= BTRFS_BLOCK_GROUP_DUP; | 7314 | stripped |= BTRFS_BLOCK_GROUP_DUP; |
@@ -9565,7 +7319,7 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) | |||
9565 | return stripped; | 7319 | return stripped; |
9566 | 7320 | ||
9567 | /* turn mirroring into duplication */ | 7321 | /* turn mirroring into duplication */ |
9568 | if (flags & (BTRFS_BLOCK_GROUP_RAID1 | | 7322 | if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK | |
9569 | BTRFS_BLOCK_GROUP_RAID10)) | 7323 | BTRFS_BLOCK_GROUP_RAID10)) |
9570 | return stripped | BTRFS_BLOCK_GROUP_DUP; | 7324 | return stripped | BTRFS_BLOCK_GROUP_DUP; |
9571 | } else { | 7325 | } else { |
@@ -9636,7 +7390,7 @@ out: | |||
9636 | btrfs_info(cache->fs_info, | 7390 | btrfs_info(cache->fs_info, |
9637 | "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu", | 7391 | "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu", |
9638 | sinfo_used, num_bytes, min_allocable_bytes); | 7392 | sinfo_used, num_bytes, min_allocable_bytes); |
9639 | dump_space_info(cache->fs_info, cache->space_info, 0, 0); | 7393 | btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); |
9640 | } | 7394 | } |
9641 | return ret; | 7395 | return ret; |
9642 | } | 7396 | } |
@@ -9678,8 +7432,7 @@ again: | |||
9678 | */ | 7432 | */ |
9679 | alloc_flags = update_block_group_flags(fs_info, cache->flags); | 7433 | alloc_flags = update_block_group_flags(fs_info, cache->flags); |
9680 | if (alloc_flags != cache->flags) { | 7434 | if (alloc_flags != cache->flags) { |
9681 | ret = do_chunk_alloc(trans, alloc_flags, | 7435 | ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); |
9682 | CHUNK_ALLOC_FORCE); | ||
9683 | /* | 7436 | /* |
9684 | * ENOSPC is allowed here, we may have enough space | 7437 | * ENOSPC is allowed here, we may have enough space |
9685 | * already allocated at the new raid level to | 7438 | * already allocated at the new raid level to |
@@ -9695,7 +7448,7 @@ again: | |||
9695 | if (!ret) | 7448 | if (!ret) |
9696 | goto out; | 7449 | goto out; |
9697 | alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); | 7450 | alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); |
9698 | ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); | 7451 | ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); |
9699 | if (ret < 0) | 7452 | if (ret < 0) |
9700 | goto out; | 7453 | goto out; |
9701 | ret = inc_block_group_ro(cache, 0); | 7454 | ret = inc_block_group_ro(cache, 0); |
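The retry logic around inc_block_group_ro() can be read as: attempt to mark the group read-only, and if that fails, force a chunk allocation in the current profile and try once more; ENOSPC from the forced allocation is tolerated because already-allocated chunks may provide enough space. A simplified sketch of that flow, not the literal function body:

/* Simplified sketch of the mark-read-only retry shown above. */
ret = inc_block_group_ro(cache, 0);
if (ret) {
	alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
	if (ret >= 0)
		ret = inc_block_group_ro(cache, 0);
}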
@@ -9716,7 +7469,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) | |||
9716 | { | 7469 | { |
9717 | u64 alloc_flags = get_alloc_profile(trans->fs_info, type); | 7470 | u64 alloc_flags = get_alloc_profile(trans->fs_info, type); |
9718 | 7471 | ||
9719 | return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); | 7472 | return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); |
9720 | } | 7473 | } |
9721 | 7474 | ||
9722 | /* | 7475 | /* |
@@ -9949,7 +7702,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, | |||
9949 | struct extent_map_tree *em_tree; | 7702 | struct extent_map_tree *em_tree; |
9950 | struct extent_map *em; | 7703 | struct extent_map *em; |
9951 | 7704 | ||
9952 | em_tree = &root->fs_info->mapping_tree.map_tree; | 7705 | em_tree = &root->fs_info->mapping_tree; |
9953 | read_lock(&em_tree->lock); | 7706 | read_lock(&em_tree->lock); |
9954 | em = lookup_extent_mapping(em_tree, found_key.objectid, | 7707 | em = lookup_extent_mapping(em_tree, found_key.objectid, |
9955 | found_key.offset); | 7708 | found_key.offset); |
@@ -10102,7 +7855,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
10102 | */ | 7855 | */ |
10103 | synchronize_rcu(); | 7856 | synchronize_rcu(); |
10104 | 7857 | ||
10105 | release_global_block_rsv(info); | 7858 | btrfs_release_global_block_rsv(info); |
10106 | 7859 | ||
10107 | while (!list_empty(&info->space_info)) { | 7860 | while (!list_empty(&info->space_info)) { |
10108 | int i; | 7861 | int i; |
@@ -10118,7 +7871,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
10118 | if (WARN_ON(space_info->bytes_pinned > 0 || | 7871 | if (WARN_ON(space_info->bytes_pinned > 0 || |
10119 | space_info->bytes_reserved > 0 || | 7872 | space_info->bytes_reserved > 0 || |
10120 | space_info->bytes_may_use > 0)) | 7873 | space_info->bytes_may_use > 0)) |
10121 | dump_space_info(info, space_info, 0, 0); | 7874 | btrfs_dump_space_info(info, space_info, 0, 0); |
10122 | list_del(&space_info->list); | 7875 | list_del(&space_info->list); |
10123 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | 7876 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { |
10124 | struct kobject *kobj; | 7877 | struct kobject *kobj; |
@@ -10141,7 +7894,6 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) | |||
10141 | struct btrfs_space_info *space_info; | 7894 | struct btrfs_space_info *space_info; |
10142 | struct raid_kobject *rkobj; | 7895 | struct raid_kobject *rkobj; |
10143 | LIST_HEAD(list); | 7896 | LIST_HEAD(list); |
10144 | int index; | ||
10145 | int ret = 0; | 7897 | int ret = 0; |
10146 | 7898 | ||
10147 | spin_lock(&fs_info->pending_raid_kobjs_lock); | 7899 | spin_lock(&fs_info->pending_raid_kobjs_lock); |
@@ -10149,11 +7901,10 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) | |||
10149 | spin_unlock(&fs_info->pending_raid_kobjs_lock); | 7901 | spin_unlock(&fs_info->pending_raid_kobjs_lock); |
10150 | 7902 | ||
10151 | list_for_each_entry(rkobj, &list, list) { | 7903 | list_for_each_entry(rkobj, &list, list) { |
10152 | space_info = __find_space_info(fs_info, rkobj->flags); | 7904 | space_info = btrfs_find_space_info(fs_info, rkobj->flags); |
10153 | index = btrfs_bg_flags_to_raid_index(rkobj->flags); | ||
10154 | 7905 | ||
10155 | ret = kobject_add(&rkobj->kobj, &space_info->kobj, | 7906 | ret = kobject_add(&rkobj->kobj, &space_info->kobj, |
10156 | "%s", get_raid_name(index)); | 7907 | "%s", btrfs_bg_type_to_raid_name(rkobj->flags)); |
10157 | if (ret) { | 7908 | if (ret) { |
10158 | kobject_put(&rkobj->kobj); | 7909 | kobject_put(&rkobj->kobj); |
10159 | break; | 7910 | break; |
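With btrfs_bg_type_to_raid_name() the sysfs name is derived straight from the block group type flags, which is why the separate raid index variable could be dropped. A hedged usage sketch (the returned strings are assumed to be the usual profile names):

/* Assumed behaviour of the helper used above. */
const char *name;

name = btrfs_bg_type_to_raid_name(BTRFS_BLOCK_GROUP_RAID10);	/* "raid10" expected */
name = btrfs_bg_type_to_raid_name(0);				/* no RAID bit set: "single" expected */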
@@ -10243,21 +7994,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, | |||
10243 | */ | 7994 | */ |
10244 | static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) | 7995 | static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) |
10245 | { | 7996 | { |
10246 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | 7997 | struct extent_map_tree *map_tree = &fs_info->mapping_tree; |
10247 | struct extent_map *em; | 7998 | struct extent_map *em; |
10248 | struct btrfs_block_group_cache *bg; | 7999 | struct btrfs_block_group_cache *bg; |
10249 | u64 start = 0; | 8000 | u64 start = 0; |
10250 | int ret = 0; | 8001 | int ret = 0; |
10251 | 8002 | ||
10252 | while (1) { | 8003 | while (1) { |
10253 | read_lock(&map_tree->map_tree.lock); | 8004 | read_lock(&map_tree->lock); |
10254 | /* | 8005 | /* |
10255 | * lookup_extent_mapping will return the first extent map | 8006 | * lookup_extent_mapping will return the first extent map |
10256 | * intersecting the range, so setting @len to 1 is enough to | 8007 | * intersecting the range, so setting @len to 1 is enough to |
10257 | * get the first chunk. | 8008 | * get the first chunk. |
10258 | */ | 8009 | */ |
10259 | em = lookup_extent_mapping(&map_tree->map_tree, start, 1); | 8010 | em = lookup_extent_mapping(map_tree, start, 1); |
10260 | read_unlock(&map_tree->map_tree.lock); | 8011 | read_unlock(&map_tree->lock); |
10261 | if (!em) | 8012 | if (!em) |
10262 | break; | 8013 | break; |
10263 | 8014 | ||
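check_chunk_block_group_mappings() walks every chunk by repeatedly asking for the first extent map that intersects a 1-byte range and then advancing past it. A minimal sketch of that iteration, with the per-chunk block group check elided:

/* Sketch: iterate all chunk mappings in fs_info->mapping_tree. */
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
u64 start = 0;

while (1) {
	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, start, 1);
	read_unlock(&map_tree->lock);
	if (!em)
		break;
	/* ... verify a block group covers [em->start, em->start + em->len) ... */
	start = em->start + em->len;
	free_extent_map(em);
}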
@@ -10417,9 +8168,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) | |||
10417 | } | 8168 | } |
10418 | 8169 | ||
10419 | trace_btrfs_add_block_group(info, cache, 0); | 8170 | trace_btrfs_add_block_group(info, cache, 0); |
10420 | update_space_info(info, cache->flags, found_key.offset, | 8171 | btrfs_update_space_info(info, cache->flags, found_key.offset, |
10421 | btrfs_block_group_used(&cache->item), | 8172 | btrfs_block_group_used(&cache->item), |
10422 | cache->bytes_super, &space_info); | 8173 | cache->bytes_super, &space_info); |
10423 | 8174 | ||
10424 | cache->space_info = space_info; | 8175 | cache->space_info = space_info; |
10425 | 8176 | ||
@@ -10437,9 +8188,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) | |||
10437 | list_for_each_entry_rcu(space_info, &info->space_info, list) { | 8188 | list_for_each_entry_rcu(space_info, &info->space_info, list) { |
10438 | if (!(get_alloc_profile(info, space_info->flags) & | 8189 | if (!(get_alloc_profile(info, space_info->flags) & |
10439 | (BTRFS_BLOCK_GROUP_RAID10 | | 8190 | (BTRFS_BLOCK_GROUP_RAID10 | |
10440 | BTRFS_BLOCK_GROUP_RAID1 | | 8191 | BTRFS_BLOCK_GROUP_RAID1_MASK | |
10441 | BTRFS_BLOCK_GROUP_RAID5 | | 8192 | BTRFS_BLOCK_GROUP_RAID56_MASK | |
10442 | BTRFS_BLOCK_GROUP_RAID6 | | ||
10443 | BTRFS_BLOCK_GROUP_DUP))) | 8193 | BTRFS_BLOCK_GROUP_DUP))) |
10444 | continue; | 8194 | continue; |
10445 | /* | 8195 | /* |
@@ -10457,7 +8207,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) | |||
10457 | } | 8207 | } |
10458 | 8208 | ||
10459 | btrfs_add_raid_kobjects(info); | 8209 | btrfs_add_raid_kobjects(info); |
10460 | init_global_block_rsv(info); | 8210 | btrfs_init_global_block_rsv(info); |
10461 | ret = check_chunk_block_group_mappings(info); | 8211 | ret = check_chunk_block_group_mappings(info); |
10462 | error: | 8212 | error: |
10463 | btrfs_free_path(path); | 8213 | btrfs_free_path(path); |
@@ -10554,7 +8304,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, | |||
10554 | * assigned to our block group. We want our bg to be added to the rbtree | 8304 | * assigned to our block group. We want our bg to be added to the rbtree |
10555 | * with its ->space_info set. | 8305 | * with its ->space_info set. |
10556 | */ | 8306 | */ |
10557 | cache->space_info = __find_space_info(fs_info, cache->flags); | 8307 | cache->space_info = btrfs_find_space_info(fs_info, cache->flags); |
10558 | ASSERT(cache->space_info); | 8308 | ASSERT(cache->space_info); |
10559 | 8309 | ||
10560 | ret = btrfs_add_block_group_cache(fs_info, cache); | 8310 | ret = btrfs_add_block_group_cache(fs_info, cache); |
@@ -10569,9 +8319,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, | |||
10569 | * the rbtree, update the space info's counters. | 8319 | * the rbtree, update the space info's counters. |
10570 | */ | 8320 | */ |
10571 | trace_btrfs_add_block_group(fs_info, cache, 1); | 8321 | trace_btrfs_add_block_group(fs_info, cache, 1); |
10572 | update_space_info(fs_info, cache->flags, size, bytes_used, | 8322 | btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, |
10573 | cache->bytes_super, &cache->space_info); | 8323 | cache->bytes_super, &cache->space_info); |
10574 | update_global_block_rsv(fs_info); | 8324 | btrfs_update_global_block_rsv(fs_info); |
10575 | 8325 | ||
10576 | link_block_group(cache); | 8326 | link_block_group(cache); |
10577 | 8327 | ||
@@ -10598,6 +8348,35 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
10598 | write_sequnlock(&fs_info->profiles_lock); | 8348 | write_sequnlock(&fs_info->profiles_lock); |
10599 | } | 8349 | } |
10600 | 8350 | ||
8351 | /* | ||
8352 | * Clear incompat bits for the following feature(s): | ||
8353 | * | ||
8354 | * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group | ||
8355 | * in the whole filesystem | ||
8356 | */ | ||
8357 | static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) | ||
8358 | { | ||
8359 | if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) { | ||
8360 | struct list_head *head = &fs_info->space_info; | ||
8361 | struct btrfs_space_info *sinfo; | ||
8362 | |||
8363 | list_for_each_entry_rcu(sinfo, head, list) { | ||
8364 | bool found = false; | ||
8365 | |||
8366 | down_read(&sinfo->groups_sem); | ||
8367 | if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5])) | ||
8368 | found = true; | ||
8369 | if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6])) | ||
8370 | found = true; | ||
8371 | up_read(&sinfo->groups_sem); | ||
8372 | |||
8373 | if (found) | ||
8374 | return; | ||
8375 | } | ||
8376 | btrfs_clear_fs_incompat(fs_info, RAID56); | ||
8377 | } | ||
8378 | } | ||
8379 | |||
10601 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 8380 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
10602 | u64 group_start, struct extent_map *em) | 8381 | u64 group_start, struct extent_map *em) |
10603 | { | 8382 | { |
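clear_incompat_bg_bits() only drops the RAID56 incompat flag once no space info still lists a RAID5 or RAID6 block group; the groups_sem read lock keeps the per-profile lists stable while each one is checked. Its allocation-side counterpart, sketched here with a hypothetical helper name, is the code path that sets the bit when such a chunk is first created:

/* Hypothetical helper name, for illustration only: mark the filesystem
 * incompatible with pre-RAID56 kernels when a RAID5/6 chunk appears. */
static void set_raid56_incompat_if_needed(struct btrfs_fs_info *fs_info, u64 type)
{
	if (type & BTRFS_BLOCK_GROUP_RAID56_MASK)
		btrfs_set_fs_incompat(fs_info, RAID56);
}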
@@ -10744,6 +8523,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
10744 | clear_avail_alloc_bits(fs_info, block_group->flags); | 8523 | clear_avail_alloc_bits(fs_info, block_group->flags); |
10745 | } | 8524 | } |
10746 | up_write(&block_group->space_info->groups_sem); | 8525 | up_write(&block_group->space_info->groups_sem); |
8526 | clear_incompat_bg_bits(fs_info, block_group->flags); | ||
10747 | if (kobj) { | 8527 | if (kobj) { |
10748 | kobject_del(kobj); | 8528 | kobject_del(kobj); |
10749 | kobject_put(kobj); | 8529 | kobject_put(kobj); |
@@ -10853,7 +8633,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
10853 | if (remove_em) { | 8633 | if (remove_em) { |
10854 | struct extent_map_tree *em_tree; | 8634 | struct extent_map_tree *em_tree; |
10855 | 8635 | ||
10856 | em_tree = &fs_info->mapping_tree.map_tree; | 8636 | em_tree = &fs_info->mapping_tree; |
10857 | write_lock(&em_tree->lock); | 8637 | write_lock(&em_tree->lock); |
10858 | remove_extent_mapping(em_tree, em); | 8638 | remove_extent_mapping(em_tree, em); |
10859 | write_unlock(&em_tree->lock); | 8639 | write_unlock(&em_tree->lock); |
@@ -10871,7 +8651,7 @@ struct btrfs_trans_handle * | |||
10871 | btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, | 8651 | btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, |
10872 | const u64 chunk_offset) | 8652 | const u64 chunk_offset) |
10873 | { | 8653 | { |
10874 | struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; | 8654 | struct extent_map_tree *em_tree = &fs_info->mapping_tree; |
10875 | struct extent_map *em; | 8655 | struct extent_map *em; |
10876 | struct map_lookup *map; | 8656 | struct map_lookup *map; |
10877 | unsigned int num_items; | 8657 | unsigned int num_items; |
@@ -11020,7 +8800,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
11020 | spin_lock(&space_info->lock); | 8800 | spin_lock(&space_info->lock); |
11021 | spin_lock(&block_group->lock); | 8801 | spin_lock(&block_group->lock); |
11022 | 8802 | ||
11023 | update_bytes_pinned(space_info, -block_group->pinned); | 8803 | btrfs_space_info_update_bytes_pinned(fs_info, space_info, |
8804 | -block_group->pinned); | ||
11024 | space_info->bytes_readonly += block_group->pinned; | 8805 | space_info->bytes_readonly += block_group->pinned; |
11025 | percpu_counter_add_batch(&space_info->total_bytes_pinned, | 8806 | percpu_counter_add_batch(&space_info->total_bytes_pinned, |
11026 | -block_group->pinned, | 8807 | -block_group->pinned, |
@@ -11076,43 +8857,6 @@ next: | |||
11076 | spin_unlock(&fs_info->unused_bgs_lock); | 8857 | spin_unlock(&fs_info->unused_bgs_lock); |
11077 | } | 8858 | } |
11078 | 8859 | ||
11079 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | ||
11080 | { | ||
11081 | struct btrfs_super_block *disk_super; | ||
11082 | u64 features; | ||
11083 | u64 flags; | ||
11084 | int mixed = 0; | ||
11085 | int ret; | ||
11086 | |||
11087 | disk_super = fs_info->super_copy; | ||
11088 | if (!btrfs_super_root(disk_super)) | ||
11089 | return -EINVAL; | ||
11090 | |||
11091 | features = btrfs_super_incompat_flags(disk_super); | ||
11092 | if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | ||
11093 | mixed = 1; | ||
11094 | |||
11095 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
11096 | ret = create_space_info(fs_info, flags); | ||
11097 | if (ret) | ||
11098 | goto out; | ||
11099 | |||
11100 | if (mixed) { | ||
11101 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; | ||
11102 | ret = create_space_info(fs_info, flags); | ||
11103 | } else { | ||
11104 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
11105 | ret = create_space_info(fs_info, flags); | ||
11106 | if (ret) | ||
11107 | goto out; | ||
11108 | |||
11109 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
11110 | ret = create_space_info(fs_info, flags); | ||
11111 | } | ||
11112 | out: | ||
11113 | return ret; | ||
11114 | } | ||
11115 | |||
11116 | int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, | 8860 | int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, |
11117 | u64 start, u64 end) | 8861 | u64 start, u64 end) |
11118 | { | 8862 | { |
@@ -11171,12 +8915,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) | |||
11171 | find_first_clear_extent_bit(&device->alloc_state, start, | 8915 | find_first_clear_extent_bit(&device->alloc_state, start, |
11172 | &start, &end, | 8916 | &start, &end, |
11173 | CHUNK_TRIMMED | CHUNK_ALLOCATED); | 8917 | CHUNK_TRIMMED | CHUNK_ALLOCATED); |
8918 | |||
8919 | /* Ensure we skip the reserved area in the first 1M */ | ||
8920 | start = max_t(u64, start, SZ_1M); | ||
8921 | |||
11174 | /* | 8922 | /* |
11175 | * If find_first_clear_extent_bit finds a range that spans the | 8923 | * If find_first_clear_extent_bit finds a range that spans the |
11176 | * end of the device it will set end to -1; in this case it's up | 8924 | * end of the device it will set end to -1; in this case it's up |
11177 | * to the caller to trim the value to the size of the device. | 8925 | * to the caller to trim the value to the size of the device. |
11178 | */ | 8926 | */ |
11179 | end = min(end, device->total_bytes - 1); | 8927 | end = min(end, device->total_bytes - 1); |
8928 | |||
11180 | len = end - start + 1; | 8929 | len = end - start + 1; |
11181 | 8930 | ||
11182 | /* We didn't find any extents */ | 8931 | /* We didn't find any extents */ |
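To make the clamping above concrete, a small worked example with hypothetical numbers (not taken from the patch): if the first clear range reported for an 8 GiB device spans from offset 0 past the end of the device, the reserved first megabyte is skipped and the end is pulled back to the last addressable byte.

/* Hypothetical numbers, only to illustrate the clamping above. */
u64 start = 0;				/* clear range starts at offset 0 */
u64 end = (u64)-1;			/* range runs past the device end */
u64 total = 8ULL << 30;			/* assume an 8 GiB device */

start = max_t(u64, start, SZ_1M);	/* skip the reserved first 1 MiB */
end = min(end, total - 1);		/* clamp to the device size */
/* len = end - start + 1, i.e. 8 GiB minus the reserved 1 MiB */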