about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 389
1 files changed, 341 insertions, 48 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 90d314eeff6d..a4b2b03cd682 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
68 struct extent_buffer **must_clean); 68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level, 69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key); 70 struct btrfs_key *key);
71static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
72 int dump_block_groups);
71 73
72static noinline int 74static noinline int
73block_group_cache_done(struct btrfs_block_group_cache *cache) 75block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2764,67 +2766,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2764 alloc_target); 2766 alloc_target);
2765} 2767}
2766 2768
2769static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2770{
2771 u64 num_bytes;
2772 int level;
2773
2774 level = BTRFS_MAX_LEVEL - 2;
2775 /*
2776 * NOTE: these calculations are absolutely the worst possible case.
2777 * This assumes that _every_ item we insert will require a new leaf, and
2778 * that the tree has grown to its maximum level size.
2779 */
2780
2781 /*
2782 * for every item we insert we could insert both an extent item and an
2783 * extent ref item. Then for every item we insert, we will need to cow
2784 * both the original leaf, plus the leaf to the left and right of it.
2785 *
2786 * Unless we are talking about the extent root, then we just want the
2787 * number of items * 2, since we just need the extent item plus its ref.
2788 */
2789 if (root == root->fs_info->extent_root)
2790 num_bytes = num_items * 2;
2791 else
2792 num_bytes = (num_items + (2 * num_items)) * 3;
2793
2794 /*
2795 * num_bytes is total number of leaves we could need times the leaf
2796 * size, and then for every leaf we could end up cow'ing 2 nodes per
2797 * level, down to the leaf level.
2798 */
2799 num_bytes = (num_bytes * root->leafsize) +
2800 (num_bytes * (level * 2)) * root->nodesize;
2801
2802 return num_bytes;
2803}
2804
2767/* 2805/*
2768 * for now this just makes sure we have at least 5% of our metadata space free 2806 * Unreserve metadata space for delalloc. If we have less reserved credits than
2769 * for use. 2807 * we have extents, this function does nothing.
2770 */ 2808 */
2771int btrfs_check_metadata_free_space(struct btrfs_root *root) 2809int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2810 struct inode *inode, int num_items)
2772{ 2811{
2773 struct btrfs_fs_info *info = root->fs_info; 2812 struct btrfs_fs_info *info = root->fs_info;
2774 struct btrfs_space_info *meta_sinfo; 2813 struct btrfs_space_info *meta_sinfo;
2775 u64 alloc_target, thresh; 2814 u64 num_bytes;
2776 int committed = 0, ret; 2815 u64 alloc_target;
2816 bool bug = false;
2777 2817
2778 /* get the space info for where the metadata will live */ 2818 /* get the space info for where the metadata will live */
2779 alloc_target = btrfs_get_alloc_profile(root, 0); 2819 alloc_target = btrfs_get_alloc_profile(root, 0);
2780 meta_sinfo = __find_space_info(info, alloc_target); 2820 meta_sinfo = __find_space_info(info, alloc_target);
2781 if (!meta_sinfo)
2782 goto alloc;
2783 2821
2784again: 2822 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2823 num_items);
2824
2785 spin_lock(&meta_sinfo->lock); 2825 spin_lock(&meta_sinfo->lock);
2786 if (!meta_sinfo->full) 2826 if (BTRFS_I(inode)->delalloc_reserved_extents <=
2787 thresh = meta_sinfo->total_bytes * 80; 2827 BTRFS_I(inode)->delalloc_extents) {
2788 else 2828 spin_unlock(&meta_sinfo->lock);
2789 thresh = meta_sinfo->total_bytes * 95; 2829 return 0;
2830 }
2831
2832 BTRFS_I(inode)->delalloc_reserved_extents--;
2833 BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
2834
2835 if (meta_sinfo->bytes_delalloc < num_bytes) {
2836 bug = true;
2837 meta_sinfo->bytes_delalloc = 0;
2838 } else {
2839 meta_sinfo->bytes_delalloc -= num_bytes;
2840 }
2841 spin_unlock(&meta_sinfo->lock);
2790 2842
2843 BUG_ON(bug);
2844
2845 return 0;
2846}
2847
2848static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2849{
2850 u64 thresh;
2851
2852 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2853 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2854 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2855 meta_sinfo->bytes_may_use;
2856
2857 thresh = meta_sinfo->total_bytes - thresh;
2858 thresh *= 80;
2791 do_div(thresh, 100); 2859 do_div(thresh, 100);
2860 if (thresh <= meta_sinfo->bytes_delalloc)
2861 meta_sinfo->force_delalloc = 1;
2862 else
2863 meta_sinfo->force_delalloc = 0;
2864}
2792 2865
2793 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2866static int maybe_allocate_chunk(struct btrfs_root *root,
2794 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2867 struct btrfs_space_info *info)
2795 meta_sinfo->bytes_super > thresh) { 2868{
2796 struct btrfs_trans_handle *trans; 2869 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
2797 if (!meta_sinfo->full) { 2870 struct btrfs_trans_handle *trans;
2798 meta_sinfo->force_alloc = 1; 2871 bool wait = false;
2872 int ret = 0;
2873 u64 min_metadata;
2874 u64 free_space;
2875
2876 free_space = btrfs_super_total_bytes(disk_super);
2877 /*
2878 * we allow the metadata to grow to a max of either 5gb or 5% of the
2879 * space in the volume, whichever is smaller.
2880 */
2881 min_metadata = min((u64)5 * 1024 * 1024 * 1024,
2882 div64_u64(free_space * 5, 100));
2883 if (info->total_bytes >= min_metadata) {
2884 spin_unlock(&info->lock);
2885 return 0;
2886 }
2887
2888 if (info->full) {
2889 spin_unlock(&info->lock);
2890 return 0;
2891 }
2892
2893 if (!info->allocating_chunk) {
2894 info->force_alloc = 1;
2895 info->allocating_chunk = 1;
2896 init_waitqueue_head(&info->wait);
2897 } else {
2898 wait = true;
2899 }
2900
2901 spin_unlock(&info->lock);
2902
2903 if (wait) {
2904 wait_event(info->wait,
2905 !info->allocating_chunk);
2906 return 1;
2907 }
2908
2909 trans = btrfs_start_transaction(root, 1);
2910 if (!trans) {
2911 ret = -ENOMEM;
2912 goto out;
2913 }
2914
2915 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2916 4096 + 2 * 1024 * 1024,
2917 info->flags, 0);
2918 btrfs_end_transaction(trans, root);
2919 if (ret)
2920 goto out;
2921out:
2922 spin_lock(&info->lock);
2923 info->allocating_chunk = 0;
2924 spin_unlock(&info->lock);
2925 wake_up(&info->wait);
2926
2927 if (ret)
2928 return 0;
2929 return 1;
2930}
2931
2932/*
2933 * Reserve metadata space for delalloc.
2934 */
2935int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2936 struct inode *inode, int num_items)
2937{
2938 struct btrfs_fs_info *info = root->fs_info;
2939 struct btrfs_space_info *meta_sinfo;
2940 u64 num_bytes;
2941 u64 used;
2942 u64 alloc_target;
2943 int flushed = 0;
2944 int force_delalloc;
2945
2946 /* get the space info for where the metadata will live */
2947 alloc_target = btrfs_get_alloc_profile(root, 0);
2948 meta_sinfo = __find_space_info(info, alloc_target);
2949
2950 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2951 num_items);
2952again:
2953 spin_lock(&meta_sinfo->lock);
2954
2955 force_delalloc = meta_sinfo->force_delalloc;
2956
2957 if (unlikely(!meta_sinfo->bytes_root))
2958 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
2959
2960 if (!flushed)
2961 meta_sinfo->bytes_delalloc += num_bytes;
2962
2963 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2964 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2965 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2966 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
2967
2968 if (used > meta_sinfo->total_bytes) {
2969 flushed++;
2970
2971 if (flushed == 1) {
2972 if (maybe_allocate_chunk(root, meta_sinfo))
2973 goto again;
2974 flushed++;
2975 } else {
2799 spin_unlock(&meta_sinfo->lock); 2976 spin_unlock(&meta_sinfo->lock);
2800alloc: 2977 }
2801 trans = btrfs_start_transaction(root, 1);
2802 if (!trans)
2803 return -ENOMEM;
2804 2978
2805 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2979 if (flushed == 2) {
2806 2 * 1024 * 1024, alloc_target, 0); 2980 filemap_flush(inode->i_mapping);
2807 btrfs_end_transaction(trans, root); 2981 goto again;
2808 if (!meta_sinfo) { 2982 } else if (flushed == 3) {
2809 meta_sinfo = __find_space_info(info, 2983 btrfs_start_delalloc_inodes(root);
2810 alloc_target); 2984 btrfs_wait_ordered_extents(root, 0);
2811 }
2812 goto again; 2985 goto again;
2813 } 2986 }
2987 spin_lock(&meta_sinfo->lock);
2988 meta_sinfo->bytes_delalloc -= num_bytes;
2814 spin_unlock(&meta_sinfo->lock); 2989 spin_unlock(&meta_sinfo->lock);
2990 printk(KERN_ERR "enospc, has %d, reserved %d\n",
2991 BTRFS_I(inode)->delalloc_extents,
2992 BTRFS_I(inode)->delalloc_reserved_extents);
2993 dump_space_info(meta_sinfo, 0, 0);
2994 return -ENOSPC;
2995 }
2815 2996
2816 if (!committed) { 2997 BTRFS_I(inode)->delalloc_reserved_extents++;
2817 committed = 1; 2998 check_force_delalloc(meta_sinfo);
2818 trans = btrfs_join_transaction(root, 1); 2999 spin_unlock(&meta_sinfo->lock);
2819 if (!trans) 3000
2820 return -ENOMEM; 3001 if (!flushed && force_delalloc)
2821 ret = btrfs_commit_transaction(trans, root); 3002 filemap_flush(inode->i_mapping);
2822 if (ret) 3003
2823 return ret; 3004 return 0;
3005}
3006
3007/*
3008 * unreserve num_items number of items worth of metadata space. This needs to
3009 * be paired with btrfs_reserve_metadata_space.
3010 *
3011 * NOTE: if you have the option, run this _AFTER_ you do a
3012 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
3013 operations which will result in more used metadata, so we want to make sure we
3014 * can do that without issue.
3015 */
3016int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
3017{
3018 struct btrfs_fs_info *info = root->fs_info;
3019 struct btrfs_space_info *meta_sinfo;
3020 u64 num_bytes;
3021 u64 alloc_target;
3022 bool bug = false;
3023
3024 /* get the space info for where the metadata will live */
3025 alloc_target = btrfs_get_alloc_profile(root, 0);
3026 meta_sinfo = __find_space_info(info, alloc_target);
3027
3028 num_bytes = calculate_bytes_needed(root, num_items);
3029
3030 spin_lock(&meta_sinfo->lock);
3031 if (meta_sinfo->bytes_may_use < num_bytes) {
3032 bug = true;
3033 meta_sinfo->bytes_may_use = 0;
3034 } else {
3035 meta_sinfo->bytes_may_use -= num_bytes;
3036 }
3037 spin_unlock(&meta_sinfo->lock);
3038
3039 BUG_ON(bug);
3040
3041 return 0;
3042}
3043
3044/*
3045 * Reserve some metadata space for use. We'll calculate the worst case number
3046 * of bytes that would be needed to modify num_items number of items. If we
3047 * have space, fantastic, if not, you get -ENOSPC. Please call
3048 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
3049 * items you reserved, since whatever metadata you needed should have already
3050 * been allocated.
3051 *
3052 * This will commit the transaction to make more space if we don't have enough
3053 * metadata space. The only time we don't do this is if we're reserving space
3054 * inside of a transaction, then we will just return -ENOSPC and it is the
3055 * caller's responsibility to handle it properly.
3056 */
3057int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
3058{
3059 struct btrfs_fs_info *info = root->fs_info;
3060 struct btrfs_space_info *meta_sinfo;
3061 u64 num_bytes;
3062 u64 used;
3063 u64 alloc_target;
3064 int retries = 0;
3065
3066 /* get the space info for where the metadata will live */
3067 alloc_target = btrfs_get_alloc_profile(root, 0);
3068 meta_sinfo = __find_space_info(info, alloc_target);
3069
3070 num_bytes = calculate_bytes_needed(root, num_items);
3071again:
3072 spin_lock(&meta_sinfo->lock);
3073
3074 if (unlikely(!meta_sinfo->bytes_root))
3075 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3076
3077 if (!retries)
3078 meta_sinfo->bytes_may_use += num_bytes;
3079
3080 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3081 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3082 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3083 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3084
3085 if (used > meta_sinfo->total_bytes) {
3086 retries++;
3087 if (retries == 1) {
3088 if (maybe_allocate_chunk(root, meta_sinfo))
3089 goto again;
3090 retries++;
3091 } else {
3092 spin_unlock(&meta_sinfo->lock);
3093 }
3094
3095 if (retries == 2) {
3096 btrfs_start_delalloc_inodes(root);
3097 btrfs_wait_ordered_extents(root, 0);
2824 goto again; 3098 goto again;
2825 } 3099 }
3100 spin_lock(&meta_sinfo->lock);
3101 meta_sinfo->bytes_may_use -= num_bytes;
3102 spin_unlock(&meta_sinfo->lock);
3103
3104 dump_space_info(meta_sinfo, 0, 0);
2826 return -ENOSPC; 3105 return -ENOSPC;
2827 } 3106 }
3107
3108 check_force_delalloc(meta_sinfo);
2828 spin_unlock(&meta_sinfo->lock); 3109 spin_unlock(&meta_sinfo->lock);
2829 3110
2830 return 0; 3111 return 0;
@@ -2915,7 +3196,7 @@ alloc:
2915 BTRFS_I(inode)->reserved_bytes += bytes; 3196 BTRFS_I(inode)->reserved_bytes += bytes;
2916 spin_unlock(&data_sinfo->lock); 3197 spin_unlock(&data_sinfo->lock);
2917 3198
2918 return btrfs_check_metadata_free_space(root); 3199 return 0;
2919} 3200}
2920 3201
2921/* 3202/*
@@ -3014,17 +3295,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3014 BUG_ON(!space_info); 3295 BUG_ON(!space_info);
3015 3296
3016 spin_lock(&space_info->lock); 3297 spin_lock(&space_info->lock);
3017 if (space_info->force_alloc) { 3298 if (space_info->force_alloc)
3018 force = 1; 3299 force = 1;
3019 space_info->force_alloc = 0;
3020 }
3021 if (space_info->full) { 3300 if (space_info->full) {
3022 spin_unlock(&space_info->lock); 3301 spin_unlock(&space_info->lock);
3023 goto out; 3302 goto out;
3024 } 3303 }
3025 3304
3026 thresh = space_info->total_bytes - space_info->bytes_readonly; 3305 thresh = space_info->total_bytes - space_info->bytes_readonly;
3027 thresh = div_factor(thresh, 6); 3306 thresh = div_factor(thresh, 8);
3028 if (!force && 3307 if (!force &&
3029 (space_info->bytes_used + space_info->bytes_pinned + 3308 (space_info->bytes_used + space_info->bytes_pinned +
3030 space_info->bytes_reserved + alloc_bytes) < thresh) { 3309 space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -3038,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3038 * we keep a reasonable number of metadata chunks allocated in the 3317 * we keep a reasonable number of metadata chunks allocated in the
3039 * FS as well. 3318 * FS as well.
3040 */ 3319 */
3041 if (flags & BTRFS_BLOCK_GROUP_DATA) { 3320 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3042 fs_info->data_chunk_allocations++; 3321 fs_info->data_chunk_allocations++;
3043 if (!(fs_info->data_chunk_allocations % 3322 if (!(fs_info->data_chunk_allocations %
3044 fs_info->metadata_ratio)) 3323 fs_info->metadata_ratio))
@@ -3046,8 +3325,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3046 } 3325 }
3047 3326
3048 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3327 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3328 spin_lock(&space_info->lock);
3049 if (ret) 3329 if (ret)
3050 space_info->full = 1; 3330 space_info->full = 1;
3331 space_info->force_alloc = 0;
3332 spin_unlock(&space_info->lock);
3051out: 3333out:
3052 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3334 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3053 return ret; 3335 return ret;
@@ -4062,21 +4344,32 @@ loop:
4062 return ret; 4344 return ret;
4063} 4345}
4064 4346
4065static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 4347static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4348 int dump_block_groups)
4066{ 4349{
4067 struct btrfs_block_group_cache *cache; 4350 struct btrfs_block_group_cache *cache;
4068 4351
4352 spin_lock(&info->lock);
4069 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4353 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
4070 (unsigned long long)(info->total_bytes - info->bytes_used - 4354 (unsigned long long)(info->total_bytes - info->bytes_used -
4071 info->bytes_pinned - info->bytes_reserved), 4355 info->bytes_pinned - info->bytes_reserved -
4356 info->bytes_super),
4072 (info->full) ? "" : "not "); 4357 (info->full) ? "" : "not ");
4073 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4358 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
4074 " may_use=%llu, used=%llu\n", 4359 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
4360 "\n",
4075 (unsigned long long)info->total_bytes, 4361 (unsigned long long)info->total_bytes,
4076 (unsigned long long)info->bytes_pinned, 4362 (unsigned long long)info->bytes_pinned,
4077 (unsigned long long)info->bytes_delalloc, 4363 (unsigned long long)info->bytes_delalloc,
4078 (unsigned long long)info->bytes_may_use, 4364 (unsigned long long)info->bytes_may_use,
4079 (unsigned long long)info->bytes_used); 4365 (unsigned long long)info->bytes_used,
4366 (unsigned long long)info->bytes_root,
4367 (unsigned long long)info->bytes_super,
4368 (unsigned long long)info->bytes_reserved);
4369 spin_unlock(&info->lock);
4370
4371 if (!dump_block_groups)
4372 return;
4080 4373
4081 down_read(&info->groups_sem); 4374 down_read(&info->groups_sem);
4082 list_for_each_entry(cache, &info->block_groups, list) { 4375 list_for_each_entry(cache, &info->block_groups, list) {
@@ -4144,7 +4437,7 @@ again:
4144 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4437 printk(KERN_ERR "btrfs allocation failed flags %llu, "
4145 "wanted %llu\n", (unsigned long long)data, 4438 "wanted %llu\n", (unsigned long long)data,
4146 (unsigned long long)num_bytes); 4439 (unsigned long long)num_bytes);
4147 dump_space_info(sinfo, num_bytes); 4440 dump_space_info(sinfo, num_bytes, 1);
4148 } 4441 }
4149 4442
4150 return ret; 4443 return ret;