aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-10-01 12:58:13 -0400
committerChris Mason <chris.mason@oracle.com>2009-10-01 12:58:13 -0400
commit25472b880c69c0daa485c4f80a6550437ed1149f (patch)
treed12bc091e8991513db0d2891111ba773eb5c52e2 /fs/btrfs/extent-tree.c
parent17d857be649a21ca90008c6dc425d849fa83db5c (diff)
parentab93dbecfba72bbc04b7036343d180aaff1b61a3 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable into for-linus
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c391
1 files changed, 342 insertions, 49 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 993f93ff7ba6..359a754c782c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
68 struct extent_buffer **must_clean); 68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level, 69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key); 70 struct btrfs_key *key);
71static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
72 int dump_block_groups);
71 73
72static noinline int 74static noinline int
73block_group_cache_done(struct btrfs_block_group_cache *cache) 75block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2765,67 +2767,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2765 alloc_target); 2767 alloc_target);
2766} 2768}
2767 2769
2770static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2771{
2772 u64 num_bytes;
2773 int level;
2774
2775 level = BTRFS_MAX_LEVEL - 2;
2776 /*
2777 * NOTE: these calculations are absolutely the worst possible case.
2778 * This assumes that _every_ item we insert will require a new leaf, and
2779 * that the tree has grown to its maximum level size.
2780 */
2781
2782 /*
2783 * for every item we insert we could insert both an extent item and an
2784 * extent ref item. Then for every item we insert, we will need to cow
2785 * both the original leaf, plus the leaf to the left and right of it.
2786 *
2787 * Unless we are talking about the extent root, then we just want the
2788 * number of items * 2, since we just need the extent item plus its ref.
2789 */
2790 if (root == root->fs_info->extent_root)
2791 num_bytes = num_items * 2;
2792 else
2793 num_bytes = (num_items + (2 * num_items)) * 3;
2794
2795 /*
2796 * num_bytes is total number of leaves we could need times the leaf
2797 * size, and then for every leaf we could end up cow'ing 2 nodes per
2798 * level, down to the leaf level.
2799 */
2800 num_bytes = (num_bytes * root->leafsize) +
2801 (num_bytes * (level * 2)) * root->nodesize;
2802
2803 return num_bytes;
2804}
2805
2768/* 2806/*
2769 * for now this just makes sure we have at least 5% of our metadata space free 2807 * Unreserve metadata space for delalloc. If we have less reserved credits than
2770 * for use. 2808 * we have extents, this function does nothing.
2771 */ 2809 */
2772int btrfs_check_metadata_free_space(struct btrfs_root *root) 2810int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2811 struct inode *inode, int num_items)
2773{ 2812{
2774 struct btrfs_fs_info *info = root->fs_info; 2813 struct btrfs_fs_info *info = root->fs_info;
2775 struct btrfs_space_info *meta_sinfo; 2814 struct btrfs_space_info *meta_sinfo;
2776 u64 alloc_target, thresh; 2815 u64 num_bytes;
2777 int committed = 0, ret; 2816 u64 alloc_target;
2817 bool bug = false;
2778 2818
2779 /* get the space info for where the metadata will live */ 2819 /* get the space info for where the metadata will live */
2780 alloc_target = btrfs_get_alloc_profile(root, 0); 2820 alloc_target = btrfs_get_alloc_profile(root, 0);
2781 meta_sinfo = __find_space_info(info, alloc_target); 2821 meta_sinfo = __find_space_info(info, alloc_target);
2782 if (!meta_sinfo)
2783 goto alloc;
2784 2822
2785again: 2823 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2824 num_items);
2825
2786 spin_lock(&meta_sinfo->lock); 2826 spin_lock(&meta_sinfo->lock);
2787 if (!meta_sinfo->full) 2827 if (BTRFS_I(inode)->delalloc_reserved_extents <=
2788 thresh = meta_sinfo->total_bytes * 80; 2828 BTRFS_I(inode)->delalloc_extents) {
2789 else 2829 spin_unlock(&meta_sinfo->lock);
2790 thresh = meta_sinfo->total_bytes * 95; 2830 return 0;
2831 }
2832
2833 BTRFS_I(inode)->delalloc_reserved_extents--;
2834 BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
2835
2836 if (meta_sinfo->bytes_delalloc < num_bytes) {
2837 bug = true;
2838 meta_sinfo->bytes_delalloc = 0;
2839 } else {
2840 meta_sinfo->bytes_delalloc -= num_bytes;
2841 }
2842 spin_unlock(&meta_sinfo->lock);
2791 2843
2844 BUG_ON(bug);
2845
2846 return 0;
2847}
2848
2849static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2850{
2851 u64 thresh;
2852
2853 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2854 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2855 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2856 meta_sinfo->bytes_may_use;
2857
2858 thresh = meta_sinfo->total_bytes - thresh;
2859 thresh *= 80;
2792 do_div(thresh, 100); 2860 do_div(thresh, 100);
2861 if (thresh <= meta_sinfo->bytes_delalloc)
2862 meta_sinfo->force_delalloc = 1;
2863 else
2864 meta_sinfo->force_delalloc = 0;
2865}
2793 2866
2794 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2867static int maybe_allocate_chunk(struct btrfs_root *root,
2795 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2868 struct btrfs_space_info *info)
2796 meta_sinfo->bytes_super > thresh) { 2869{
2797 struct btrfs_trans_handle *trans; 2870 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
2798 if (!meta_sinfo->full) { 2871 struct btrfs_trans_handle *trans;
2799 meta_sinfo->force_alloc = 1; 2872 bool wait = false;
2873 int ret = 0;
2874 u64 min_metadata;
2875 u64 free_space;
2876
2877 free_space = btrfs_super_total_bytes(disk_super);
2878 /*
2879 * we allow the metadata to grow to a max of either 5gb or 5% of the
2880 * space in the volume.
2881 */
2882 min_metadata = min((u64)5 * 1024 * 1024 * 1024,
2883 div64_u64(free_space * 5, 100));
2884 if (info->total_bytes >= min_metadata) {
2885 spin_unlock(&info->lock);
2886 return 0;
2887 }
2888
2889 if (info->full) {
2890 spin_unlock(&info->lock);
2891 return 0;
2892 }
2893
2894 if (!info->allocating_chunk) {
2895 info->force_alloc = 1;
2896 info->allocating_chunk = 1;
2897 init_waitqueue_head(&info->wait);
2898 } else {
2899 wait = true;
2900 }
2901
2902 spin_unlock(&info->lock);
2903
2904 if (wait) {
2905 wait_event(info->wait,
2906 !info->allocating_chunk);
2907 return 1;
2908 }
2909
2910 trans = btrfs_start_transaction(root, 1);
2911 if (!trans) {
2912 ret = -ENOMEM;
2913 goto out;
2914 }
2915
2916 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2917 4096 + 2 * 1024 * 1024,
2918 info->flags, 0);
2919 btrfs_end_transaction(trans, root);
2920 if (ret)
2921 goto out;
2922out:
2923 spin_lock(&info->lock);
2924 info->allocating_chunk = 0;
2925 spin_unlock(&info->lock);
2926 wake_up(&info->wait);
2927
2928 if (ret)
2929 return 0;
2930 return 1;
2931}
2932
2933/*
2934 * Reserve metadata space for delalloc.
2935 */
2936int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2937 struct inode *inode, int num_items)
2938{
2939 struct btrfs_fs_info *info = root->fs_info;
2940 struct btrfs_space_info *meta_sinfo;
2941 u64 num_bytes;
2942 u64 used;
2943 u64 alloc_target;
2944 int flushed = 0;
2945 int force_delalloc;
2946
2947 /* get the space info for where the metadata will live */
2948 alloc_target = btrfs_get_alloc_profile(root, 0);
2949 meta_sinfo = __find_space_info(info, alloc_target);
2950
2951 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2952 num_items);
2953again:
2954 spin_lock(&meta_sinfo->lock);
2955
2956 force_delalloc = meta_sinfo->force_delalloc;
2957
2958 if (unlikely(!meta_sinfo->bytes_root))
2959 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
2960
2961 if (!flushed)
2962 meta_sinfo->bytes_delalloc += num_bytes;
2963
2964 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2965 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2966 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2967 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
2968
2969 if (used > meta_sinfo->total_bytes) {
2970 flushed++;
2971
2972 if (flushed == 1) {
2973 if (maybe_allocate_chunk(root, meta_sinfo))
2974 goto again;
2975 flushed++;
2976 } else {
2800 spin_unlock(&meta_sinfo->lock); 2977 spin_unlock(&meta_sinfo->lock);
2801alloc: 2978 }
2802 trans = btrfs_start_transaction(root, 1);
2803 if (!trans)
2804 return -ENOMEM;
2805 2979
2806 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2980 if (flushed == 2) {
2807 2 * 1024 * 1024, alloc_target, 0); 2981 filemap_flush(inode->i_mapping);
2808 btrfs_end_transaction(trans, root); 2982 goto again;
2809 if (!meta_sinfo) { 2983 } else if (flushed == 3) {
2810 meta_sinfo = __find_space_info(info, 2984 btrfs_start_delalloc_inodes(root);
2811 alloc_target); 2985 btrfs_wait_ordered_extents(root, 0);
2812 }
2813 goto again; 2986 goto again;
2814 } 2987 }
2988 spin_lock(&meta_sinfo->lock);
2989 meta_sinfo->bytes_delalloc -= num_bytes;
2815 spin_unlock(&meta_sinfo->lock); 2990 spin_unlock(&meta_sinfo->lock);
2991 printk(KERN_ERR "enospc, has %d, reserved %d\n",
2992 BTRFS_I(inode)->delalloc_extents,
2993 BTRFS_I(inode)->delalloc_reserved_extents);
2994 dump_space_info(meta_sinfo, 0, 0);
2995 return -ENOSPC;
2996 }
2816 2997
2817 if (!committed) { 2998 BTRFS_I(inode)->delalloc_reserved_extents++;
2818 committed = 1; 2999 check_force_delalloc(meta_sinfo);
2819 trans = btrfs_join_transaction(root, 1); 3000 spin_unlock(&meta_sinfo->lock);
2820 if (!trans) 3001
2821 return -ENOMEM; 3002 if (!flushed && force_delalloc)
2822 ret = btrfs_commit_transaction(trans, root); 3003 filemap_flush(inode->i_mapping);
2823 if (ret) 3004
2824 return ret; 3005 return 0;
3006}
3007
3008/*
3009 * unreserve num_items number of items worth of metadata space. This needs to
3010 * be paired with btrfs_reserve_metadata_space.
3011 *
3012 * NOTE: if you have the option, run this _AFTER_ you do a
3013 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
3014 * operations which will result in more used metadata, so we want to make sure we
3015 * can do that without issue.
3016 */
3017int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
3018{
3019 struct btrfs_fs_info *info = root->fs_info;
3020 struct btrfs_space_info *meta_sinfo;
3021 u64 num_bytes;
3022 u64 alloc_target;
3023 bool bug = false;
3024
3025 /* get the space info for where the metadata will live */
3026 alloc_target = btrfs_get_alloc_profile(root, 0);
3027 meta_sinfo = __find_space_info(info, alloc_target);
3028
3029 num_bytes = calculate_bytes_needed(root, num_items);
3030
3031 spin_lock(&meta_sinfo->lock);
3032 if (meta_sinfo->bytes_may_use < num_bytes) {
3033 bug = true;
3034 meta_sinfo->bytes_may_use = 0;
3035 } else {
3036 meta_sinfo->bytes_may_use -= num_bytes;
3037 }
3038 spin_unlock(&meta_sinfo->lock);
3039
3040 BUG_ON(bug);
3041
3042 return 0;
3043}
3044
3045/*
3046 * Reserve some metadata space for use. We'll calculate the worst case number
3047 * of bytes that would be needed to modify num_items number of items. If we
3048 * have space, fantastic, if not, you get -ENOSPC. Please call
3049 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
3050 * items you reserved, since whatever metadata you needed should have already
3051 * been allocated.
3052 *
3053 * This will commit the transaction to make more space if we don't have enough
3054 * metadata space. The only time we don't do this is if we're reserving space
3055 * inside of a transaction, then we will just return -ENOSPC and it is the
3056 * caller's responsibility to handle it properly.
3057 */
3058int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
3059{
3060 struct btrfs_fs_info *info = root->fs_info;
3061 struct btrfs_space_info *meta_sinfo;
3062 u64 num_bytes;
3063 u64 used;
3064 u64 alloc_target;
3065 int retries = 0;
3066
3067 /* get the space info for where the metadata will live */
3068 alloc_target = btrfs_get_alloc_profile(root, 0);
3069 meta_sinfo = __find_space_info(info, alloc_target);
3070
3071 num_bytes = calculate_bytes_needed(root, num_items);
3072again:
3073 spin_lock(&meta_sinfo->lock);
3074
3075 if (unlikely(!meta_sinfo->bytes_root))
3076 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3077
3078 if (!retries)
3079 meta_sinfo->bytes_may_use += num_bytes;
3080
3081 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3082 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3083 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3084 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3085
3086 if (used > meta_sinfo->total_bytes) {
3087 retries++;
3088 if (retries == 1) {
3089 if (maybe_allocate_chunk(root, meta_sinfo))
3090 goto again;
3091 retries++;
3092 } else {
3093 spin_unlock(&meta_sinfo->lock);
3094 }
3095
3096 if (retries == 2) {
3097 btrfs_start_delalloc_inodes(root);
3098 btrfs_wait_ordered_extents(root, 0);
2825 goto again; 3099 goto again;
2826 } 3100 }
3101 spin_lock(&meta_sinfo->lock);
3102 meta_sinfo->bytes_may_use -= num_bytes;
3103 spin_unlock(&meta_sinfo->lock);
3104
3105 dump_space_info(meta_sinfo, 0, 0);
2827 return -ENOSPC; 3106 return -ENOSPC;
2828 } 3107 }
3108
3109 check_force_delalloc(meta_sinfo);
2829 spin_unlock(&meta_sinfo->lock); 3110 spin_unlock(&meta_sinfo->lock);
2830 3111
2831 return 0; 3112 return 0;
@@ -2888,7 +3169,7 @@ alloc:
2888 spin_unlock(&data_sinfo->lock); 3169 spin_unlock(&data_sinfo->lock);
2889 3170
2890 /* commit the current transaction and try again */ 3171 /* commit the current transaction and try again */
2891 if (!committed) { 3172 if (!committed && !root->fs_info->open_ioctl_trans) {
2892 committed = 1; 3173 committed = 1;
2893 trans = btrfs_join_transaction(root, 1); 3174 trans = btrfs_join_transaction(root, 1);
2894 if (!trans) 3175 if (!trans)
@@ -2916,7 +3197,7 @@ alloc:
2916 BTRFS_I(inode)->reserved_bytes += bytes; 3197 BTRFS_I(inode)->reserved_bytes += bytes;
2917 spin_unlock(&data_sinfo->lock); 3198 spin_unlock(&data_sinfo->lock);
2918 3199
2919 return btrfs_check_metadata_free_space(root); 3200 return 0;
2920} 3201}
2921 3202
2922/* 3203/*
@@ -3015,17 +3296,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3015 BUG_ON(!space_info); 3296 BUG_ON(!space_info);
3016 3297
3017 spin_lock(&space_info->lock); 3298 spin_lock(&space_info->lock);
3018 if (space_info->force_alloc) { 3299 if (space_info->force_alloc)
3019 force = 1; 3300 force = 1;
3020 space_info->force_alloc = 0;
3021 }
3022 if (space_info->full) { 3301 if (space_info->full) {
3023 spin_unlock(&space_info->lock); 3302 spin_unlock(&space_info->lock);
3024 goto out; 3303 goto out;
3025 } 3304 }
3026 3305
3027 thresh = space_info->total_bytes - space_info->bytes_readonly; 3306 thresh = space_info->total_bytes - space_info->bytes_readonly;
3028 thresh = div_factor(thresh, 6); 3307 thresh = div_factor(thresh, 8);
3029 if (!force && 3308 if (!force &&
3030 (space_info->bytes_used + space_info->bytes_pinned + 3309 (space_info->bytes_used + space_info->bytes_pinned +
3031 space_info->bytes_reserved + alloc_bytes) < thresh) { 3310 space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -3039,7 +3318,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3039 * we keep a reasonable number of metadata chunks allocated in the 3318 * we keep a reasonable number of metadata chunks allocated in the
3040 * FS as well. 3319 * FS as well.
3041 */ 3320 */
3042 if (flags & BTRFS_BLOCK_GROUP_DATA) { 3321 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3043 fs_info->data_chunk_allocations++; 3322 fs_info->data_chunk_allocations++;
3044 if (!(fs_info->data_chunk_allocations % 3323 if (!(fs_info->data_chunk_allocations %
3045 fs_info->metadata_ratio)) 3324 fs_info->metadata_ratio))
@@ -3047,8 +3326,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3047 } 3326 }
3048 3327
3049 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3328 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3329 spin_lock(&space_info->lock);
3050 if (ret) 3330 if (ret)
3051 space_info->full = 1; 3331 space_info->full = 1;
3332 space_info->force_alloc = 0;
3333 spin_unlock(&space_info->lock);
3052out: 3334out:
3053 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3335 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3054 return ret; 3336 return ret;
@@ -4063,21 +4345,32 @@ loop:
4063 return ret; 4345 return ret;
4064} 4346}
4065 4347
4066static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 4348static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4349 int dump_block_groups)
4067{ 4350{
4068 struct btrfs_block_group_cache *cache; 4351 struct btrfs_block_group_cache *cache;
4069 4352
4353 spin_lock(&info->lock);
4070 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4354 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
4071 (unsigned long long)(info->total_bytes - info->bytes_used - 4355 (unsigned long long)(info->total_bytes - info->bytes_used -
4072 info->bytes_pinned - info->bytes_reserved), 4356 info->bytes_pinned - info->bytes_reserved -
4357 info->bytes_super),
4073 (info->full) ? "" : "not "); 4358 (info->full) ? "" : "not ");
4074 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4359 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
4075 " may_use=%llu, used=%llu\n", 4360 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
4361 "\n",
4076 (unsigned long long)info->total_bytes, 4362 (unsigned long long)info->total_bytes,
4077 (unsigned long long)info->bytes_pinned, 4363 (unsigned long long)info->bytes_pinned,
4078 (unsigned long long)info->bytes_delalloc, 4364 (unsigned long long)info->bytes_delalloc,
4079 (unsigned long long)info->bytes_may_use, 4365 (unsigned long long)info->bytes_may_use,
4080 (unsigned long long)info->bytes_used); 4366 (unsigned long long)info->bytes_used,
4367 (unsigned long long)info->bytes_root,
4368 (unsigned long long)info->bytes_super,
4369 (unsigned long long)info->bytes_reserved);
4370 spin_unlock(&info->lock);
4371
4372 if (!dump_block_groups)
4373 return;
4081 4374
4082 down_read(&info->groups_sem); 4375 down_read(&info->groups_sem);
4083 list_for_each_entry(cache, &info->block_groups, list) { 4376 list_for_each_entry(cache, &info->block_groups, list) {
@@ -4145,7 +4438,7 @@ again:
4145 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4438 printk(KERN_ERR "btrfs allocation failed flags %llu, "
4146 "wanted %llu\n", (unsigned long long)data, 4439 "wanted %llu\n", (unsigned long long)data,
4147 (unsigned long long)num_bytes); 4440 (unsigned long long)num_bytes);
4148 dump_space_info(sinfo, num_bytes); 4441 dump_space_info(sinfo, num_bytes, 1);
4149 } 4442 }
4150 4443
4151 return ret; 4444 return ret;