Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 598 +++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 514 insertions, 84 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 993f93ff7ba6..d0c4d584efad 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 			      struct extent_buffer **must_clean);
 static int find_next_key(struct btrfs_path *path, int level,
 			 struct btrfs_key *key);
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
+			    int dump_block_groups);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2765,67 +2767,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
 						       alloc_target);
 }
 
+static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
+{
+	u64 num_bytes;
+	int level;
+
+	level = BTRFS_MAX_LEVEL - 2;
+	/*
+	 * NOTE: these calculations are absolutely the worst possible case.
+	 * This assumes that _every_ item we insert will require a new leaf, and
+	 * that the tree has grown to its maximum level size.
+	 */
+
+	/*
+	 * for every item we insert we could insert both an extent item and a
+	 * extent ref item.  Then for ever item we insert, we will need to cow
+	 * both the original leaf, plus the leaf to the left and right of it.
+	 *
+	 * Unless we are talking about the extent root, then we just want the
+	 * number of items * 2, since we just need the extent item plus its ref.
+	 */
+	if (root == root->fs_info->extent_root)
+		num_bytes = num_items * 2;
+	else
+		num_bytes = (num_items + (2 * num_items)) * 3;
+
+	/*
+	 * num_bytes is total number of leaves we could need times the leaf
+	 * size, and then for every leaf we could end up cow'ing 2 nodes per
+	 * level, down to the leaf level.
+	 */
+	num_bytes = (num_bytes * root->leafsize) +
+		(num_bytes * (level * 2)) * root->nodesize;
+
+	return num_bytes;
+}
+
 /*
- * for now this just makes sure we have at least 5% of our metadata space free
- * for use.
+ * Unreserve metadata space for delalloc.  If we have less reserved credits than
+ * we have extents, this function does nothing.
  */
-int btrfs_check_metadata_free_space(struct btrfs_root *root)
+int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
+					  struct inode *inode, int num_items)
 {
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_space_info *meta_sinfo;
-	u64 alloc_target, thresh;
-	int committed = 0, ret;
+	u64 num_bytes;
+	u64 alloc_target;
+	bool bug = false;
 
 	/* get the space info for where the metadata will live */
 	alloc_target = btrfs_get_alloc_profile(root, 0);
 	meta_sinfo = __find_space_info(info, alloc_target);
-	if (!meta_sinfo)
-		goto alloc;
 
-again:
+	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+					   num_items);
+
 	spin_lock(&meta_sinfo->lock);
-	if (!meta_sinfo->full)
-		thresh = meta_sinfo->total_bytes * 80;
-	else
-		thresh = meta_sinfo->total_bytes * 95;
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	if (BTRFS_I(inode)->reserved_extents <=
+	    BTRFS_I(inode)->outstanding_extents) {
+		spin_unlock(&BTRFS_I(inode)->accounting_lock);
+		spin_unlock(&meta_sinfo->lock);
+		return 0;
+	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
+	BTRFS_I(inode)->reserved_extents--;
+	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
+
+	if (meta_sinfo->bytes_delalloc < num_bytes) {
+		bug = true;
+		meta_sinfo->bytes_delalloc = 0;
+	} else {
+		meta_sinfo->bytes_delalloc -= num_bytes;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	BUG_ON(bug);
 
+	return 0;
+}
+
+static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
+{
+	u64 thresh;
+
+	thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use;
+
+	thresh = meta_sinfo->total_bytes - thresh;
+	thresh *= 80;
 	do_div(thresh, 100);
+	if (thresh <= meta_sinfo->bytes_delalloc)
+		meta_sinfo->force_delalloc = 1;
+	else
+		meta_sinfo->force_delalloc = 0;
+}
 
-	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-	    meta_sinfo->bytes_super > thresh) {
-		struct btrfs_trans_handle *trans;
-		if (!meta_sinfo->full) {
-			meta_sinfo->force_alloc = 1;
+struct async_flush {
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+	struct btrfs_work work;
+};
+
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{
+	struct async_flush *async;
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+
+	async = container_of(work, struct async_flush, work);
+	root = async->root;
+	info = async->info;
+
+	btrfs_start_delalloc_inodes(root);
+	wake_up(&info->flush_wait);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+
+	kfree(async);
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{
+	DEFINE_WAIT(wait);
+	u64 used;
+
+	while (1) {
+		prepare_to_wait(&info->flush_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock(&info->lock);
+		if (!info->flushing) {
+			spin_unlock(&info->lock);
+			break;
+		}
+
+		used = info->bytes_used + info->bytes_reserved +
+			info->bytes_pinned + info->bytes_readonly +
+			info->bytes_super + info->bytes_root +
+			info->bytes_may_use + info->bytes_delalloc;
+		if (used < info->total_bytes) {
+			spin_unlock(&info->lock);
+			break;
+		}
+		spin_unlock(&info->lock);
+		schedule();
+	}
+	finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+			   struct btrfs_space_info *info)
+{
+	struct async_flush *async;
+	bool wait = false;
+
+	spin_lock(&info->lock);
+
+	if (!info->flushing) {
+		info->flushing = 1;
+		init_waitqueue_head(&info->flush_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_on_flush(info);
+		return;
+	}
+
+	async = kzalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		goto flush;
+
+	async->root = root;
+	async->info = info;
+	async->work.func = flush_delalloc_async;
+
+	btrfs_queue_worker(&root->fs_info->enospc_workers,
+			   &async->work);
+	wait_on_flush(info);
+	return;
+
+flush:
+	btrfs_start_delalloc_inodes(root);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+}
+
+static int maybe_allocate_chunk(struct btrfs_root *root,
+				struct btrfs_space_info *info)
+{
+	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
+	struct btrfs_trans_handle *trans;
+	bool wait = false;
+	int ret = 0;
+	u64 min_metadata;
+	u64 free_space;
+
+	free_space = btrfs_super_total_bytes(disk_super);
+	/*
+	 * we allow the metadata to grow to a max of either 5gb or 5% of the
+	 * space in the volume.
+	 */
+	min_metadata = min((u64)5 * 1024 * 1024 * 1024,
+			   div64_u64(free_space * 5, 100));
+	if (info->total_bytes >= min_metadata) {
+		spin_unlock(&info->lock);
+		return 0;
+	}
+
+	if (info->full) {
+		spin_unlock(&info->lock);
+		return 0;
+	}
+
+	if (!info->allocating_chunk) {
+		info->force_alloc = 1;
+		info->allocating_chunk = 1;
+		init_waitqueue_head(&info->allocate_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_event(info->allocate_wait,
+			   !info->allocating_chunk);
+		return 1;
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+			     4096 + 2 * 1024 * 1024,
+			     info->flags, 0);
+	btrfs_end_transaction(trans, root);
+	if (ret)
+		goto out;
+out:
+	spin_lock(&info->lock);
+	info->allocating_chunk = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->allocate_wait);
+
+	if (ret)
+		return 0;
+	return 1;
+}
+
+/*
+ * Reserve metadata space for delalloc.
+ */
+int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
+					struct inode *inode, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 used;
+	u64 alloc_target;
+	int flushed = 0;
+	int force_delalloc;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+					   num_items);
+again:
+	spin_lock(&meta_sinfo->lock);
+
+	force_delalloc = meta_sinfo->force_delalloc;
+
+	if (unlikely(!meta_sinfo->bytes_root))
+		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+	if (!flushed)
+		meta_sinfo->bytes_delalloc += num_bytes;
+
+	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+	if (used > meta_sinfo->total_bytes) {
+		flushed++;
+
+		if (flushed == 1) {
+			if (maybe_allocate_chunk(root, meta_sinfo))
+				goto again;
+			flushed++;
+		} else {
 			spin_unlock(&meta_sinfo->lock);
-alloc:
-			trans = btrfs_start_transaction(root, 1);
-			if (!trans)
-				return -ENOMEM;
+		}
 
-			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-					     2 * 1024 * 1024, alloc_target, 0);
-			btrfs_end_transaction(trans, root);
-			if (!meta_sinfo) {
-				meta_sinfo = __find_space_info(info,
-							       alloc_target);
-			}
+		if (flushed == 2) {
+			filemap_flush(inode->i_mapping);
+			goto again;
+		} else if (flushed == 3) {
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
+		spin_lock(&meta_sinfo->lock);
+		meta_sinfo->bytes_delalloc -= num_bytes;
 		spin_unlock(&meta_sinfo->lock);
+		printk(KERN_ERR "enospc, has %d, reserved %d\n",
+		       BTRFS_I(inode)->outstanding_extents,
+		       BTRFS_I(inode)->reserved_extents);
+		dump_space_info(meta_sinfo, 0, 0);
+		return -ENOSPC;
+	}
 
-	if (!committed) {
-		committed = 1;
-		trans = btrfs_join_transaction(root, 1);
-		if (!trans)
-			return -ENOMEM;
-		ret = btrfs_commit_transaction(trans, root);
-		if (ret)
-			return ret;
+	BTRFS_I(inode)->reserved_extents++;
+	check_force_delalloc(meta_sinfo);
+	spin_unlock(&meta_sinfo->lock);
+
+	if (!flushed && force_delalloc)
+		filemap_flush(inode->i_mapping);
+
+	return 0;
+}
+
+/*
+ * unreserve num_items number of items worth of metadata space.  This needs to
+ * be paired with btrfs_reserve_metadata_space.
+ *
+ * NOTE: if you have the option, run this _AFTER_ you do a
+ * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
+ * oprations which will result in more used metadata, so we want to make sure we
+ * can do that without issue.
+ */
+int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 alloc_target;
+	bool bug = false;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root, num_items);
+
+	spin_lock(&meta_sinfo->lock);
+	if (meta_sinfo->bytes_may_use < num_bytes) {
+		bug = true;
+		meta_sinfo->bytes_may_use = 0;
+	} else {
+		meta_sinfo->bytes_may_use -= num_bytes;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	BUG_ON(bug);
+
+	return 0;
+}
+
+/*
+ * Reserve some metadata space for use.  We'll calculate the worste case number
+ * of bytes that would be needed to modify num_items number of items.  If we
+ * have space, fantastic, if not, you get -ENOSPC.  Please call
+ * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
+ * items you reserved, since whatever metadata you needed should have already
+ * been allocated.
+ *
+ * This will commit the transaction to make more space if we don't have enough
+ * metadata space.  THe only time we don't do this is if we're reserving space
+ * inside of a transaction, then we will just return -ENOSPC and it is the
+ * callers responsibility to handle it properly.
+ */
+int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 used;
+	u64 alloc_target;
+	int retries = 0;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root, num_items);
+again:
+	spin_lock(&meta_sinfo->lock);
+
+	if (unlikely(!meta_sinfo->bytes_root))
+		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+	if (!retries)
+		meta_sinfo->bytes_may_use += num_bytes;
+
+	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+	if (used > meta_sinfo->total_bytes) {
+		retries++;
+		if (retries == 1) {
+			if (maybe_allocate_chunk(root, meta_sinfo))
+				goto again;
+			retries++;
+		} else {
+			spin_unlock(&meta_sinfo->lock);
+		}
+
+		if (retries == 2) {
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
+		spin_lock(&meta_sinfo->lock);
+		meta_sinfo->bytes_may_use -= num_bytes;
+		spin_unlock(&meta_sinfo->lock);
+
+		dump_space_info(meta_sinfo, 0, 0);
 		return -ENOSPC;
 	}
+
+	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
 	return 0;
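
The worst-case arithmetic in calculate_bytes_needed() above is easier to see with concrete numbers. The standalone sketch below (not part of the patch) mirrors the non-extent-root branch, assuming 4K leaves and nodes and BTRFS_MAX_LEVEL == 8; the helper name and the sizes are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define BTRFS_MAX_LEVEL 8	/* as in the btrfs headers of this era */

/*
 * Mirror of calculate_bytes_needed() for a tree other than the extent
 * root: every item may add an extent item plus a ref item (num_items +
 * 2 * num_items), each of which may force a COW of the target leaf and
 * its two neighbours (* 3), and every such leaf may COW two nodes per
 * level above it.
 */
static uint64_t worst_case_bytes(uint64_t num_items, uint64_t leafsize,
				 uint64_t nodesize)
{
	int level = BTRFS_MAX_LEVEL - 2;
	uint64_t num = (num_items + (2 * num_items)) * 3;

	return (num * leafsize) + (num * (level * 2)) * nodesize;
}

int main(void)
{
	/* 4K leaves and nodes are only an example; mkfs may pick larger */
	uint64_t bytes = worst_case_bytes(1, 4096, 4096);

	/* 1 item -> 9 leaves -> 9*4096 + 9*12*4096 = 479232 bytes (~468K) */
	printf("%llu\n", (unsigned long long)bytes);
	return 0;
}

So a single reserved item can pin on the order of half a megabyte of metadata space in the worst case, which is why the over-reservation is handed back through the unreserve helpers as soon as the operation completes.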
@@ -2888,7 +3271,7 @@ alloc:
 		spin_unlock(&data_sinfo->lock);
 
 		/* commit the current transaction and try again */
-		if (!committed) {
+		if (!committed && !root->fs_info->open_ioctl_trans) {
 			committed = 1;
 			trans = btrfs_join_transaction(root, 1);
 			if (!trans)
@@ -2916,7 +3299,7 @@ alloc:
 	BTRFS_I(inode)->reserved_bytes += bytes;
 	spin_unlock(&data_sinfo->lock);
 
-	return btrfs_check_metadata_free_space(root);
+	return 0;
 }
 
 /*
@@ -3015,17 +3398,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	BUG_ON(!space_info);
 
 	spin_lock(&space_info->lock);
-	if (space_info->force_alloc) {
+	if (space_info->force_alloc)
 		force = 1;
-		space_info->force_alloc = 0;
-	}
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
 
 	thresh = space_info->total_bytes - space_info->bytes_readonly;
-	thresh = div_factor(thresh, 6);
+	thresh = div_factor(thresh, 8);
 	if (!force &&
 	    (space_info->bytes_used + space_info->bytes_pinned +
 	     space_info->bytes_reserved + alloc_bytes) < thresh) {
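
The div_factor() change above raises the point at which do_chunk_alloc() grows a space_info. Assuming div_factor(n, f) scales by f/10 as defined in ctree.h, the pre-allocation threshold moves from 60% to 80% of the writable bytes; a rough standalone approximation of the check, not the kernel code itself:

#include <stdint.h>
#include <stdbool.h>

/* assumed to match btrfs's div_factor(): scale by factor/10 */
static uint64_t div_factor(uint64_t num, int factor)
{
	return num * factor / 10;
}

/*
 * Approximation of the "should we allocate another chunk?" decision
 * after this patch: skip the allocation while committed space is still
 * below 80% of the writable bytes (60% before the change from 6 to 8),
 * unless a caller forced it.
 */
static bool should_alloc_chunk(uint64_t total, uint64_t readonly,
			       uint64_t used, uint64_t alloc_bytes,
			       bool force)
{
	uint64_t thresh = div_factor(total - readonly, 8);

	return force || (used + alloc_bytes >= thresh);
}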
@@ -3039,7 +3420,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	 * we keep a reasonable number of metadata chunks allocated in the
 	 * FS as well.
 	 */
-	if (flags & BTRFS_BLOCK_GROUP_DATA) {
+	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
 		fs_info->data_chunk_allocations++;
 		if (!(fs_info->data_chunk_allocations %
 		      fs_info->metadata_ratio))
@@ -3047,8 +3428,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 
 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
+	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
+	space_info->force_alloc = 0;
+	spin_unlock(&space_info->lock);
 out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
@@ -3747,6 +4131,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	int loop = 0;
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
+	bool failed_alloc = false;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3951,14 +4336,23 @@ refill_cluster:
 
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset && (cached || (!cached &&
-					   loop == LOOP_CACHING_NOWAIT))) {
-			goto loop;
-		} else if (!offset && (!cached &&
-				       loop > LOOP_CACHING_NOWAIT)) {
+		/*
+		 * If we didn't find a chunk, and we haven't failed on this
+		 * block group before, and this block group is in the middle of
+		 * caching and we are ok with waiting, then go ahead and wait
+		 * for progress to be made, and set failed_alloc to true.
+		 *
+		 * If failed_alloc is true then we've already waited on this
+		 * block group once and should move on to the next block group.
+		 */
+		if (!offset && !failed_alloc && !cached &&
+		    loop > LOOP_CACHING_NOWAIT) {
 			wait_block_group_cache_progress(block_group,
 					num_bytes + empty_size);
+			failed_alloc = true;
 			goto have_block_group;
+		} else if (!offset) {
+			goto loop;
 		}
 checks:
 		search_start = stripe_align(root, offset);
@@ -4006,6 +4400,7 @@ checks:
 		break;
 loop:
 		failed_cluster_refill = false;
+		failed_alloc = false;
 		btrfs_put_block_group(block_group);
 	}
 	up_read(&space_info->groups_sem);
@@ -4063,21 +4458,32 @@ loop:
 	return ret;
 }
 
-static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
+			    int dump_block_groups)
 {
 	struct btrfs_block_group_cache *cache;
 
+	spin_lock(&info->lock);
 	printk(KERN_INFO "space_info has %llu free, is %sfull\n",
 	       (unsigned long long)(info->total_bytes - info->bytes_used -
-				    info->bytes_pinned - info->bytes_reserved),
+				    info->bytes_pinned - info->bytes_reserved -
+				    info->bytes_super),
 	       (info->full) ? "" : "not ");
 	printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
-	       " may_use=%llu, used=%llu\n",
+	       " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
+	       "\n",
 	       (unsigned long long)info->total_bytes,
 	       (unsigned long long)info->bytes_pinned,
 	       (unsigned long long)info->bytes_delalloc,
 	       (unsigned long long)info->bytes_may_use,
-	       (unsigned long long)info->bytes_used);
+	       (unsigned long long)info->bytes_used,
+	       (unsigned long long)info->bytes_root,
+	       (unsigned long long)info->bytes_super,
+	       (unsigned long long)info->bytes_reserved);
+	spin_unlock(&info->lock);
+
+	if (!dump_block_groups)
+		return;
 
 	down_read(&info->groups_sem);
 	list_for_each_entry(cache, &info->block_groups, list) {
@@ -4145,7 +4551,7 @@ again:
 		printk(KERN_ERR "btrfs allocation failed flags %llu, "
 		       "wanted %llu\n", (unsigned long long)data,
 		       (unsigned long long)num_bytes);
-		dump_space_info(sinfo, num_bytes);
+		dump_space_info(sinfo, num_bytes, 1);
 	}
 
 	return ret;
@@ -4506,6 +4912,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 generation;
 	u64 refs;
+	u64 flags;
 	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
@@ -4543,15 +4950,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		    generation <= root->root_key.offset)
 			continue;
 
+		/* We don't lock the tree block, it's OK to be racy here */
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+
 		if (wc->stage == DROP_REFERENCE) {
-			ret = btrfs_lookup_extent_info(trans, root,
-						       bytenr, blocksize,
-						       &refs, NULL);
-			BUG_ON(ret);
-			BUG_ON(refs == 0);
 			if (refs == 1)
 				goto reada;
 
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				continue;
@@ -4560,6 +4971,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 					       &wc->update_progress);
 			if (ret < 0)
 				continue;
+		} else {
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 		}
 reada:
 		ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4583,7 +4998,7 @@ reada:
 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_path *path,
-				   struct walk_control *wc)
+				   struct walk_control *wc, int lookup_info)
 {
 	int level = wc->level;
 	struct extent_buffer *eb = path->nodes[level];
@@ -4598,8 +5013,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * when reference count of tree block is 1, it won't increase
 	 * again. once full backref flag is set, we never clear it.
 	 */
-	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+	if (lookup_info &&
+	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
 		BUG_ON(!path->locks[level]);
 		ret = btrfs_lookup_extent_info(trans, root,
 					       eb->start, eb->len,
@@ -4660,7 +5076,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 struct walk_control *wc)
+				 struct walk_control *wc, int *lookup_info)
 {
 	u64 bytenr;
 	u64 generation;
@@ -4680,8 +5096,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	 * for the subtree
 	 */
 	if (wc->stage == UPDATE_BACKREF &&
-	    generation <= root->root_key.offset)
+	    generation <= root->root_key.offset) {
+		*lookup_info = 1;
 		return 1;
+	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
 	blocksize = btrfs_level_size(root, level - 1);
@@ -4694,14 +5112,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	btrfs_tree_lock(next);
 	btrfs_set_lock_blocking(next);
 
-	if (wc->stage == DROP_REFERENCE) {
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &wc->refs[level - 1],
-					       &wc->flags[level - 1]);
-		BUG_ON(ret);
-		BUG_ON(wc->refs[level - 1] == 0);
+	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+				       &wc->refs[level - 1],
+				       &wc->flags[level - 1]);
+	BUG_ON(ret);
+	BUG_ON(wc->refs[level - 1] == 0);
+	*lookup_info = 0;
 
+	if (wc->stage == DROP_REFERENCE) {
 		if (wc->refs[level - 1] > 1) {
+			if (level == 1 &&
+			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				goto skip;
+
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				goto skip;
@@ -4715,12 +5138,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			wc->stage = UPDATE_BACKREF;
 			wc->shared_level = level - 1;
 		}
+	} else {
+		if (level == 1 &&
+		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+			goto skip;
 	}
 
 	if (!btrfs_buffer_uptodate(next, generation)) {
 		btrfs_tree_unlock(next);
 		free_extent_buffer(next);
 		next = NULL;
+		*lookup_info = 1;
 	}
 
 	if (!next) {
@@ -4743,21 +5171,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 skip:
 	wc->refs[level - 1] = 0;
 	wc->flags[level - 1] = 0;
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			parent = path->nodes[level]->start;
+		} else {
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level]));
+			parent = 0;
+		}
 
-	if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-		parent = path->nodes[level]->start;
-	} else {
-		BUG_ON(root->root_key.objectid !=
-		       btrfs_header_owner(path->nodes[level]));
-		parent = 0;
+		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+					root->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
 	}
-
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-				root->root_key.objectid, level - 1, 0);
-	BUG_ON(ret);
-
 	btrfs_tree_unlock(next);
 	free_extent_buffer(next);
+	*lookup_info = 1;
 	return 1;
 }
 
@@ -4871,6 +5300,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 				   struct walk_control *wc)
 {
 	int level = wc->level;
+	int lookup_info = 1;
 	int ret;
 
 	while (level >= 0) {
@@ -4878,14 +5308,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		    btrfs_header_nritems(path->nodes[level]))
 			break;
 
-		ret = walk_down_proc(trans, root, path, wc);
+		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
 
 		if (level == 0)
 			break;
 
-		ret = do_walk_down(trans, root, path, wc);
+		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;
 			continue;
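
Taken together, the reservation helpers introduced earlier in this patch are meant to be used in pairs: reserve the worst-case space for the items an operation may touch, perform the modification, then release the same number of items (preferably after btrfs_end_transaction, per the comment above btrfs_unreserve_metadata_space). A simplified, illustrative call pattern in the style of the file's callers; the wrapper function and its error handling are hypothetical:

/*
 * Illustrative caller pattern only; the transaction handling and the
 * actual tree modification are elided.  num_items counts the tree items
 * the operation may touch and must match between reserve and unreserve.
 */
static int modify_one_item(struct btrfs_root *root)
{
	int num_items = 1;
	int ret;

	ret = btrfs_reserve_metadata_space(root, num_items);
	if (ret)	/* -ENOSPC after the chunk-alloc and flush retries */
		return ret;

	/* ... start a transaction and do the tree modification ... */

	btrfs_unreserve_metadata_space(root, num_items);
	return 0;
}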