Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c | 724
1 file changed, 611 insertions(+), 113 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 993f93ff7ba6..94627c4cc193 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 				struct extent_buffer **must_clean);
 static int find_next_key(struct btrfs_path *path, int level,
 			 struct btrfs_key *key);
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
+			    int dump_block_groups);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -1566,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-#ifdef BIO_RW_DISCARD
 static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
 			     DISCARD_FL_BARRIER);
 }
-#endif
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 				u64 num_bytes)
 {
-#ifdef BIO_RW_DISCARD
 	int ret;
 	u64 map_length = num_bytes;
 	struct btrfs_multi_bio *multi = NULL;
 
+	if (!btrfs_test_opt(root, DISCARD))
+		return 0;
+
 	/* Tell the block device(s) that the sectors can be discarded */
 	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 			      bytenr, &map_length, &multi, 0);
@@ -1602,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 	}
 
 	return ret;
-#else
-	return 0;
-#endif
 }
 
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
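blkdev_issue_discard() takes its range in 512-byte sectors, which is why btrfs_issue_discard() above shifts the byte offsets right by 9 before passing them down. A minimal standalone model of that conversion (plain userspace C, illustrative only):

#include <stdio.h>
#include <stdint.h>

/* Model of the byte-to-sector conversion in btrfs_issue_discard():
 * a right shift by 9 divides by 512, the unit blkdev_issue_discard()
 * expects for its sector and length arguments. */
int main(void)
{
	uint64_t start = 1048576;	/* byte offset of the extent */
	uint64_t len = 65536;		/* extent length in bytes */

	printf("sector=%llu nr_sects=%llu\n",
	       (unsigned long long)(start >> 9),	/* 2048 */
	       (unsigned long long)(len >> 9));		/* 128 */
	return 0;
}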
@@ -2765,67 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
 					       alloc_target);
 }
 
+static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
+{
+	u64 num_bytes;
+	int level;
+
+	level = BTRFS_MAX_LEVEL - 2;
+	/*
+	 * NOTE: these calculations are absolutely the worst possible case.
+	 * This assumes that _every_ item we insert will require a new leaf,
+	 * and that the tree has grown to its maximum level size.
+	 */
+
+	/*
+	 * for every item we insert we could insert both an extent item and an
+	 * extent ref item. Then for every item we insert, we will need to cow
+	 * both the original leaf, plus the leaf to the left and right of it.
+	 *
+	 * Unless we are talking about the extent root, then we just want the
+	 * number of items * 2, since we just need the extent item plus its ref.
+	 */
+	if (root == root->fs_info->extent_root)
+		num_bytes = num_items * 2;
+	else
+		num_bytes = (num_items + (2 * num_items)) * 3;
+
+	/*
+	 * num_bytes is total number of leaves we could need times the leaf
+	 * size, and then for every leaf we could end up cow'ing 2 nodes per
+	 * level, down to the leaf level.
+	 */
+	num_bytes = (num_bytes * root->leafsize) +
+		(num_bytes * (level * 2)) * root->nodesize;
+
+	return num_bytes;
+}
+
 /*
- * for now this just makes sure we have at least 5% of our metadata space free
- * for use.
+ * Unreserve metadata space for delalloc. If we have fewer reserved credits
+ * than we have extents, this function does nothing.
  */
-int btrfs_check_metadata_free_space(struct btrfs_root *root)
+int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
+					  struct inode *inode, int num_items)
 {
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_space_info *meta_sinfo;
-	u64 alloc_target, thresh;
-	int committed = 0, ret;
+	u64 num_bytes;
+	u64 alloc_target;
+	bool bug = false;
 
 	/* get the space info for where the metadata will live */
 	alloc_target = btrfs_get_alloc_profile(root, 0);
 	meta_sinfo = __find_space_info(info, alloc_target);
-	if (!meta_sinfo)
-		goto alloc;
 
-again:
+	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+					   num_items);
+
 	spin_lock(&meta_sinfo->lock);
-	if (!meta_sinfo->full)
-		thresh = meta_sinfo->total_bytes * 80;
-	else
-		thresh = meta_sinfo->total_bytes * 95;
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	if (BTRFS_I(inode)->reserved_extents <=
+	    BTRFS_I(inode)->outstanding_extents) {
+		spin_unlock(&BTRFS_I(inode)->accounting_lock);
+		spin_unlock(&meta_sinfo->lock);
+		return 0;
+	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
+	BTRFS_I(inode)->reserved_extents--;
+	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
+
+	if (meta_sinfo->bytes_delalloc < num_bytes) {
+		bug = true;
+		meta_sinfo->bytes_delalloc = 0;
+	} else {
+		meta_sinfo->bytes_delalloc -= num_bytes;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	BUG_ON(bug);
 
+	return 0;
+}
+
+static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
+{
+	u64 thresh;
+
+	thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use;
+
+	thresh = meta_sinfo->total_bytes - thresh;
+	thresh *= 80;
 	do_div(thresh, 100);
+	if (thresh <= meta_sinfo->bytes_delalloc)
+		meta_sinfo->force_delalloc = 1;
+	else
+		meta_sinfo->force_delalloc = 0;
+}
 
-	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-	    meta_sinfo->bytes_super > thresh) {
-		struct btrfs_trans_handle *trans;
-		if (!meta_sinfo->full) {
-			meta_sinfo->force_alloc = 1;
+struct async_flush {
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+	struct btrfs_work work;
+};
+
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{
+	struct async_flush *async;
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+
+	async = container_of(work, struct async_flush, work);
+	root = async->root;
+	info = async->info;
+
+	btrfs_start_delalloc_inodes(root);
+	wake_up(&info->flush_wait);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+
+	kfree(async);
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{
+	DEFINE_WAIT(wait);
+	u64 used;
+
+	while (1) {
+		prepare_to_wait(&info->flush_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock(&info->lock);
+		if (!info->flushing) {
+			spin_unlock(&info->lock);
+			break;
+		}
+
+		used = info->bytes_used + info->bytes_reserved +
+			info->bytes_pinned + info->bytes_readonly +
+			info->bytes_super + info->bytes_root +
+			info->bytes_may_use + info->bytes_delalloc;
+		if (used < info->total_bytes) {
+			spin_unlock(&info->lock);
+			break;
+		}
+		spin_unlock(&info->lock);
+		schedule();
+	}
+	finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+			   struct btrfs_space_info *info)
+{
+	struct async_flush *async;
+	bool wait = false;
+
+	spin_lock(&info->lock);
+
+	if (!info->flushing) {
+		info->flushing = 1;
+		init_waitqueue_head(&info->flush_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_on_flush(info);
+		return;
+	}
+
+	async = kzalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		goto flush;
+
+	async->root = root;
+	async->info = info;
+	async->work.func = flush_delalloc_async;
+
+	btrfs_queue_worker(&root->fs_info->enospc_workers,
+			   &async->work);
+	wait_on_flush(info);
+	return;
+
+flush:
+	btrfs_start_delalloc_inodes(root);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+}
+
+static int maybe_allocate_chunk(struct btrfs_root *root,
+				struct btrfs_space_info *info)
+{
+	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
+	struct btrfs_trans_handle *trans;
+	bool wait = false;
+	int ret = 0;
+	u64 min_metadata;
+	u64 free_space;
+
+	free_space = btrfs_super_total_bytes(disk_super);
+	/*
+	 * we allow the metadata to grow to a max of either 10gb or 5% of the
+	 * space in the volume.
+	 */
+	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
+			   div64_u64(free_space * 5, 100));
+	if (info->total_bytes >= min_metadata) {
+		spin_unlock(&info->lock);
+		return 0;
+	}
+
+	if (info->full) {
+		spin_unlock(&info->lock);
+		return 0;
+	}
+
+	if (!info->allocating_chunk) {
+		info->force_alloc = 1;
+		info->allocating_chunk = 1;
+		init_waitqueue_head(&info->allocate_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_event(info->allocate_wait,
+			   !info->allocating_chunk);
+		return 1;
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+			     4096 + 2 * 1024 * 1024,
+			     info->flags, 0);
+	btrfs_end_transaction(trans, root);
+	if (ret)
+		goto out;
+out:
+	spin_lock(&info->lock);
+	info->allocating_chunk = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->allocate_wait);
+
+	if (ret)
+		return 0;
+	return 1;
+}
+
+/*
+ * Reserve metadata space for delalloc.
+ */
+int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
+					struct inode *inode, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 used;
+	u64 alloc_target;
+	int flushed = 0;
+	int force_delalloc;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
+					   num_items);
+again:
+	spin_lock(&meta_sinfo->lock);
+
+	force_delalloc = meta_sinfo->force_delalloc;
+
+	if (unlikely(!meta_sinfo->bytes_root))
+		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+	if (!flushed)
+		meta_sinfo->bytes_delalloc += num_bytes;
+
+	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+	if (used > meta_sinfo->total_bytes) {
+		flushed++;
+
+		if (flushed == 1) {
+			if (maybe_allocate_chunk(root, meta_sinfo))
+				goto again;
+			flushed++;
+		} else {
 			spin_unlock(&meta_sinfo->lock);
-alloc:
-			trans = btrfs_start_transaction(root, 1);
-			if (!trans)
-				return -ENOMEM;
+		}
 
-			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-					     2 * 1024 * 1024, alloc_target, 0);
-			btrfs_end_transaction(trans, root);
-			if (!meta_sinfo) {
-				meta_sinfo = __find_space_info(info,
-							       alloc_target);
-			}
+		if (flushed == 2) {
+			filemap_flush(inode->i_mapping);
+			goto again;
+		} else if (flushed == 3) {
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
+		spin_lock(&meta_sinfo->lock);
+		meta_sinfo->bytes_delalloc -= num_bytes;
 		spin_unlock(&meta_sinfo->lock);
+		printk(KERN_ERR "enospc, has %d, reserved %d\n",
+		       BTRFS_I(inode)->outstanding_extents,
+		       BTRFS_I(inode)->reserved_extents);
+		dump_space_info(meta_sinfo, 0, 0);
+		return -ENOSPC;
+	}
 
-	if (!committed) {
-		committed = 1;
-		trans = btrfs_join_transaction(root, 1);
-		if (!trans)
-			return -ENOMEM;
-		ret = btrfs_commit_transaction(trans, root);
-		if (ret)
-			return ret;
+	BTRFS_I(inode)->reserved_extents++;
+	check_force_delalloc(meta_sinfo);
+	spin_unlock(&meta_sinfo->lock);
+
+	if (!flushed && force_delalloc)
+		filemap_flush(inode->i_mapping);
+
+	return 0;
+}
+
+/*
+ * unreserve num_items number of items worth of metadata space. This needs to
+ * be paired with btrfs_reserve_metadata_space.
+ *
+ * NOTE: if you have the option, run this _AFTER_ you do a
+ * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
+ * operations which will result in more used metadata, so we want to make
+ * sure we can do that without issue.
+ */
+int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 alloc_target;
+	bool bug = false;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root, num_items);
+
+	spin_lock(&meta_sinfo->lock);
+	if (meta_sinfo->bytes_may_use < num_bytes) {
+		bug = true;
+		meta_sinfo->bytes_may_use = 0;
+	} else {
+		meta_sinfo->bytes_may_use -= num_bytes;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	BUG_ON(bug);
+
+	return 0;
+}
+
+/*
+ * Reserve some metadata space for use. We'll calculate the worst case number
+ * of bytes that would be needed to modify num_items number of items. If we
+ * have space, fantastic, if not, you get -ENOSPC. Please call
+ * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
+ * items you reserved, since whatever metadata you needed should have already
+ * been allocated.
+ *
+ * This will commit the transaction to make more space if we don't have enough
+ * metadata space. The only time we don't do this is if we're reserving space
+ * inside of a transaction, then we will just return -ENOSPC and it is the
+ * caller's responsibility to handle it properly.
+ */
+int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 num_bytes;
+	u64 used;
+	u64 alloc_target;
+	int retries = 0;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	num_bytes = calculate_bytes_needed(root, num_items);
+again:
+	spin_lock(&meta_sinfo->lock);
+
+	if (unlikely(!meta_sinfo->bytes_root))
+		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+
+	if (!retries)
+		meta_sinfo->bytes_may_use += num_bytes;
+
+	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
+		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+
+	if (used > meta_sinfo->total_bytes) {
+		retries++;
+		if (retries == 1) {
+			if (maybe_allocate_chunk(root, meta_sinfo))
+				goto again;
+			retries++;
+		} else {
+			spin_unlock(&meta_sinfo->lock);
+		}
+
+		if (retries == 2) {
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
+		spin_lock(&meta_sinfo->lock);
+		meta_sinfo->bytes_may_use -= num_bytes;
+		spin_unlock(&meta_sinfo->lock);
+
+		dump_space_info(meta_sinfo, 0, 0);
 		return -ENOSPC;
 	}
+
+	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
 	return 0;
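The reservation math above is easier to see with numbers. A standalone model of calculate_bytes_needed(), assuming 4096-byte leaves and nodes and the default BTRFS_MAX_LEVEL of 8 (so level ends up as 6); the function name and constants here are illustrative, not kernel API:

#include <stdio.h>
#include <stdint.h>

/* Model of the worst-case reservation, assuming leafsize == nodesize == 4096
 * and BTRFS_MAX_LEVEL == 8 (so level == 6), mirroring the patch's formula. */
static uint64_t worst_case_bytes(int num_items, int is_extent_root)
{
	const uint64_t leafsize = 4096, nodesize = 4096;
	const int level = 8 - 2;	/* BTRFS_MAX_LEVEL - 2 */
	uint64_t num_bytes;

	if (is_extent_root)
		num_bytes = (uint64_t)num_items * 2;	/* extent item + ref */
	else	/* each item may COW its leaf plus the two neighbors */
		num_bytes = (uint64_t)(num_items + 2 * num_items) * 3;

	/* leaves, plus 2 COWed nodes per level down to the leaf */
	return num_bytes * leafsize + (num_bytes * (level * 2)) * nodesize;
}

int main(void)
{
	/* one item against a normal root: (1 + 2) * 3 = 9 "leaves",
	 * 9 * 4096 + (9 * 12) * 4096 = 479232 bytes reserved */
	printf("%llu\n", (unsigned long long)worst_case_bytes(1, 0));
	return 0;
}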
@@ -2888,7 +3268,7 @@ alloc:
 		spin_unlock(&data_sinfo->lock);
 
 		/* commit the current transaction and try again */
-		if (!committed) {
+		if (!committed && !root->fs_info->open_ioctl_trans) {
 			committed = 1;
 			trans = btrfs_join_transaction(root, 1);
 			if (!trans)
@@ -2916,7 +3296,7 @@ alloc:
 	BTRFS_I(inode)->reserved_bytes += bytes;
 	spin_unlock(&data_sinfo->lock);
 
-	return btrfs_check_metadata_free_space(root);
+	return 0;
 }
 
 /*
@@ -3015,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	BUG_ON(!space_info);
 
 	spin_lock(&space_info->lock);
-	if (space_info->force_alloc) {
+	if (space_info->force_alloc)
 		force = 1;
-		space_info->force_alloc = 0;
-	}
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
 
 	thresh = space_info->total_bytes - space_info->bytes_readonly;
-	thresh = div_factor(thresh, 6);
+	thresh = div_factor(thresh, 8);
 	if (!force &&
 	    (space_info->bytes_used + space_info->bytes_pinned +
 	     space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -3039,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	 * we keep a reasonable number of metadata chunks allocated in the
 	 * FS as well.
 	 */
-	if (flags & BTRFS_BLOCK_GROUP_DATA) {
+	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
 		fs_info->data_chunk_allocations++;
 		if (!(fs_info->data_chunk_allocations %
 		      fs_info->metadata_ratio))
@@ -3047,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 
 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
+	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
+	space_info->force_alloc = 0;
+	spin_unlock(&space_info->lock);
 out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
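The div_factor() change is the interesting part of this hunk: in this file div_factor(num, factor) evaluates to num * factor / 10, so moving the factor from 6 to 8 raises the chunk pre-allocation trigger from 60% to 80% of the non-readonly space. A userspace sketch of that arithmetic, assuming that definition of div_factor():

#include <stdio.h>
#include <stdint.h>

/* Model of btrfs's div_factor(): num * factor / 10. */
static uint64_t div_factor(uint64_t num, int factor)
{
	num *= factor;
	num /= 10;
	return num;
}

int main(void)
{
	uint64_t thresh = 1024ULL * 1024 * 1024;	/* 1GiB usable space */

	/* old trigger at 60%, new trigger at 80% */
	printf("old=%llu new=%llu\n",
	       (unsigned long long)div_factor(thresh, 6),
	       (unsigned long long)div_factor(thresh, 8));
	return 0;
}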
@@ -3306,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 	if (is_data)
 		goto pinit;
 
+	/*
+	 * discard is sloooow, and so triggering discards on
+	 * individual btree blocks isn't a good plan.  Just
+	 * pin everything in discard mode.
+	 */
+	if (btrfs_test_opt(root, DISCARD))
+		goto pinit;
+
 	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
 	if (!buf)
 		goto pinit;
@@ -3713,7 +4102,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }
 
 enum btrfs_loop_type {
-	LOOP_CACHED_ONLY = 0,
+	LOOP_FIND_IDEAL = 0,
 	LOOP_CACHING_NOWAIT = 1,
 	LOOP_CACHING_WAIT = 2,
 	LOOP_ALLOC_CHUNK = 3,
@@ -3742,11 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_block_group_cache *block_group = NULL;
 	int empty_cluster = 2 * 1024 * 1024;
 	int allowed_chunk_alloc = 0;
+	int done_chunk_alloc = 0;
 	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int loop = 0;
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
+	bool failed_alloc = false;
+	u64 ideal_cache_percent = 0;
+	u64 ideal_cache_offset = 0;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3782,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		empty_cluster = 0;
 
 	if (search_start == hint_byte) {
+ideal_cache:
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
 		/*
 		 * we don't want to use the block group if it doesn't match our
 		 * allocation bits, or if its not cached.
+		 *
+		 * However if we are re-searching with an ideal block group
+		 * picked out then we don't care that the block group is cached.
 		 */
 		if (block_group && block_group_bits(block_group, data) &&
-		    block_group_cache_done(block_group)) {
+		    (block_group->cached != BTRFS_CACHE_NO ||
+		     search_start == ideal_cache_offset)) {
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
 			    block_group->ro) {
@@ -3801,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 			 */
 			btrfs_put_block_group(block_group);
 			up_read(&space_info->groups_sem);
-		} else
+		} else {
 			goto have_block_group;
+		}
 		} else if (block_group) {
 			btrfs_put_block_group(block_group);
 		}
 	}
-
 search:
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -3819,28 +4217,45 @@ search:
 
 have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+			u64 free_percent;
+
+			free_percent = btrfs_block_group_used(&block_group->item);
+			free_percent *= 100;
+			free_percent = div64_u64(free_percent,
+						 block_group->key.offset);
+			free_percent = 100 - free_percent;
+			if (free_percent > ideal_cache_percent &&
+			    likely(!block_group->ro)) {
+				ideal_cache_offset = block_group->key.objectid;
+				ideal_cache_percent = free_percent;
+			}
+
 			/*
-			 * we want to start caching kthreads, but not too many
-			 * right off the bat so we don't overwhelm the system,
-			 * so only start them if there are less than 2 and we're
-			 * in the initial allocation phase.
+			 * We only want to start kthread caching if we are at
+			 * the point where we will wait for caching to make
+			 * progress, or if our ideal search is over and we've
+			 * found somebody to start caching.
 			 */
 			if (loop > LOOP_CACHING_NOWAIT ||
-			    atomic_read(&space_info->caching_threads) < 2) {
+			    (loop > LOOP_FIND_IDEAL &&
+			     atomic_read(&space_info->caching_threads) < 2)) {
 				ret = cache_block_group(block_group);
 				BUG_ON(ret);
 			}
-		}
-
-		cached = block_group_cache_done(block_group);
-		if (unlikely(!cached)) {
 			found_uncached_bg = true;
 
-			/* if we only want cached bgs, loop */
-			if (loop == LOOP_CACHED_ONLY)
+			/*
+			 * If loop is set for cached only, try the next block
+			 * group.
+			 */
+			if (loop == LOOP_FIND_IDEAL)
 				goto loop;
 		}
 
+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached))
+			found_uncached_bg = true;
+
 		if (unlikely(block_group->ro))
 			goto loop;
 
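The free_percent computation in the hunk above is plain integer math: bytes used scaled to a percentage of the block group size, then inverted. A small standalone model with made-up sizes:

#include <stdio.h>
#include <stdint.h>

/* Model of the "ideal cache" heuristic: percentage of a block group that is
 * free, computed with integer division as in the patch. Sizes are invented. */
int main(void)
{
	uint64_t used = 200ULL * 1024 * 1024;	/* bytes used in the group */
	uint64_t size = 1024ULL * 1024 * 1024;	/* block group length */
	uint64_t free_percent = used * 100 / size;	/* percent used: 19 */

	free_percent = 100 - free_percent;		/* percent free */
	printf("%llu%% free\n", (unsigned long long)free_percent);	/* 81 */
	return 0;
}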
@@ -3951,14 +4366,23 @@ refill_cluster:
 
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset && (cached || (!cached &&
-					   loop == LOOP_CACHING_NOWAIT))) {
-			goto loop;
-		} else if (!offset && (!cached &&
-				       loop > LOOP_CACHING_NOWAIT)) {
+		/*
+		 * If we didn't find a chunk, and we haven't failed on this
+		 * block group before, and this block group is in the middle of
+		 * caching and we are ok with waiting, then go ahead and wait
+		 * for progress to be made, and set failed_alloc to true.
+		 *
+		 * If failed_alloc is true then we've already waited on this
+		 * block group once and should move on to the next block group.
+		 */
+		if (!offset && !failed_alloc && !cached &&
+		    loop > LOOP_CACHING_NOWAIT) {
 			wait_block_group_cache_progress(block_group,
 					num_bytes + empty_size);
+			failed_alloc = true;
 			goto have_block_group;
+		} else if (!offset) {
+			goto loop;
 		}
 checks:
 		search_start = stripe_align(root, offset);
@@ -4006,13 +4430,16 @@ checks:
 			break;
 loop:
 		failed_cluster_refill = false;
+		failed_alloc = false;
 		btrfs_put_block_group(block_group);
 	}
 	up_read(&space_info->groups_sem);
 
-	/* LOOP_CACHED_ONLY, only search fully cached block groups
-	 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-	 * dont wait for them to finish caching
+	/* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait
+	 * for them to make caching progress.  Also
+	 * determine the best possible bg to cache
+	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+	 * caching kthreads as we move along
 	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
 	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
 	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4021,12 +4448,47 @@ loop:
 	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
 	    (found_uncached_bg || empty_size || empty_cluster ||
 	     allowed_chunk_alloc)) {
-		if (found_uncached_bg) {
+		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 			found_uncached_bg = false;
-			if (loop < LOOP_CACHING_WAIT) {
-				loop++;
+			loop++;
+			if (!ideal_cache_percent &&
+			    atomic_read(&space_info->caching_threads))
 				goto search;
-			}
+
+			/*
+			 * One of the following two things has happened so far
+			 *
+			 * 1) We found an ideal block group for caching that
+			 * is mostly full and will cache quickly, so we might
+			 * as well wait for it.
+			 *
+			 * 2) We searched for cached only and we didn't find
+			 * anything, and we didn't start any caching kthreads
+			 * either, so chances are we will loop through and
+			 * start a couple caching kthreads, and then come back
+			 * around and just wait for them.  This will be slower
+			 * because we will have 2 caching kthreads reading at
+			 * the same time when we could have just started one
+			 * and waited for it to get far enough to give us an
+			 * allocation, so go ahead and go to the wait caching
+			 * loop.
+			 */
+			loop = LOOP_CACHING_WAIT;
+			search_start = ideal_cache_offset;
+			ideal_cache_percent = 0;
+			goto ideal_cache;
+		} else if (loop == LOOP_FIND_IDEAL) {
+			/*
+			 * Didn't find an uncached bg, wait on anything we find
+			 * next.
+			 */
+			loop = LOOP_CACHING_WAIT;
+			goto search;
+		}
+
+		if (loop < LOOP_CACHING_WAIT) {
+			loop++;
+			goto search;
 		}
 
 		if (loop == LOOP_ALLOC_CHUNK) {
@@ -4038,7 +4500,8 @@ loop:
 			ret = do_chunk_alloc(trans, root, num_bytes +
 					     2 * 1024 * 1024, data, 1);
 			allowed_chunk_alloc = 0;
-		} else {
+			done_chunk_alloc = 1;
+		} else if (!done_chunk_alloc) {
 			space_info->force_alloc = 1;
 		}
 
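After this patch the allocator walks the loop stages in a fixed order, escalating only when the previous pass found nothing. A sketch of that progression (the enum values mirror the patch; the descriptions are paraphrased):

#include <stdio.h>

/* The allocator's passes in escalation order; LOOP_FIND_IDEAL replaces the
 * old LOOP_CACHED_ONLY stage. */
enum loop_type {
	LOOP_FIND_IDEAL = 0,
	LOOP_CACHING_NOWAIT = 1,
	LOOP_CACHING_WAIT = 2,
	LOOP_ALLOC_CHUNK = 3,
	LOOP_NO_EMPTY_SIZE = 4,
};

int main(void)
{
	static const char *names[] = {
		"search caching/cached bg's, pick the ideal bg to cache",
		"search partially cached bg's, kick caching kthreads",
		"search everything, wait if our bg is caching",
		"force a chunk allocation and try again",
		"retry with empty_size and empty_cluster set to 0",
	};

	for (int loop = LOOP_FIND_IDEAL; loop <= LOOP_NO_EMPTY_SIZE; loop++)
		printf("pass %d: %s\n", loop, names[loop]);
	return 0;
}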
@@ -4063,21 +4526,32 @@ loop:
 	return ret;
 }
 
-static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
+			    int dump_block_groups)
 {
 	struct btrfs_block_group_cache *cache;
 
+	spin_lock(&info->lock);
 	printk(KERN_INFO "space_info has %llu free, is %sfull\n",
 	       (unsigned long long)(info->total_bytes - info->bytes_used -
-				    info->bytes_pinned - info->bytes_reserved),
+				    info->bytes_pinned - info->bytes_reserved -
+				    info->bytes_super),
 	       (info->full) ? "" : "not ");
 	printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
-	       " may_use=%llu, used=%llu\n",
+	       " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
+	       "\n",
 	       (unsigned long long)info->total_bytes,
 	       (unsigned long long)info->bytes_pinned,
 	       (unsigned long long)info->bytes_delalloc,
 	       (unsigned long long)info->bytes_may_use,
-	       (unsigned long long)info->bytes_used);
+	       (unsigned long long)info->bytes_used,
+	       (unsigned long long)info->bytes_root,
+	       (unsigned long long)info->bytes_super,
+	       (unsigned long long)info->bytes_reserved);
+	spin_unlock(&info->lock);
+
+	if (!dump_block_groups)
+		return;
 
 	down_read(&info->groups_sem);
 	list_for_each_entry(cache, &info->block_groups, list) {
@@ -4145,7 +4619,7 @@ again:
 		printk(KERN_ERR "btrfs allocation failed flags %llu, "
 		       "wanted %llu\n", (unsigned long long)data,
 		       (unsigned long long)num_bytes);
-		dump_space_info(sinfo, num_bytes);
+		dump_space_info(sinfo, num_bytes, 1);
 	}
 
 	return ret;
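dump_space_info() now also subtracts bytes_super when reporting the free figure. A toy model of that accounting with invented numbers, just to show which counters participate:

#include <stdio.h>
#include <stdint.h>

/* Model of the "free" computation in dump_space_info() after this patch:
 * total minus used, pinned, reserved, and (newly) super bytes. */
int main(void)
{
	uint64_t total = 8ULL << 30, used = 5ULL << 30;
	uint64_t pinned = 1ULL << 20, reserved = 4ULL << 20, super = 2ULL << 20;
	uint64_t free_bytes = total - used - pinned - reserved - super;

	printf("space_info has %llu free\n",
	       (unsigned long long)free_bytes);	/* 3213885440 */
	return 0;
}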
@@ -4506,6 +4980,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 generation;
 	u64 refs;
+	u64 flags;
 	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
@@ -4543,15 +5018,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		    generation <= root->root_key.offset)
 			continue;
 
+		/* We don't lock the tree block, it's OK to be racy here */
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+
 		if (wc->stage == DROP_REFERENCE) {
-			ret = btrfs_lookup_extent_info(trans, root,
-						       bytenr, blocksize,
-						       &refs, NULL);
-			BUG_ON(ret);
-			BUG_ON(refs == 0);
 			if (refs == 1)
 				goto reada;
 
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				continue;
@@ -4560,6 +5039,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 					       &wc->update_progress);
 			if (ret < 0)
 				continue;
+		} else {
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 		}
 reada:
 		ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4583,7 +5066,7 @@ reada:
 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_path *path,
-				   struct walk_control *wc)
+				   struct walk_control *wc, int lookup_info)
 {
 	int level = wc->level;
 	struct extent_buffer *eb = path->nodes[level];
@@ -4598,8 +5081,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * when reference count of tree block is 1, it won't increase
 	 * again. once full backref flag is set, we never clear it.
 	 */
-	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+	if (lookup_info &&
+	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
 		BUG_ON(!path->locks[level]);
 		ret = btrfs_lookup_extent_info(trans, root,
 					       eb->start, eb->len,
@@ -4660,7 +5144,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 struct walk_control *wc)
+				 struct walk_control *wc, int *lookup_info)
 {
 	u64 bytenr;
 	u64 generation;
@@ -4680,8 +5164,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	 * for the subtree
 	 */
 	if (wc->stage == UPDATE_BACKREF &&
-	    generation <= root->root_key.offset)
+	    generation <= root->root_key.offset) {
+		*lookup_info = 1;
 		return 1;
+	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
 	blocksize = btrfs_level_size(root, level - 1);
@@ -4694,14 +5180,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	btrfs_tree_lock(next);
 	btrfs_set_lock_blocking(next);
 
-	if (wc->stage == DROP_REFERENCE) {
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &wc->refs[level - 1],
-					       &wc->flags[level - 1]);
-		BUG_ON(ret);
-		BUG_ON(wc->refs[level - 1] == 0);
+	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+				       &wc->refs[level - 1],
+				       &wc->flags[level - 1]);
+	BUG_ON(ret);
+	BUG_ON(wc->refs[level - 1] == 0);
+	*lookup_info = 0;
 
+	if (wc->stage == DROP_REFERENCE) {
 		if (wc->refs[level - 1] > 1) {
+			if (level == 1 &&
+			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				goto skip;
+
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				goto skip;
@@ -4715,12 +5206,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			wc->stage = UPDATE_BACKREF;
 			wc->shared_level = level - 1;
 		}
+	} else {
+		if (level == 1 &&
+		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+			goto skip;
 	}
 
 	if (!btrfs_buffer_uptodate(next, generation)) {
 		btrfs_tree_unlock(next);
 		free_extent_buffer(next);
 		next = NULL;
+		*lookup_info = 1;
 	}
 
 	if (!next) {
@@ -4743,21 +5239,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 skip:
 	wc->refs[level - 1] = 0;
 	wc->flags[level - 1] = 0;
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			parent = path->nodes[level]->start;
+		} else {
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level]));
+			parent = 0;
+		}
 
-	if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-		parent = path->nodes[level]->start;
-	} else {
-		BUG_ON(root->root_key.objectid !=
-		       btrfs_header_owner(path->nodes[level]));
-		parent = 0;
+		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+					root->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
 	}
-
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-				root->root_key.objectid, level - 1, 0);
-	BUG_ON(ret);
-
 	btrfs_tree_unlock(next);
 	free_extent_buffer(next);
+	*lookup_info = 1;
 	return 1;
 }
 
@@ -4871,6 +5368,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 			     struct walk_control *wc)
 {
 	int level = wc->level;
+	int lookup_info = 1;
 	int ret;
 
 	while (level >= 0) {
@@ -4878,14 +5376,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		    btrfs_header_nritems(path->nodes[level]))
 			break;
 
-		ret = walk_down_proc(trans, root, path, wc);
+		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
 
 		if (level == 0)
 			break;
 
-		ret = do_walk_down(trans, root, path, wc);
+		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;
 			continue;
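The lookup_info flag threaded through walk_down_proc() and do_walk_down() is a small memoization: once do_walk_down() has filled wc->refs[] and wc->flags[] for the next level, the repeat btrfs_lookup_extent_info() call in walk_down_proc() can be skipped until something invalidates the cached values. A generic sketch of that pattern with hypothetical names (not btrfs code):

#include <stdio.h>

/* Sketch of the lookup_info pattern: an expensive lookup is done once per
 * descent and skipped on the next step unless a prior step invalidated it.
 * All names here are illustrative. */
struct walk_state {
	int cached_refs;	/* stands in for wc->refs[level] */
	int lookup_info;	/* 1 = cache stale, must re-lookup */
};

static int expensive_lookup(int level)
{
	/* models btrfs_lookup_extent_info() */
	printf("lookup at level %d\n", level);
	return 2;
}

static void visit(struct walk_state *ws, int level)
{
	if (ws->lookup_info) {
		ws->cached_refs = expensive_lookup(level);
		ws->lookup_info = 0;	/* lower levels reuse the result */
	}
}

int main(void)
{
	struct walk_state ws = { 0, 1 };

	/* only the first visit pays for the lookup */
	for (int level = 3; level >= 0; level--)
		visit(&ws, level);
	return 0;
}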