Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 365 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 285 insertions(+), 80 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 359a754c782c..94627c4cc193 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1568,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-#ifdef BIO_RW_DISCARD
 static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
 			     DISCARD_FL_BARRIER);
 }
-#endif
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 				u64 num_bytes)
 {
-#ifdef BIO_RW_DISCARD
 	int ret;
 	u64 map_length = num_bytes;
 	struct btrfs_multi_bio *multi = NULL;
 
+	if (!btrfs_test_opt(root, DISCARD))
+		return 0;
+
 	/* Tell the block device(s) that the sectors can be discarded */
 	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 			      bytenr, &map_length, &multi, 0);
@@ -1604,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 	}
 
 	return ret;
-#else
-	return 0;
-#endif
 }
 
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
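
With the #ifdef BIO_RW_DISCARD guards gone, the decision to discard moves from compile time to the new DISCARD mount option checked at the top of btrfs_discard_extent(). The two ">> 9" shifts convert byte offsets and lengths into the 512-byte sector units that blkdev_issue_discard() works in. A standalone sketch of just that conversion (userspace C with made-up extent values, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 1048576;	/* extent start, in bytes */
	uint64_t len = 131072;		/* extent length, in bytes */

	/* >> 9 divides by 512, the block layer's sector size */
	printf("discard sectors %llu through %llu\n",
	       (unsigned long long)(start >> 9),
	       (unsigned long long)(((start + len) >> 9) - 1));
	return 0;
}
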
@@ -2824,14 +2821,17 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
 					   num_items);
 
 	spin_lock(&meta_sinfo->lock);
-	if (BTRFS_I(inode)->delalloc_reserved_extents <=
-	    BTRFS_I(inode)->delalloc_extents) {
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	if (BTRFS_I(inode)->reserved_extents <=
+	    BTRFS_I(inode)->outstanding_extents) {
+		spin_unlock(&BTRFS_I(inode)->accounting_lock);
 		spin_unlock(&meta_sinfo->lock);
 		return 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-	BTRFS_I(inode)->delalloc_reserved_extents--;
-	BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
+	BTRFS_I(inode)->reserved_extents--;
+	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
 
 	if (meta_sinfo->bytes_delalloc < num_bytes) {
 		bug = true;
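
Besides the rename to outstanding_extents/reserved_extents, the check now takes the new per-inode accounting_lock around the comparison, and a reservation is released only when the inode holds more reserved extents than it still has outstanding. A simplified userspace model of that release rule (the struct and helper are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct inode_acct {
	int outstanding_extents;	/* extents still being written out */
	int reserved_extents;		/* metadata reservations held */
};

static bool unreserve_one(struct inode_acct *i)
{
	if (i->reserved_extents <= i->outstanding_extents)
		return false;		/* still needed, keep the reservation */
	i->reserved_extents--;
	return true;
}

int main(void)
{
	struct inode_acct i = { .outstanding_extents = 2, .reserved_extents = 3 };

	printf("released: %d\n", unreserve_one(&i));	/* 1: 3 > 2 */
	printf("released: %d\n", unreserve_one(&i));	/* 0: 2 <= 2 */
	return 0;
}
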
@@ -2864,6 +2864,107 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
 		meta_sinfo->force_delalloc = 0;
 }
 
+struct async_flush {
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+	struct btrfs_work work;
+};
+
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{
+	struct async_flush *async;
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+
+	async = container_of(work, struct async_flush, work);
+	root = async->root;
+	info = async->info;
+
+	btrfs_start_delalloc_inodes(root);
+	wake_up(&info->flush_wait);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+
+	kfree(async);
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{
+	DEFINE_WAIT(wait);
+	u64 used;
+
+	while (1) {
+		prepare_to_wait(&info->flush_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock(&info->lock);
+		if (!info->flushing) {
+			spin_unlock(&info->lock);
+			break;
+		}
+
+		used = info->bytes_used + info->bytes_reserved +
+			info->bytes_pinned + info->bytes_readonly +
+			info->bytes_super + info->bytes_root +
+			info->bytes_may_use + info->bytes_delalloc;
+		if (used < info->total_bytes) {
+			spin_unlock(&info->lock);
+			break;
+		}
+		spin_unlock(&info->lock);
+		schedule();
+	}
+	finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+			   struct btrfs_space_info *info)
+{
+	struct async_flush *async;
+	bool wait = false;
+
+	spin_lock(&info->lock);
+
+	if (!info->flushing) {
+		info->flushing = 1;
+		init_waitqueue_head(&info->flush_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_on_flush(info);
+		return;
+	}
+
+	async = kzalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		goto flush;
+
+	async->root = root;
+	async->info = info;
+	async->work.func = flush_delalloc_async;
+
+	btrfs_queue_worker(&root->fs_info->enospc_workers,
+			   &async->work);
+	wait_on_flush(info);
+	return;
+
+flush:
+	btrfs_start_delalloc_inodes(root);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+}
+
 static int maybe_allocate_chunk(struct btrfs_root *root,
 				struct btrfs_space_info *info)
 {
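
flush_delalloc() elects a single flusher: the first caller sets info->flushing and queues the real work on the enospc_workers pool (falling back to a synchronous flush if the allocation fails), while every later caller just sleeps in wait_on_flush() until the flag clears or enough space frees up. The double wake_up() in flush_delalloc_async() is deliberate: waiters may already have enough space once delalloc writeback has started, so they get a chance to bail before the ordered extents finish. A compressed userspace sketch of the single-flusher pattern, with pthreads standing in for the spinlock and waitqueue and a sleep standing in for writeback (illustrative only; it omits the async handoff and the early bail-out on freed space):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t flush_wait = PTHREAD_COND_INITIALIZER;
static int flushing;

static void do_flush(void)
{
	usleep(10000);	/* pretend to write back dirty data */
}

static void *flush_delalloc(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	if (flushing) {
		/* somebody else is flushing already, just wait for them */
		while (flushing)
			pthread_cond_wait(&flush_wait, &lock);
		pthread_mutex_unlock(&lock);
		return NULL;
	}
	flushing = 1;	/* we won the race, we are the flusher */
	pthread_mutex_unlock(&lock);

	do_flush();

	pthread_mutex_lock(&lock);
	flushing = 0;
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&flush_wait);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, flush_delalloc, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	puts("all flush callers done");
	return 0;
}
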
@@ -2876,10 +2977,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 
 	free_space = btrfs_super_total_bytes(disk_super);
 	/*
-	 * we allow the metadata to grow to a max of either 5gb or 5% of the
+	 * we allow the metadata to grow to a max of either 10gb or 5% of the
 	 * space in the volume.
 	 */
-	min_metadata = min((u64)5 * 1024 * 1024 * 1024,
+	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
 			   div64_u64(free_space * 5, 100));
 	if (info->total_bytes >= min_metadata) {
 		spin_unlock(&info->lock);
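
With the cap raised, metadata may grow to min(10 GiB, 5% of the volume), so the percentage term only limits volumes smaller than 200 GiB. A quick standalone check of the arithmetic (the sample volume sizes are arbitrary):

#include <stdio.h>
#include <stdint.h>

static uint64_t min_metadata(uint64_t volume_bytes)
{
	uint64_t cap = (uint64_t)10 * 1024 * 1024 * 1024;	/* 10 GiB */
	uint64_t pct = volume_bytes * 5 / 100;			/* 5% of volume */

	return cap < pct ? cap : pct;
}

int main(void)
{
	const uint64_t gib = 1024ULL * 1024 * 1024;

	printf("100 GiB volume -> %llu GiB of metadata\n",
	       (unsigned long long)(min_metadata(100 * gib) / gib));	/* 5 */
	printf("400 GiB volume -> %llu GiB of metadata\n",
	       (unsigned long long)(min_metadata(400 * gib) / gib));	/* 10 */
	return 0;
}
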
@@ -2894,7 +2995,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	if (!info->allocating_chunk) {
 		info->force_alloc = 1;
 		info->allocating_chunk = 1;
-		init_waitqueue_head(&info->wait);
+		init_waitqueue_head(&info->allocate_wait);
 	} else {
 		wait = true;
 	}
@@ -2902,7 +3003,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	spin_unlock(&info->lock);
 
 	if (wait) {
-		wait_event(info->wait,
+		wait_event(info->allocate_wait,
 			   !info->allocating_chunk);
 		return 1;
 	}
@@ -2923,7 +3024,7 @@ out:
 	spin_lock(&info->lock);
 	info->allocating_chunk = 0;
 	spin_unlock(&info->lock);
-	wake_up(&info->wait);
+	wake_up(&info->allocate_wait);
 
 	if (ret)
 		return 0;
@@ -2981,21 +3082,20 @@ again:
 			filemap_flush(inode->i_mapping);
 			goto again;
 		} else if (flushed == 3) {
-			btrfs_start_delalloc_inodes(root);
-			btrfs_wait_ordered_extents(root, 0);
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
 		spin_lock(&meta_sinfo->lock);
 		meta_sinfo->bytes_delalloc -= num_bytes;
 		spin_unlock(&meta_sinfo->lock);
 		printk(KERN_ERR "enospc, has %d, reserved %d\n",
-		       BTRFS_I(inode)->delalloc_extents,
-		       BTRFS_I(inode)->delalloc_reserved_extents);
+		       BTRFS_I(inode)->outstanding_extents,
+		       BTRFS_I(inode)->reserved_extents);
 		dump_space_info(meta_sinfo, 0, 0);
 		return -ENOSPC;
 	}
 
-	BTRFS_I(inode)->delalloc_reserved_extents++;
+	BTRFS_I(inode)->reserved_extents++;
 	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
@@ -3094,8 +3194,7 @@ again:
 	}
 
 	if (retries == 2) {
-		btrfs_start_delalloc_inodes(root);
-		btrfs_wait_ordered_extents(root, 0);
+		flush_delalloc(root, meta_sinfo);
 		goto again;
 	}
 	spin_lock(&meta_sinfo->lock);
@@ -3588,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 	if (is_data)
 		goto pinit;
 
+	/*
+	 * discard is sloooow, and so triggering discards on
+	 * individual btree blocks isn't a good plan.  Just
+	 * pin everything in discard mode.
+	 */
+	if (btrfs_test_opt(root, DISCARD))
+		goto pinit;
+
 	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
 	if (!buf)
 		goto pinit;
@@ -3995,7 +4102,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }
 
 enum btrfs_loop_type {
-	LOOP_CACHED_ONLY = 0,
+	LOOP_FIND_IDEAL = 0,
 	LOOP_CACHING_NOWAIT = 1,
 	LOOP_CACHING_WAIT = 2,
 	LOOP_ALLOC_CHUNK = 3,
@@ -4024,11 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_block_group_cache *block_group = NULL;
 	int empty_cluster = 2 * 1024 * 1024;
 	int allowed_chunk_alloc = 0;
+	int done_chunk_alloc = 0;
 	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int loop = 0;
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
+	bool failed_alloc = false;
+	u64 ideal_cache_percent = 0;
+	u64 ideal_cache_offset = 0;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4064,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		empty_cluster = 0;
 
 	if (search_start == hint_byte) {
+ideal_cache:
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
 		/*
 		 * we don't want to use the block group if it doesn't match our
 		 * allocation bits, or if its not cached.
+		 *
+		 * However if we are re-searching with an ideal block group
+		 * picked out then we don't care that the block group is cached.
 		 */
 		if (block_group && block_group_bits(block_group, data) &&
-		    block_group_cache_done(block_group)) {
+		    (block_group->cached != BTRFS_CACHE_NO ||
+		     search_start == ideal_cache_offset)) {
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
 			    block_group->ro) {
@@ -4083,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 			 */
 				btrfs_put_block_group(block_group);
 				up_read(&space_info->groups_sem);
-			} else
+			} else {
 				goto have_block_group;
+			}
 		} else if (block_group) {
 			btrfs_put_block_group(block_group);
 		}
 	}
-
 search:
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4101,28 +4217,45 @@ search:
 
 have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+			u64 free_percent;
+
+			free_percent = btrfs_block_group_used(&block_group->item);
+			free_percent *= 100;
+			free_percent = div64_u64(free_percent,
+						 block_group->key.offset);
+			free_percent = 100 - free_percent;
+			if (free_percent > ideal_cache_percent &&
+			    likely(!block_group->ro)) {
+				ideal_cache_offset = block_group->key.objectid;
+				ideal_cache_percent = free_percent;
+			}
+
 			/*
-			 * we want to start caching kthreads, but not too many
-			 * right off the bat so we don't overwhelm the system,
-			 * so only start them if there are less than 2 and we're
-			 * in the initial allocation phase.
+			 * We only want to start kthread caching if we are at
+			 * the point where we will wait for caching to make
+			 * progress, or if our ideal search is over and we've
+			 * found somebody to start caching.
 			 */
 			if (loop > LOOP_CACHING_NOWAIT ||
-			    atomic_read(&space_info->caching_threads) < 2) {
+			    (loop > LOOP_FIND_IDEAL &&
+			     atomic_read(&space_info->caching_threads) < 2)) {
 				ret = cache_block_group(block_group);
 				BUG_ON(ret);
 			}
-		}
-
-		cached = block_group_cache_done(block_group);
-		if (unlikely(!cached)) {
 			found_uncached_bg = true;
 
-			/* if we only want cached bgs, loop */
-			if (loop == LOOP_CACHED_ONLY)
+			/*
+			 * If loop is set for cached only, try the next block
+			 * group.
+			 */
+			if (loop == LOOP_FIND_IDEAL)
 				goto loop;
 		}
 
+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached))
+			found_uncached_bg = true;
+
 		if (unlikely(block_group->ro))
 			goto loop;
 
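
The free_percent arithmetic scores each uncached block group as 100 minus its used percentage, and the search remembers the highest-scoring group's objectid as the ideal candidate to cache. The same computation in standalone form, with made-up fill levels:

#include <stdio.h>
#include <stdint.h>

static uint64_t free_percent(uint64_t used, uint64_t size)
{
	return 100 - used * 100 / size;	/* same math as the kernel hunk */
}

int main(void)
{
	/* three 1 GiB block groups with different fill levels */
	uint64_t size = 1 << 30;
	uint64_t used[] = { size / 4, size / 2, (size / 10) * 9 };
	uint64_t best = 0;

	for (int i = 0; i < 3; i++) {
		uint64_t fp = free_percent(used[i], size);

		printf("group %d: %llu%% free\n", i, (unsigned long long)fp);
		if (fp > best)
			best = fp;	/* the "ideal" candidate wins */
	}
	printf("ideal candidate: %llu%% free\n", (unsigned long long)best);
	return 0;
}
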
@@ -4233,14 +4366,23 @@ refill_cluster:
 
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset && (cached || (!cached &&
-					   loop == LOOP_CACHING_NOWAIT))) {
-			goto loop;
-		} else if (!offset && (!cached &&
-				       loop > LOOP_CACHING_NOWAIT)) {
+		/*
+		 * If we didn't find a chunk, and we haven't failed on this
+		 * block group before, and this block group is in the middle of
+		 * caching and we are ok with waiting, then go ahead and wait
+		 * for progress to be made, and set failed_alloc to true.
+		 *
+		 * If failed_alloc is true then we've already waited on this
+		 * block group once and should move on to the next block group.
+		 */
+		if (!offset && !failed_alloc && !cached &&
+		    loop > LOOP_CACHING_NOWAIT) {
 			wait_block_group_cache_progress(block_group,
 					num_bytes + empty_size);
+			failed_alloc = true;
 			goto have_block_group;
+		} else if (!offset) {
+			goto loop;
 		}
 checks:
 		search_start = stripe_align(root, offset);
@@ -4288,13 +4430,16 @@ checks:
 		break;
 loop:
 		failed_cluster_refill = false;
+		failed_alloc = false;
 		btrfs_put_block_group(block_group);
 	}
 	up_read(&space_info->groups_sem);
 
-	/* LOOP_CACHED_ONLY, only search fully cached block groups
-	 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-	 *			dont wait foR them to finish caching
+	/* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
+	 *			for them to make caching progress.  Also
+	 *			determine the best possible bg to cache
+	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+	 *			caching kthreads as we move along
 	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
 	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
 	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4303,12 +4448,47 @@ loop:
 	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
 	    (found_uncached_bg || empty_size || empty_cluster ||
 	     allowed_chunk_alloc)) {
-		if (found_uncached_bg) {
+		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 			found_uncached_bg = false;
-			if (loop < LOOP_CACHING_WAIT) {
-				loop++;
+			loop++;
+			if (!ideal_cache_percent &&
+			    atomic_read(&space_info->caching_threads))
 				goto search;
-			}
+
+			/*
+			 * 1 of the following 2 things have happened so far
+			 *
+			 * 1) We found an ideal block group for caching that
+			 * is mostly full and will cache quickly, so we might
+			 * as well wait for it.
+			 *
+			 * 2) We searched for cached only and we didn't find
+			 * anything, and we didn't start any caching kthreads
+			 * either, so chances are we will loop through and
+			 * start a couple caching kthreads, and then come back
+			 * around and just wait for them.  This will be slower
+			 * because we will have 2 caching kthreads reading at
+			 * the same time when we could have just started one
+			 * and waited for it to get far enough to give us an
+			 * allocation, so go ahead and go to the wait caching
+			 * loop.
+			 */
+			loop = LOOP_CACHING_WAIT;
+			search_start = ideal_cache_offset;
+			ideal_cache_percent = 0;
+			goto ideal_cache;
+		} else if (loop == LOOP_FIND_IDEAL) {
+			/*
+			 * Didn't find a uncached bg, wait on anything we find
+			 * next.
+			 */
+			loop = LOOP_CACHING_WAIT;
+			goto search;
+		}
+
+		if (loop < LOOP_CACHING_WAIT) {
+			loop++;
+			goto search;
 		}
 
 		if (loop == LOOP_ALLOC_CHUNK) {
@@ -4320,7 +4500,8 @@ loop:
 			ret = do_chunk_alloc(trans, root, num_bytes +
 					     2 * 1024 * 1024, data, 1);
 			allowed_chunk_alloc = 0;
-		} else {
+			done_chunk_alloc = 1;
+		} else if (!done_chunk_alloc) {
 			space_info->force_alloc = 1;
 		}
 
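
With LOOP_FIND_IDEAL in place, the allocator escalates through five passes, and done_chunk_alloc keeps it from forcing a second chunk allocation on later passes. A toy model of the escalation ladder (try_pass() is a made-up stand-in for one scan over the block groups, not the kernel's logic):

#include <stdbool.h>
#include <stdio.h>

enum btrfs_loop_type {
	LOOP_FIND_IDEAL = 0,	/* survey bg's, pick the best one to cache */
	LOOP_CACHING_NOWAIT,	/* use cached space, kick off cachers */
	LOOP_CACHING_WAIT,	/* wait for caching to make progress */
	LOOP_ALLOC_CHUNK,	/* force a new chunk allocation */
	LOOP_NO_EMPTY_SIZE,	/* drop cluster padding and try once more */
};

/* pretend only a freshly allocated chunk satisfies the request */
static bool try_pass(int loop)
{
	return loop >= LOOP_ALLOC_CHUNK;
}

int main(void)
{
	for (int loop = LOOP_FIND_IDEAL; loop <= LOOP_NO_EMPTY_SIZE; loop++) {
		printf("pass %d\n", loop);
		if (try_pass(loop)) {
			puts("allocation succeeded");
			return 0;
		}
	}
	puts("-ENOSPC");
	return 1;
}
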
@@ -4799,6 +4980,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 generation;
 	u64 refs;
+	u64 flags;
 	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
@@ -4836,15 +5018,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		    generation <= root->root_key.offset)
 			continue;
 
+		/* We don't lock the tree block, it's OK to be racy here */
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+
 		if (wc->stage == DROP_REFERENCE) {
-			ret = btrfs_lookup_extent_info(trans, root,
-						       bytenr, blocksize,
-						       &refs, NULL);
-			BUG_ON(ret);
-			BUG_ON(refs == 0);
 			if (refs == 1)
 				goto reada;
 
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				continue;
@@ -4853,6 +5039,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 					       &wc->update_progress);
 			if (ret < 0)
 				continue;
+		} else {
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 		}
 reada:
 		ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4876,7 +5066,7 @@ reada:
 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_path *path,
-				   struct walk_control *wc)
+				   struct walk_control *wc, int lookup_info)
 {
 	int level = wc->level;
 	struct extent_buffer *eb = path->nodes[level];
@@ -4891,8 +5081,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * when reference count of tree block is 1, it won't increase
 	 * again. once full backref flag is set, we never clear it.
 	 */
-	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+	if (lookup_info &&
+	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
 		BUG_ON(!path->locks[level]);
 		ret = btrfs_lookup_extent_info(trans, root,
 					       eb->start, eb->len,
@@ -4953,7 +5144,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 struct walk_control *wc)
+				 struct walk_control *wc, int *lookup_info)
 {
 	u64 bytenr;
 	u64 generation;
@@ -4973,8 +5164,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	 * for the subtree
 	 */
 	if (wc->stage == UPDATE_BACKREF &&
-	    generation <= root->root_key.offset)
+	    generation <= root->root_key.offset) {
+		*lookup_info = 1;
 		return 1;
+	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
 	blocksize = btrfs_level_size(root, level - 1);
@@ -4987,14 +5180,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	btrfs_tree_lock(next);
 	btrfs_set_lock_blocking(next);
 
-	if (wc->stage == DROP_REFERENCE) {
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &wc->refs[level - 1],
-					       &wc->flags[level - 1]);
-		BUG_ON(ret);
-		BUG_ON(wc->refs[level - 1] == 0);
+	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+				       &wc->refs[level - 1],
+				       &wc->flags[level - 1]);
+	BUG_ON(ret);
+	BUG_ON(wc->refs[level - 1] == 0);
+	*lookup_info = 0;
 
+	if (wc->stage == DROP_REFERENCE) {
 		if (wc->refs[level - 1] > 1) {
+			if (level == 1 &&
+			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				goto skip;
+
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				goto skip;
@@ -5008,12 +5206,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			wc->stage = UPDATE_BACKREF;
 			wc->shared_level = level - 1;
 		}
+	} else {
+		if (level == 1 &&
+		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+			goto skip;
 	}
 
 	if (!btrfs_buffer_uptodate(next, generation)) {
 		btrfs_tree_unlock(next);
 		free_extent_buffer(next);
 		next = NULL;
+		*lookup_info = 1;
 	}
 
 	if (!next) {
@@ -5036,21 +5239,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 skip:
 	wc->refs[level - 1] = 0;
 	wc->flags[level - 1] = 0;
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			parent = path->nodes[level]->start;
+		} else {
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level]));
+			parent = 0;
+		}
 
-	if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-		parent = path->nodes[level]->start;
-	} else {
-		BUG_ON(root->root_key.objectid !=
-		       btrfs_header_owner(path->nodes[level]));
-		parent = 0;
+		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+					root->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
 	}
-
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-				root->root_key.objectid, level - 1, 0);
-	BUG_ON(ret);
-
 	btrfs_tree_unlock(next);
 	free_extent_buffer(next);
+	*lookup_info = 1;
 	return 1;
 }
 
@@ -5164,6 +5368,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 				   struct walk_control *wc)
 {
 	int level = wc->level;
+	int lookup_info = 1;
 	int ret;
 
 	while (level >= 0) {
@@ -5171,14 +5376,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		    btrfs_header_nritems(path->nodes[level]))
 			break;
 
-		ret = walk_down_proc(trans, root, path, wc);
+		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
 
 		if (level == 0)
 			break;
 
-		ret = do_walk_down(trans, root, path, wc);
+		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;
 			continue;
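
The lookup_info flag threaded through walk_down_tree() avoids back-to-back btrfs_lookup_extent_info() calls: do_walk_down() now always looks up the child's refs and flags and clears the flag, so walk_down_proc() can skip its own lookup while the cached wc->refs/wc->flags are still fresh. A toy model of that handshake (the lookup counts are illustrative, not measured):

#include <stdio.h>

static int lookups;

static void lookup(int *refs)
{
	lookups++;		/* models btrfs_lookup_extent_info() */
	*refs = 1;
}

static void walk_down_proc(int *refs, int lookup_info)
{
	if (lookup_info)
		lookup(refs);	/* only re-read when flagged stale */
}

static void do_walk_down(int *refs, int *lookup_info)
{
	lookup(refs);		/* child lookup refreshes the cache... */
	*lookup_info = 0;	/* ...so the next walk_down_proc skips it */
}

int main(void)
{
	int refs, lookup_info = 1;

	for (int level = 3; level > 0; level--) {
		walk_down_proc(&refs, lookup_info);
		do_walk_down(&refs, &lookup_info);
	}
	printf("lookups: %d (up to %d without the flag)\n", lookups, 2 * 3);
	return 0;
}
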