Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 365
1 file changed, 285 insertions, 80 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 359a754c782c..94627c4cc193 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1568,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-#ifdef BIO_RW_DISCARD
 static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
 			DISCARD_FL_BARRIER);
 }
-#endif
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 				u64 num_bytes)
 {
-#ifdef BIO_RW_DISCARD
 	int ret;
 	u64 map_length = num_bytes;
 	struct btrfs_multi_bio *multi = NULL;
 
+	if (!btrfs_test_opt(root, DISCARD))
+		return 0;
+
 	/* Tell the block device(s) that the sectors can be discarded */
 	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 			bytenr, &map_length, &multi, 0);
@@ -1604,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 	}
 
 	return ret;
-#else
-	return 0;
-#endif
 }
 
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2824,14 +2821,17 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
 					   num_items);
 
 	spin_lock(&meta_sinfo->lock);
-	if (BTRFS_I(inode)->delalloc_reserved_extents <=
-	    BTRFS_I(inode)->delalloc_extents) {
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	if (BTRFS_I(inode)->reserved_extents <=
+	    BTRFS_I(inode)->outstanding_extents) {
+		spin_unlock(&BTRFS_I(inode)->accounting_lock);
 		spin_unlock(&meta_sinfo->lock);
 		return 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-	BTRFS_I(inode)->delalloc_reserved_extents--;
-	BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
+	BTRFS_I(inode)->reserved_extents--;
+	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
 
 	if (meta_sinfo->bytes_delalloc < num_bytes) {
 		bug = true;
@@ -2864,6 +2864,107 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
 	meta_sinfo->force_delalloc = 0;
 }
 
+struct async_flush {
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+	struct btrfs_work work;
+};
+
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{
+	struct async_flush *async;
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+
+	async = container_of(work, struct async_flush, work);
+	root = async->root;
+	info = async->info;
+
+	btrfs_start_delalloc_inodes(root);
+	wake_up(&info->flush_wait);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+
+	kfree(async);
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{
+	DEFINE_WAIT(wait);
+	u64 used;
+
+	while (1) {
+		prepare_to_wait(&info->flush_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock(&info->lock);
+		if (!info->flushing) {
+			spin_unlock(&info->lock);
+			break;
+		}
+
+		used = info->bytes_used + info->bytes_reserved +
+			info->bytes_pinned + info->bytes_readonly +
+			info->bytes_super + info->bytes_root +
+			info->bytes_may_use + info->bytes_delalloc;
+		if (used < info->total_bytes) {
+			spin_unlock(&info->lock);
+			break;
+		}
+		spin_unlock(&info->lock);
+		schedule();
+	}
+	finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+			   struct btrfs_space_info *info)
+{
+	struct async_flush *async;
+	bool wait = false;
+
+	spin_lock(&info->lock);
+
+	if (!info->flushing) {
+		info->flushing = 1;
+		init_waitqueue_head(&info->flush_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_on_flush(info);
+		return;
+	}
+
+	async = kzalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		goto flush;
+
+	async->root = root;
+	async->info = info;
+	async->work.func = flush_delalloc_async;
+
+	btrfs_queue_worker(&root->fs_info->enospc_workers,
+			   &async->work);
+	wait_on_flush(info);
+	return;
+
+flush:
+	btrfs_start_delalloc_inodes(root);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+}
+
 static int maybe_allocate_chunk(struct btrfs_root *root,
 				struct btrfs_space_info *info)
 {
@@ -2876,10 +2977,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 
 	free_space = btrfs_super_total_bytes(disk_super);
 	/*
-	 * we allow the metadata to grow to a max of either 5gb or 5% of the
+	 * we allow the metadata to grow to a max of either 10gb or 5% of the
 	 * space in the volume.
 	 */
-	min_metadata = min((u64)5 * 1024 * 1024 * 1024,
+	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
 			   div64_u64(free_space * 5, 100));
 	if (info->total_bytes >= min_metadata) {
 		spin_unlock(&info->lock);
@@ -2894,7 +2995,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	if (!info->allocating_chunk) {
 		info->force_alloc = 1;
 		info->allocating_chunk = 1;
-		init_waitqueue_head(&info->wait);
+		init_waitqueue_head(&info->allocate_wait);
 	} else {
 		wait = true;
 	}
@@ -2902,7 +3003,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	spin_unlock(&info->lock);
 
 	if (wait) {
-		wait_event(info->wait,
+		wait_event(info->allocate_wait,
 			   !info->allocating_chunk);
 		return 1;
 	}
@@ -2923,7 +3024,7 @@ out:
 	spin_lock(&info->lock);
 	info->allocating_chunk = 0;
 	spin_unlock(&info->lock);
-	wake_up(&info->wait);
+	wake_up(&info->allocate_wait);
 
 	if (ret)
 		return 0;
@@ -2981,21 +3082,20 @@ again:
 			filemap_flush(inode->i_mapping);
 			goto again;
 		} else if (flushed == 3) {
-			btrfs_start_delalloc_inodes(root);
-			btrfs_wait_ordered_extents(root, 0);
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
 		spin_lock(&meta_sinfo->lock);
 		meta_sinfo->bytes_delalloc -= num_bytes;
 		spin_unlock(&meta_sinfo->lock);
 		printk(KERN_ERR "enospc, has %d, reserved %d\n",
-		       BTRFS_I(inode)->delalloc_extents,
-		       BTRFS_I(inode)->delalloc_reserved_extents);
+		       BTRFS_I(inode)->outstanding_extents,
+		       BTRFS_I(inode)->reserved_extents);
 		dump_space_info(meta_sinfo, 0, 0);
 		return -ENOSPC;
 	}
 
-	BTRFS_I(inode)->delalloc_reserved_extents++;
+	BTRFS_I(inode)->reserved_extents++;
 	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
@@ -3094,8 +3194,7 @@ again:
 	}
 
 	if (retries == 2) {
-		btrfs_start_delalloc_inodes(root);
-		btrfs_wait_ordered_extents(root, 0);
+		flush_delalloc(root, meta_sinfo);
 		goto again;
 	}
 	spin_lock(&meta_sinfo->lock);
@@ -3588,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 	if (is_data)
 		goto pinit;
 
+	/*
+	 * discard is sloooow, and so triggering discards on
+	 * individual btree blocks isn't a good plan.  Just
+	 * pin everything in discard mode.
+	 */
+	if (btrfs_test_opt(root, DISCARD))
+		goto pinit;
+
 	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
 	if (!buf)
 		goto pinit;
@@ -3995,7 +4102,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }
 
 enum btrfs_loop_type {
-	LOOP_CACHED_ONLY = 0,
+	LOOP_FIND_IDEAL = 0,
 	LOOP_CACHING_NOWAIT = 1,
 	LOOP_CACHING_WAIT = 2,
 	LOOP_ALLOC_CHUNK = 3,
@@ -4024,11 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_block_group_cache *block_group = NULL;
 	int empty_cluster = 2 * 1024 * 1024;
 	int allowed_chunk_alloc = 0;
+	int done_chunk_alloc = 0;
 	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int loop = 0;
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
+	bool failed_alloc = false;
+	u64 ideal_cache_percent = 0;
+	u64 ideal_cache_offset = 0;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4064,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		empty_cluster = 0;
 
 	if (search_start == hint_byte) {
+ideal_cache:
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
 		/*
 		 * we don't want to use the block group if it doesn't match our
 		 * allocation bits, or if its not cached.
+		 *
+		 * However if we are re-searching with an ideal block group
+		 * picked out then we don't care that the block group is cached.
 		 */
 		if (block_group && block_group_bits(block_group, data) &&
-		    block_group_cache_done(block_group)) {
+		    (block_group->cached != BTRFS_CACHE_NO ||
+		     search_start == ideal_cache_offset)) {
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
 			    block_group->ro) {
@@ -4083,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 				 */
 				btrfs_put_block_group(block_group);
 				up_read(&space_info->groups_sem);
-			} else
+			} else {
 				goto have_block_group;
+			}
 		} else if (block_group) {
 			btrfs_put_block_group(block_group);
 		}
 	}
-
 search:
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4101,28 +4217,45 @@ search:
 
 have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+			u64 free_percent;
+
+			free_percent = btrfs_block_group_used(&block_group->item);
+			free_percent *= 100;
+			free_percent = div64_u64(free_percent,
+						 block_group->key.offset);
+			free_percent = 100 - free_percent;
+			if (free_percent > ideal_cache_percent &&
+			    likely(!block_group->ro)) {
+				ideal_cache_offset = block_group->key.objectid;
+				ideal_cache_percent = free_percent;
+			}
+
 			/*
-			 * we want to start caching kthreads, but not too many
-			 * right off the bat so we don't overwhelm the system,
-			 * so only start them if there are less than 2 and we're
-			 * in the initial allocation phase.
+			 * We only want to start kthread caching if we are at
+			 * the point where we will wait for caching to make
+			 * progress, or if our ideal search is over and we've
+			 * found somebody to start caching.
 			 */
 			if (loop > LOOP_CACHING_NOWAIT ||
-			    atomic_read(&space_info->caching_threads) < 2) {
+			    (loop > LOOP_FIND_IDEAL &&
+			     atomic_read(&space_info->caching_threads) < 2)) {
 				ret = cache_block_group(block_group);
 				BUG_ON(ret);
 			}
-		}
-
-		cached = block_group_cache_done(block_group);
-		if (unlikely(!cached)) {
 			found_uncached_bg = true;
 
-			/* if we only want cached bgs, loop */
-			if (loop == LOOP_CACHED_ONLY)
+			/*
+			 * If loop is set for cached only, try the next block
+			 * group.
+			 */
+			if (loop == LOOP_FIND_IDEAL)
 				goto loop;
 		}
 
+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached))
+			found_uncached_bg = true;
+
 		if (unlikely(block_group->ro))
 			goto loop;
 
@@ -4233,14 +4366,23 @@ refill_cluster:
 
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset && (cached || (!cached &&
-					   loop == LOOP_CACHING_NOWAIT))) {
-			goto loop;
-		} else if (!offset && (!cached &&
-				       loop > LOOP_CACHING_NOWAIT)) {
+		/*
+		 * If we didn't find a chunk, and we haven't failed on this
+		 * block group before, and this block group is in the middle of
+		 * caching and we are ok with waiting, then go ahead and wait
+		 * for progress to be made, and set failed_alloc to true.
+		 *
+		 * If failed_alloc is true then we've already waited on this
+		 * block group once and should move on to the next block group.
+		 */
+		if (!offset && !failed_alloc && !cached &&
+		    loop > LOOP_CACHING_NOWAIT) {
 			wait_block_group_cache_progress(block_group,
 				       num_bytes + empty_size);
+			failed_alloc = true;
 			goto have_block_group;
+		} else if (!offset) {
+			goto loop;
 		}
 checks:
 		search_start = stripe_align(root, offset);
@@ -4288,13 +4430,16 @@ checks:
 		break;
 loop:
 		failed_cluster_refill = false;
+		failed_alloc = false;
 		btrfs_put_block_group(block_group);
 	}
 	up_read(&space_info->groups_sem);
 
-	/* LOOP_CACHED_ONLY, only search fully cached block groups
-	 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
-	 * dont wait foR them to finish caching
+	/* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
+	 *			for them to make caching progress.  Also
+	 *			determine the best possible bg to cache
+	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+	 *			caching kthreads as we move along
 	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
 	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
 	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4303,12 +4448,47 @@ loop:
 	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
 	    (found_uncached_bg || empty_size || empty_cluster ||
 	     allowed_chunk_alloc)) {
-		if (found_uncached_bg) {
+		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 			found_uncached_bg = false;
-			if (loop < LOOP_CACHING_WAIT) {
-				loop++;
+			loop++;
+			if (!ideal_cache_percent &&
+			    atomic_read(&space_info->caching_threads))
 				goto search;
-			}
+
+			/*
+			 * 1 of the following 2 things have happened so far
+			 *
+			 * 1) We found an ideal block group for caching that
+			 * is mostly full and will cache quickly, so we might
+			 * as well wait for it.
+			 *
+			 * 2) We searched for cached only and we didn't find
+			 * anything, and we didn't start any caching kthreads
+			 * either, so chances are we will loop through and
+			 * start a couple caching kthreads, and then come back
+			 * around and just wait for them.  This will be slower
+			 * because we will have 2 caching kthreads reading at
+			 * the same time when we could have just started one
+			 * and waited for it to get far enough to give us an
+			 * allocation, so go ahead and go to the wait caching
+			 * loop.
+			 */
+			loop = LOOP_CACHING_WAIT;
+			search_start = ideal_cache_offset;
+			ideal_cache_percent = 0;
+			goto ideal_cache;
+		} else if (loop == LOOP_FIND_IDEAL) {
+			/*
+			 * Didn't find a uncached bg, wait on anything we find
+			 * next.
+			 */
+			loop = LOOP_CACHING_WAIT;
+			goto search;
+		}
+
+		if (loop < LOOP_CACHING_WAIT) {
+			loop++;
+			goto search;
 		}
 
 		if (loop == LOOP_ALLOC_CHUNK) {
@@ -4320,7 +4500,8 @@ loop:
 			ret = do_chunk_alloc(trans, root, num_bytes +
 					     2 * 1024 * 1024, data, 1);
 			allowed_chunk_alloc = 0;
-		} else {
+			done_chunk_alloc = 1;
+		} else if (!done_chunk_alloc) {
 			space_info->force_alloc = 1;
 		}
 
@@ -4799,6 +4980,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 generation;
 	u64 refs;
+	u64 flags;
 	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
@@ -4836,15 +5018,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		    generation <= root->root_key.offset)
 			continue;
 
+		/* We don't lock the tree block, it's OK to be racy here */
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+
 		if (wc->stage == DROP_REFERENCE) {
-			ret = btrfs_lookup_extent_info(trans, root,
-						       bytenr, blocksize,
-						       &refs, NULL);
-			BUG_ON(ret);
-			BUG_ON(refs == 0);
 			if (refs == 1)
 				goto reada;
 
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				continue;
@@ -4853,6 +5039,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 						  &wc->update_progress);
 			if (ret < 0)
 				continue;
+		} else {
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 		}
 reada:
 		ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4876,7 +5066,7 @@ reada:
 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_path *path,
-				   struct walk_control *wc)
+				   struct walk_control *wc, int lookup_info)
 {
 	int level = wc->level;
 	struct extent_buffer *eb = path->nodes[level];
@@ -4891,8 +5081,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * when reference count of tree block is 1, it won't increase
 	 * again. once full backref flag is set, we never clear it.
 	 */
-	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+	if (lookup_info &&
+	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
 		BUG_ON(!path->locks[level]);
 		ret = btrfs_lookup_extent_info(trans, root,
 					       eb->start, eb->len,
@@ -4953,7 +5144,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 struct walk_control *wc)
+				 struct walk_control *wc, int *lookup_info)
 {
 	u64 bytenr;
 	u64 generation;
@@ -4973,8 +5164,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	 * for the subtree
 	 */
 	if (wc->stage == UPDATE_BACKREF &&
-	    generation <= root->root_key.offset)
+	    generation <= root->root_key.offset) {
+		*lookup_info = 1;
 		return 1;
+	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
 	blocksize = btrfs_level_size(root, level - 1);
@@ -4987,14 +5180,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	btrfs_tree_lock(next);
 	btrfs_set_lock_blocking(next);
 
-	if (wc->stage == DROP_REFERENCE) {
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &wc->refs[level - 1],
-					       &wc->flags[level - 1]);
-		BUG_ON(ret);
-		BUG_ON(wc->refs[level - 1] == 0);
+	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+				       &wc->refs[level - 1],
+				       &wc->flags[level - 1]);
+	BUG_ON(ret);
+	BUG_ON(wc->refs[level - 1] == 0);
+	*lookup_info = 0;
 
+	if (wc->stage == DROP_REFERENCE) {
 		if (wc->refs[level - 1] > 1) {
+			if (level == 1 &&
+			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				goto skip;
+
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				goto skip;
@@ -5008,12 +5206,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			wc->stage = UPDATE_BACKREF;
 			wc->shared_level = level - 1;
 		}
+	} else {
+		if (level == 1 &&
+		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+			goto skip;
 	}
 
 	if (!btrfs_buffer_uptodate(next, generation)) {
 		btrfs_tree_unlock(next);
 		free_extent_buffer(next);
 		next = NULL;
+		*lookup_info = 1;
 	}
 
 	if (!next) {
@@ -5036,21 +5239,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 skip:
 	wc->refs[level - 1] = 0;
 	wc->flags[level - 1] = 0;
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			parent = path->nodes[level]->start;
+		} else {
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level]));
+			parent = 0;
+		}
 
-	if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-		parent = path->nodes[level]->start;
-	} else {
-		BUG_ON(root->root_key.objectid !=
-		       btrfs_header_owner(path->nodes[level]));
-		parent = 0;
+		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+					root->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
 	}
-
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-				root->root_key.objectid, level - 1, 0);
-	BUG_ON(ret);
-
 	btrfs_tree_unlock(next);
 	free_extent_buffer(next);
+	*lookup_info = 1;
 	return 1;
 }
 
@@ -5164,6 +5368,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 				   struct walk_control *wc)
 {
 	int level = wc->level;
+	int lookup_info = 1;
 	int ret;
 
 	while (level >= 0) {
@@ -5171,14 +5376,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		    btrfs_header_nritems(path->nodes[level]))
 			break;
 
-		ret = walk_down_proc(trans, root, path, wc);
+		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
 
 		if (level == 0)
 			break;
 
-		ret = do_walk_down(trans, root, path, wc);
+		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;
 			continue;