author    Linus Torvalds <torvalds@linux-foundation.org>  2009-10-11 14:23:13 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-10-11 14:23:13 -0400
commit    474a503d4bf77ae0cbe484dd0842a2648c0b1c28 (patch)
tree      70e3e4023209e741546491a58622bd45fb13e308 /fs/btrfs/extent-tree.c
parent    d43c36dc6b357fa1806800f18aa30123c747a6d1 (diff)
parent    ac6889cbb254be1ffea376bea4a96ce9be0e0ed0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix file clone ioctl for bookend extents
  Btrfs: fix uninit compiler warning in cow_file_range_nocow
  Btrfs: constify dentry_operations
  Btrfs: optimize back reference update during btrfs_drop_snapshot
  Btrfs: remove negative dentry when deleting subvolume
  Btrfs: optimize fsync for the single writer case
  Btrfs: async delalloc flushing under space pressure
  Btrfs: release delalloc reservations on extent item insertion
  Btrfs: delay clearing EXTENT_DELALLOC for compressed extents
  Btrfs: cleanup extent_clear_unlock_delalloc flags
  Btrfs: fix possible softlockup in the allocator
  Btrfs: fix deadlock on async thread startup
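Several of the changes merged here (notably the async delalloc flushing) hang
private state off a generic btrfs_work item and recover it in the handler with
container_of. The following stand-alone user-space sketch shows that embedding
pattern; every name in it is illustrative, not taken from the btrfs sources.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* user-space re-implementation of the kernel's container_of() */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* generic work item, standing in for struct btrfs_work */
struct work {
        void (*func)(struct work *work);
};

/* private context with the work item embedded, like struct async_flush */
struct async_ctx {
        const char *tag;        /* e.g. which space_info to flush */
        struct work work;
};

static void handler(struct work *work)
{
        /* recover the enclosing context from the embedded member */
        struct async_ctx *ctx = container_of(work, struct async_ctx, work);

        printf("flushing for %s\n", ctx->tag);
        free(ctx);
}

int main(void)
{
        struct async_ctx *ctx = calloc(1, sizeof(*ctx));

        if (!ctx)
                return 1;
        ctx->tag = "meta_sinfo";
        ctx->work.func = handler;
        ctx->work.func(&ctx->work);     /* a worker thread would do this */
        return 0;
}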
Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 235 +++++++++++++++----
 1 file changed, 186 insertions(+), 49 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 359a754c782c..d0c4d584efad 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2824,14 +2824,17 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
 				num_items);
 
 	spin_lock(&meta_sinfo->lock);
-	if (BTRFS_I(inode)->delalloc_reserved_extents <=
-	    BTRFS_I(inode)->delalloc_extents) {
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	if (BTRFS_I(inode)->reserved_extents <=
+	    BTRFS_I(inode)->outstanding_extents) {
+		spin_unlock(&BTRFS_I(inode)->accounting_lock);
 		spin_unlock(&meta_sinfo->lock);
 		return 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-	BTRFS_I(inode)->delalloc_reserved_extents--;
-	BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
+	BTRFS_I(inode)->reserved_extents--;
+	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
 
 	if (meta_sinfo->bytes_delalloc < num_bytes) {
 		bug = true;
@@ -2864,6 +2867,107 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
 	meta_sinfo->force_delalloc = 0;
 }
 
+struct async_flush {
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+	struct btrfs_work work;
+};
+
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{
+	struct async_flush *async;
+	struct btrfs_root *root;
+	struct btrfs_space_info *info;
+
+	async = container_of(work, struct async_flush, work);
+	root = async->root;
+	info = async->info;
+
+	btrfs_start_delalloc_inodes(root);
+	wake_up(&info->flush_wait);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+
+	kfree(async);
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{
+	DEFINE_WAIT(wait);
+	u64 used;
+
+	while (1) {
+		prepare_to_wait(&info->flush_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		spin_lock(&info->lock);
+		if (!info->flushing) {
+			spin_unlock(&info->lock);
+			break;
+		}
+
+		used = info->bytes_used + info->bytes_reserved +
+			info->bytes_pinned + info->bytes_readonly +
+			info->bytes_super + info->bytes_root +
+			info->bytes_may_use + info->bytes_delalloc;
+		if (used < info->total_bytes) {
+			spin_unlock(&info->lock);
+			break;
+		}
+		spin_unlock(&info->lock);
+		schedule();
+	}
+	finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+			   struct btrfs_space_info *info)
+{
+	struct async_flush *async;
+	bool wait = false;
+
+	spin_lock(&info->lock);
+
+	if (!info->flushing) {
+		info->flushing = 1;
+		init_waitqueue_head(&info->flush_wait);
+	} else {
+		wait = true;
+	}
+
+	spin_unlock(&info->lock);
+
+	if (wait) {
+		wait_on_flush(info);
+		return;
+	}
+
+	async = kzalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		goto flush;
+
+	async->root = root;
+	async->info = info;
+	async->work.func = flush_delalloc_async;
+
+	btrfs_queue_worker(&root->fs_info->enospc_workers,
+			   &async->work);
+	wait_on_flush(info);
+	return;
+
+flush:
+	btrfs_start_delalloc_inodes(root);
+	btrfs_wait_ordered_extents(root, 0);
+
+	spin_lock(&info->lock);
+	info->flushing = 0;
+	spin_unlock(&info->lock);
+	wake_up(&info->flush_wait);
+}
+
 static int maybe_allocate_chunk(struct btrfs_root *root,
 				struct btrfs_space_info *info)
 {
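The flush_delalloc() path added above implements a single-flusher handshake:
the first caller under the space-info lock becomes the flusher, and everyone
else sleeps on flush_wait until flushing drops back to zero. A rough pthreads
analogue of that handshake, with invented names and none of the kernel's
waitqueue or free-space rechecking machinery, might look like this:

#include <pthread.h>
#include <stdbool.h>

struct space_info {
        pthread_mutex_t lock;
        pthread_cond_t flush_wait;
        int flushing;
};

/* stand-in for btrfs_start_delalloc_inodes() + ordered extent wait */
static void do_flush(struct space_info *info)
{
        (void)info;
}

static void flush_space(struct space_info *info)
{
        bool wait = false;

        pthread_mutex_lock(&info->lock);
        if (!info->flushing)
                info->flushing = 1;     /* we won: we are the flusher */
        else
                wait = true;            /* a flush is already running */
        pthread_mutex_unlock(&info->lock);

        if (wait) {
                pthread_mutex_lock(&info->lock);
                while (info->flushing)
                        pthread_cond_wait(&info->flush_wait, &info->lock);
                pthread_mutex_unlock(&info->lock);
                return;
        }

        do_flush(info);

        pthread_mutex_lock(&info->lock);
        info->flushing = 0;
        pthread_mutex_unlock(&info->lock);
        pthread_cond_broadcast(&info->flush_wait);
}

int main(void)
{
        struct space_info info = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .flush_wait = PTHREAD_COND_INITIALIZER,
                .flushing = 0,
        };

        flush_space(&info);
        return 0;
}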
@@ -2894,7 +2998,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	if (!info->allocating_chunk) {
 		info->force_alloc = 1;
 		info->allocating_chunk = 1;
-		init_waitqueue_head(&info->wait);
+		init_waitqueue_head(&info->allocate_wait);
 	} else {
 		wait = true;
 	}
@@ -2902,7 +3006,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	spin_unlock(&info->lock);
 
 	if (wait) {
-		wait_event(info->wait,
+		wait_event(info->allocate_wait,
 			   !info->allocating_chunk);
 		return 1;
 	}
@@ -2923,7 +3027,7 @@ out:
 	spin_lock(&info->lock);
 	info->allocating_chunk = 0;
 	spin_unlock(&info->lock);
-	wake_up(&info->wait);
+	wake_up(&info->allocate_wait);
 
 	if (ret)
 		return 0;
@@ -2981,21 +3085,20 @@ again:
 			filemap_flush(inode->i_mapping);
 			goto again;
 		} else if (flushed == 3) {
-			btrfs_start_delalloc_inodes(root);
-			btrfs_wait_ordered_extents(root, 0);
+			flush_delalloc(root, meta_sinfo);
 			goto again;
 		}
 		spin_lock(&meta_sinfo->lock);
 		meta_sinfo->bytes_delalloc -= num_bytes;
 		spin_unlock(&meta_sinfo->lock);
 		printk(KERN_ERR "enospc, has %d, reserved %d\n",
-		       BTRFS_I(inode)->delalloc_extents,
-		       BTRFS_I(inode)->delalloc_reserved_extents);
+		       BTRFS_I(inode)->outstanding_extents,
+		       BTRFS_I(inode)->reserved_extents);
 		dump_space_info(meta_sinfo, 0, 0);
 		return -ENOSPC;
 	}
 
-	BTRFS_I(inode)->delalloc_reserved_extents++;
+	BTRFS_I(inode)->reserved_extents++;
 	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
@@ -3094,8 +3197,7 @@ again:
 	}
 
 	if (retries == 2) {
-		btrfs_start_delalloc_inodes(root);
-		btrfs_wait_ordered_extents(root, 0);
+		flush_delalloc(root, meta_sinfo);
 		goto again;
 	}
 	spin_lock(&meta_sinfo->lock);
@@ -4029,6 +4131,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	int loop = 0;
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
+	bool failed_alloc = false;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4233,14 +4336,23 @@ refill_cluster:
 
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset && (cached || (!cached &&
-					   loop == LOOP_CACHING_NOWAIT))) {
-			goto loop;
-		} else if (!offset && (!cached &&
-				       loop > LOOP_CACHING_NOWAIT)) {
+		/*
+		 * If we didn't find a chunk, and we haven't failed on this
+		 * block group before, and this block group is in the middle of
+		 * caching and we are ok with waiting, then go ahead and wait
+		 * for progress to be made, and set failed_alloc to true.
+		 *
+		 * If failed_alloc is true then we've already waited on this
+		 * block group once and should move on to the next block group.
+		 */
+		if (!offset && !failed_alloc && !cached &&
+		    loop > LOOP_CACHING_NOWAIT) {
 			wait_block_group_cache_progress(block_group,
 					num_bytes + empty_size);
+			failed_alloc = true;
 			goto have_block_group;
+		} else if (!offset) {
+			goto loop;
 		}
 checks:
 		search_start = stripe_align(root, offset);
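The failed_alloc flag introduced in this hunk makes the allocator wait for
caching progress at most once per block group before giving up on it. A toy
model of that control flow, with stub helpers standing in for
btrfs_find_space_for_alloc() and wait_block_group_cache_progress() (all names
here are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

static int attempts;

/* stub: fails the first time, "finds" space on the retry */
static unsigned long long find_space(void)
{
        return attempts++ == 0 ? 0 : 4096;
}

/* stand-in for wait_block_group_cache_progress() */
static void wait_for_cache_progress(void)
{
}

static unsigned long long try_block_group(bool cached)
{
        bool failed_alloc = false;
        unsigned long long offset;

again:
        offset = find_space();
        if (!offset && !failed_alloc && !cached) {
                wait_for_cache_progress();
                failed_alloc = true;    /* wait only once per group */
                goto again;
        }
        return offset;  /* 0: caller moves on to the next group */
}

int main(void)
{
        printf("got offset %llu\n", try_block_group(false));
        return 0;
}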
@@ -4288,6 +4400,7 @@ checks:
 			break;
 loop:
 		failed_cluster_refill = false;
+		failed_alloc = false;
 		btrfs_put_block_group(block_group);
 	}
 	up_read(&space_info->groups_sem);
@@ -4799,6 +4912,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 generation;
 	u64 refs;
+	u64 flags;
 	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
@@ -4836,15 +4950,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		    generation <= root->root_key.offset)
 			continue;
 
+		/* We don't lock the tree block, it's OK to be racy here */
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+
 		if (wc->stage == DROP_REFERENCE) {
-			ret = btrfs_lookup_extent_info(trans, root,
-						bytenr, blocksize,
-						&refs, NULL);
-			BUG_ON(ret);
-			BUG_ON(refs == 0);
 			if (refs == 1)
 				goto reada;
 
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				continue;
@@ -4853,6 +4971,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 					       &wc->update_progress);
 			if (ret < 0)
 				continue;
+		} else {
+			if (wc->level == 1 &&
+			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				continue;
 		}
 reada:
 		ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4876,7 +4998,7 @@ reada:
 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_path *path,
-				   struct walk_control *wc)
+				   struct walk_control *wc, int lookup_info)
 {
 	int level = wc->level;
 	struct extent_buffer *eb = path->nodes[level];
@@ -4891,8 +5013,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * when reference count of tree block is 1, it won't increase
 	 * again. once full backref flag is set, we never clear it.
 	 */
-	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+	if (lookup_info &&
+	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
 		BUG_ON(!path->locks[level]);
 		ret = btrfs_lookup_extent_info(trans, root,
 					       eb->start, eb->len,
@@ -4953,7 +5076,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 struct walk_control *wc)
+				 struct walk_control *wc, int *lookup_info)
 {
 	u64 bytenr;
 	u64 generation;
@@ -4973,8 +5096,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	 * for the subtree
 	 */
 	if (wc->stage == UPDATE_BACKREF &&
-	    generation <= root->root_key.offset)
+	    generation <= root->root_key.offset) {
+		*lookup_info = 1;
 		return 1;
+	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
 	blocksize = btrfs_level_size(root, level - 1);
@@ -4987,14 +5112,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	btrfs_tree_lock(next);
 	btrfs_set_lock_blocking(next);
 
-	if (wc->stage == DROP_REFERENCE) {
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &wc->refs[level - 1],
-					       &wc->flags[level - 1]);
-		BUG_ON(ret);
-		BUG_ON(wc->refs[level - 1] == 0);
+	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+				       &wc->refs[level - 1],
+				       &wc->flags[level - 1]);
+	BUG_ON(ret);
+	BUG_ON(wc->refs[level - 1] == 0);
+	*lookup_info = 0;
 
+	if (wc->stage == DROP_REFERENCE) {
 		if (wc->refs[level - 1] > 1) {
+			if (level == 1 &&
+			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+				goto skip;
+
 			if (!wc->update_ref ||
 			    generation <= root->root_key.offset)
 				goto skip;
@@ -5008,12 +5138,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			wc->stage = UPDATE_BACKREF;
 			wc->shared_level = level - 1;
 		}
+	} else {
+		if (level == 1 &&
+		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+			goto skip;
 	}
 
 	if (!btrfs_buffer_uptodate(next, generation)) {
 		btrfs_tree_unlock(next);
 		free_extent_buffer(next);
 		next = NULL;
+		*lookup_info = 1;
 	}
 
 	if (!next) {
@@ -5036,21 +5171,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 skip:
 	wc->refs[level - 1] = 0;
 	wc->flags[level - 1] = 0;
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			parent = path->nodes[level]->start;
+		} else {
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level]));
+			parent = 0;
+		}
 
-	if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-		parent = path->nodes[level]->start;
-	} else {
-		BUG_ON(root->root_key.objectid !=
-		       btrfs_header_owner(path->nodes[level]));
-		parent = 0;
+		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+					root->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
 	}
-
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-				root->root_key.objectid, level - 1, 0);
-	BUG_ON(ret);
-
 	btrfs_tree_unlock(next);
 	free_extent_buffer(next);
+	*lookup_info = 1;
 	return 1;
 }
 
@@ -5164,6 +5300,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 			struct walk_control *wc)
 {
 	int level = wc->level;
+	int lookup_info = 1;
 	int ret;
 
 	while (level >= 0) {
@@ -5171,14 +5308,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		    btrfs_header_nritems(path->nodes[level]))
 			break;
 
-		ret = walk_down_proc(trans, root, path, wc);
+		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
 
 		if (level == 0)
 			break;
 
-		ret = do_walk_down(trans, root, path, wc);
+		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;
 			continue;
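The lookup_info plumbing threaded through walk_down_proc() and do_walk_down()
lets the walk skip a redundant btrfs_lookup_extent_info() call when the
previous step already fetched the child's refs and flags. A minimal model of
that in/out flag, with invented names and a counter in place of the real
lookup:

#include <stdbool.h>
#include <stdio.h>

static int lookups;     /* count extent-info lookups for the demo */

static void lookup_extent_info(void)
{
        lookups++;
}

static void walk_down_proc(int level, bool lookup_info)
{
        (void)level;
        if (lookup_info)        /* only refresh refs/flags when stale */
                lookup_extent_info();
}

static void do_walk_down(int level, bool *lookup_info)
{
        (void)level;
        lookup_extent_info();   /* child refs/flags now known ... */
        *lookup_info = false;   /* ... so the next proc call can skip */
}

int main(void)
{
        bool lookup_info = true;

        for (int level = 2; level > 0; level--) {
                walk_down_proc(level, lookup_info);
                do_walk_down(level, &lookup_info);
        }
        printf("lookups: %d\n", lookups);       /* 3 instead of 4 */
        return 0;
}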