author     Josef Bacik <josef@redhat.com>          2009-07-27 13:57:01 -0400
committer  Chris Mason <chris.mason@oracle.com>    2009-07-27 13:57:01 -0400
commit     68b38550ddbea13d296184bf69edff387618b1d3 (patch)
tree       c0c3901b77f273ac08c39c641586884f41a90b84
parent     631c07c8d12bcc6ce4a0fbfbd64ea843d78e2b10 (diff)
Btrfs: change how we unpin extents
We are racy with async block caching and unpinning extents.  This patch
makes things much less complicated by only unpinning the extent if the
block group is cached.  We check the block_group->cached var under the
block_group->lock spin lock.  If it is set to BTRFS_CACHE_FINISHED then
we update the pinned counters, unpin the extent, and add the free space
back.  If it is not set to this, we start caching the block group so the
extent can be unpinned on the next pass.  This keeps us from racing with
the async caching threads, lets us kill the fs-wide async thread counter,
and keeps us from having to set DELALLOC bits for every extent we hit if
there are caching kthreads going.

One thing that needed to be changed was btrfs_free_super_mirror_extents.
Instead of just looking for LOCKED extents, we now also look for DIRTY
extents, since we could have left some extents pinned in the previous
transaction that will never get freed now that we are unmounting, which
would cause us to leak memory.  So btrfs_free_super_mirror_extents has
been renamed to btrfs_free_pinned_extents, and it will clear the extents
locked for the super mirror, and any remaining pinned extents that may
be present.

Thank you,

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
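[Editor's note: the following is a minimal userspace sketch of the new
unpin decision, not part of the patch.  struct block_group and the
BTRFS_CACHE_NO / BTRFS_CACHE_STARTED states are simplified stand-ins for
the kernel types; only BTRFS_CACHE_FINISHED and the "check cached state
under the lock, otherwise kick off caching" logic come from the patch,
and the spinlock itself is elided here.]

#include <stdio.h>

enum cache_state {
	BTRFS_CACHE_NO,		/* caching has not started (assumed state) */
	BTRFS_CACHE_STARTED,	/* async caching running (assumed state) */
	BTRFS_CACHE_FINISHED,	/* free space fully cached (from the patch) */
};

struct block_group {
	enum cache_state cached;
	unsigned long long pinned;	/* bytes pinned in this group */
};

/* returns 1 if the extent was unpinned, 0 if caching was started instead */
static int unpin_extent(struct block_group *bg, unsigned long long len)
{
	/* in the kernel this check happens under block_group->lock */
	int unpin = (bg->cached == BTRFS_CACHE_FINISHED);

	if (unpin) {
		/* cached: safe to drop the pinned counters and free space */
		bg->pinned -= len;
	} else {
		/*
		 * not cached: leave the extent pinned and start caching so
		 * a later unpin pass can account the space without racing
		 * the caching thread.
		 */
		bg->cached = BTRFS_CACHE_STARTED;
	}
	return unpin;
}

int main(void)
{
	struct block_group bg = { .cached = BTRFS_CACHE_NO, .pinned = 4096 };

	/* first pass: group not cached, so caching is started instead */
	printf("unpinned=%d pinned=%llu\n", unpin_extent(&bg, 4096), bg.pinned);

	/* pretend the async caching thread has since finished */
	bg.cached = BTRFS_CACHE_FINISHED;
	printf("unpinned=%d pinned=%llu\n", unpin_extent(&bg, 4096), bg.pinned);
	return 0;
}

On the first pass the group is uncached, so the extent stays pinned and
caching is kicked off; once caching finishes, the same extent can be
unpinned and its space freed.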
-rw-r--r--  fs/btrfs/ctree.h          5
-rw-r--r--  fs/btrfs/disk-io.c        3
-rw-r--r--  fs/btrfs/extent-tree.c  149
-rw-r--r--  fs/btrfs/tree-log.c       2
4 files changed, 46 insertions(+), 113 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 42b03c4ee494..17ad92c29cfd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -845,7 +845,6 @@ struct btrfs_fs_info {
 	atomic_t async_submit_draining;
 	atomic_t nr_async_bios;
 	atomic_t async_delalloc_pages;
-	atomic_t async_caching_threads;
 
 	/*
 	 * this is used by the balancing code to wait for all the pending
@@ -1926,7 +1925,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_update_pinned_extents(struct btrfs_root *root,
-				u64 bytenr, u64 num, int pin, int mark_free);
+				u64 bytenr, u64 num, int pin);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -2011,7 +2010,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
 				  u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			       u64 bytes);
-void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info);
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ec2c915f7f4a..c658397c7473 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1567,7 +1567,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
-	atomic_set(&fs_info->async_caching_threads, 0);
 	fs_info->sb = sb;
 	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
@@ -2339,7 +2338,7 @@ int close_ctree(struct btrfs_root *root)
 	free_extent_buffer(root->fs_info->csum_root->commit_root);
 
 	btrfs_free_block_groups(root->fs_info);
-	btrfs_free_super_mirror_extents(root->fs_info);
+	btrfs_free_pinned_extents(root->fs_info);
 
 	del_fs_roots(fs_info);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 508df5f7d2ea..08188f1615d9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -153,18 +153,26 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 	return ret;
 }
 
-void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info)
+/*
+ * We always set EXTENT_LOCKED for the super mirror extents so we don't
+ * overwrite them, so those bits need to be unset.  Also, if we are unmounting
+ * with pinned extents still sitting there because we had a block group caching,
+ * we need to clear those now, since we are done.
+ */
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
 {
 	u64 start, end, last = 0;
 	int ret;
 
 	while (1) {
 		ret = find_first_extent_bit(&info->pinned_extents, last,
-					    &start, &end, EXTENT_LOCKED);
+					    &start, &end,
+					    EXTENT_LOCKED|EXTENT_DIRTY);
 		if (ret)
 			break;
 
-		unlock_extent(&info->pinned_extents, start, end, GFP_NOFS);
+		clear_extent_bits(&info->pinned_extents, start, end,
+				  EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
 		last = end+1;
 	}
 }
@@ -209,8 +217,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 	while (start < end) {
 		ret = find_first_extent_bit(&info->pinned_extents, start,
 					    &extent_start, &extent_end,
-					    EXTENT_DIRTY|EXTENT_LOCKED|
-					    EXTENT_DELALLOC);
+					    EXTENT_DIRTY|EXTENT_LOCKED);
 		if (ret)
 			break;
 
@@ -238,67 +245,6 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 	return total_added;
 }
 
-DEFINE_MUTEX(discard_mutex);
-
-/*
- * if async kthreads are running when we cross transactions, we mark any pinned
- * extents with EXTENT_DELALLOC and then let the caching kthreads clean up those
- * extents when they are done.  Also we run this from btrfs_finish_extent_commit
- * in case there were some pinned extents that were missed because we had
- * already cached that block group.
- */
-static void btrfs_discard_pinned_extents(struct btrfs_fs_info *fs_info,
-					 struct btrfs_block_group_cache *cache)
-{
-	u64 start, end, last;
-	int ret;
-
-	if (!cache)
-		last = 0;
-	else
-		last = cache->key.objectid;
-
-	mutex_lock(&discard_mutex);
-	while (1) {
-		ret = find_first_extent_bit(&fs_info->pinned_extents, last,
-					    &start, &end, EXTENT_DELALLOC);
-		if (ret)
-			break;
-
-		if (cache && start >= cache->key.objectid + cache->key.offset)
-			break;
-
-
-		if (!cache) {
-			cache = btrfs_lookup_block_group(fs_info, start);
-			BUG_ON(!cache);
-
-			start = max(start, cache->key.objectid);
-			end = min(end, cache->key.objectid + cache->key.offset - 1);
-
-			if (block_group_cache_done(cache))
-				btrfs_add_free_space(cache, start,
-						     end - start + 1);
-			cache = NULL;
-		} else {
-			start = max(start, cache->key.objectid);
-			end = min(end, cache->key.objectid + cache->key.offset - 1);
-			btrfs_add_free_space(cache, start, end - start + 1);
-		}
-
-		clear_extent_bits(&fs_info->pinned_extents, start, end,
-				  EXTENT_DELALLOC, GFP_NOFS);
-		last = end + 1;
-
-		if (need_resched()) {
-			mutex_unlock(&discard_mutex);
-			cond_resched();
-			mutex_lock(&discard_mutex);
-		}
-	}
-	mutex_unlock(&discard_mutex);
-}
-
 static int caching_kthread(void *data)
 {
 	struct btrfs_block_group_cache *block_group = data;
@@ -317,7 +263,6 @@ static int caching_kthread(void *data)
 	if (!path)
 		return -ENOMEM;
 
-	atomic_inc(&fs_info->async_caching_threads);
 	atomic_inc(&block_group->space_info->caching_threads);
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 again:
@@ -399,13 +344,9 @@ next:
 err:
 	btrfs_free_path(path);
 	up_read(&fs_info->extent_root->commit_root_sem);
-	atomic_dec(&fs_info->async_caching_threads);
 	atomic_dec(&block_group->space_info->caching_threads);
 	wake_up(&block_group->caching_q);
 
-	if (!ret)
-		btrfs_discard_pinned_extents(fs_info, block_group);
-
 	return 0;
 }
 
@@ -1867,7 +1808,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 			BUG_ON(ret);
 		}
 		btrfs_update_pinned_extents(root, node->bytenr,
-					    node->num_bytes, 1, 0);
+					    node->num_bytes, 1);
 		update_reserved_extents(root, node->bytenr,
 					node->num_bytes, 0);
 	}
@@ -3100,19 +3041,15 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 }
 
 int btrfs_update_pinned_extents(struct btrfs_root *root,
-				u64 bytenr, u64 num, int pin, int mark_free)
+				u64 bytenr, u64 num, int pin)
 {
 	u64 len;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
-	if (pin) {
+	if (pin)
 		set_extent_dirty(&fs_info->pinned_extents,
 				 bytenr, bytenr + num - 1, GFP_NOFS);
-	} else {
-		clear_extent_dirty(&fs_info->pinned_extents,
-				   bytenr, bytenr + num - 1, GFP_NOFS);
-	}
 
 	while (num > 0) {
 		cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -3128,14 +3065,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 			spin_unlock(&cache->space_info->lock);
 			fs_info->total_pinned += len;
 		} else {
+			int unpin = 0;
+
+			/*
+			 * in order to not race with the block group caching, we
+			 * only want to unpin the extent if we are cached.  If
+			 * we aren't cached, we want to start async caching this
+			 * block group so we can free the extent the next time
+			 * around.
+			 */
 			spin_lock(&cache->space_info->lock);
 			spin_lock(&cache->lock);
-			cache->pinned -= len;
-			cache->space_info->bytes_pinned -= len;
+			unpin = (cache->cached == BTRFS_CACHE_FINISHED);
+			if (likely(unpin)) {
+				cache->pinned -= len;
+				cache->space_info->bytes_pinned -= len;
+				fs_info->total_pinned -= len;
+			}
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
-			fs_info->total_pinned -= len;
-			if (block_group_cache_done(cache) && mark_free)
+
+			if (likely(unpin))
+				clear_extent_dirty(&fs_info->pinned_extents,
+						   bytenr, bytenr + len -1,
+						   GFP_NOFS);
+			else
+				cache_block_group(cache);
+
+			if (unpin)
 				btrfs_add_free_space(cache, bytenr, len);
 		}
 		btrfs_put_block_group(cache);
@@ -3181,27 +3138,15 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 	u64 last = 0;
 	u64 start;
 	u64 end;
-	bool caching_kthreads = false;
 	struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
 	int ret;
 
-	if (atomic_read(&root->fs_info->async_caching_threads))
-		caching_kthreads = true;
-
 	while (1) {
 		ret = find_first_extent_bit(pinned_extents, last,
 					    &start, &end, EXTENT_DIRTY);
 		if (ret)
 			break;
 
-		/*
-		 * we need to make sure that the pinned extents don't go away
-		 * while we are caching block groups
-		 */
-		if (unlikely(caching_kthreads))
-			set_extent_delalloc(pinned_extents, start, end,
-					    GFP_NOFS);
-
 		set_extent_dirty(copy, start, end, GFP_NOFS);
 		last = end + 1;
 	}
@@ -3215,12 +3160,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	u64 start;
 	u64 end;
 	int ret;
-	int mark_free = 1;
-
-	ret = find_first_extent_bit(&root->fs_info->pinned_extents, 0,
-				    &start, &end, EXTENT_DELALLOC);
-	if (!ret)
-		mark_free = 0;
 
 	while (1) {
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
@@ -3231,16 +3170,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 		ret = btrfs_discard_extent(root, start, end + 1 - start);
 
 		/* unlocks the pinned mutex */
-		btrfs_update_pinned_extents(root, start, end + 1 - start, 0,
-					    mark_free);
+		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 
 		cond_resched();
 	}
 
-	if (unlikely(!mark_free))
-		btrfs_discard_pinned_extents(root->fs_info, NULL);
-
 	return ret;
 }
 
@@ -3281,7 +3216,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 pinit:
 	btrfs_set_path_blocking(path);
 	/* unlocks the pinned mutex */
-	btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0);
+	btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 
 	BUG_ON(err < 0);
 	return 0;
@@ -3592,7 +3527,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
 		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
 		/* unlocks the pinned mutex */
-		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0);
+		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 		update_reserved_extents(root, bytenr, num_bytes, 0);
 		ret = 0;
 	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 11d0787c6188..d91b0de7c502 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -264,7 +264,7 @@ static int process_one_buffer(struct btrfs_root *log,
 {
 	if (wc->pin)
 		btrfs_update_pinned_extents(log->fs_info->extent_root,
-					    eb->start, eb->len, 1, 0);
+					    eb->start, eb->len, 1);
 
 	if (btrfs_buffer_uptodate(eb, gen)) {
 		if (wc->write)