author    Chris Mason <clm@fb.com>    2015-10-21 22:00:38 -0400
committer Chris Mason <clm@fb.com>    2015-10-21 22:00:38 -0400
commit    a9e6d153563d2ed69c6cd7fb4fa5ce4ca7c712eb
tree      43fab2a259934e1f1dde9607610424d6089dba5e
parent    56fa9d0762ed17153c1bdff3c0aeeecbe522b504
parent    0584f718ed1f351fca5047a4b1ebba9b5ea41215
Merge branch 'allocator-fixes' into for-linus-4.4
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--  fs/btrfs/backref.c                |   6
-rw-r--r--  fs/btrfs/ctree.h                  |  21
-rw-r--r--  fs/btrfs/disk-io.c                |  20
-rw-r--r--  fs/btrfs/extent-tree.c            | 272
-rw-r--r--  fs/btrfs/free-space-cache.c       |  67
-rw-r--r--  fs/btrfs/free-space-cache.h       |   1
-rw-r--r--  fs/btrfs/inode.c                  |   9
-rw-r--r--  fs/btrfs/ordered-data.c           |  64
-rw-r--r--  fs/btrfs/ordered-data.h           |   2
-rw-r--r--  fs/btrfs/super.c                  |  30
-rw-r--r--  fs/btrfs/tests/free-space-tests.c |  22
-rw-r--r--  fs/btrfs/transaction.c            |  52
-rw-r--r--  fs/btrfs/transaction.h            |  14
-rw-r--r--  fs/btrfs/volumes.c                |   2
14 files changed, 459 insertions(+), 123 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ecbc63d3143e..2adc152a32e3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -362,6 +362,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
         goto out;
     }
 
+    if (btrfs_test_is_dummy_root(root)) {
+        srcu_read_unlock(&fs_info->subvol_srcu, index);
+        ret = -ENOENT;
+        goto out;
+    }
+
     if (path->search_commit_root)
         root_level = btrfs_header_level(root->commit_root);
     else if (time_seq == (u64)-1)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2135b82a6b61..bc3c711e82f2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1154,6 +1154,10 @@ struct btrfs_space_info {
                    delalloc/allocations */
     u64 bytes_readonly;    /* total bytes that are read only */
 
+    u64 max_extent_size;    /* This will hold the maximum extent size of
+                   the space info if we had an ENOSPC in the
+                   allocator. */
+
     unsigned int full:1;    /* indicates that we cannot allocate any more
                    chunks for this space */
     unsigned int chunk_alloc:1;    /* set if we are allocating a chunk */
@@ -1228,6 +1232,9 @@ struct btrfs_free_cluster {
     /* first extent starting offset */
     u64 window_start;
 
+    /* We did a full search and couldn't create a cluster */
+    bool fragmented;
+
     struct btrfs_block_group_cache *block_group;
     /*
      * when a cluster is allocated from a block group, we put the
@@ -2148,6 +2155,8 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR    (1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE        (1 << 23)
+#define BTRFS_MOUNT_FRAGMENT_DATA        (1 << 24)
+#define BTRFS_MOUNT_FRAGMENT_METADATA        (1 << 25)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL    (30)
 #define BTRFS_DEFAULT_MAX_INLINE    (8192)
@@ -2172,6 +2181,18 @@ struct btrfs_ioctl_defrag_range_args {
     btrfs_clear_opt(root->fs_info->mount_opt, opt);            \
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static inline int
+btrfs_should_fragment_free_space(struct btrfs_root *root,
+                 struct btrfs_block_group_cache *block_group)
+{
+    return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+        block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+           (btrfs_test_opt(root, FRAGMENT_DATA) &&
+        block_group->flags & BTRFS_BLOCK_GROUP_DATA);
+}
+#endif
+
 /*
  * Requests for changes that need to be done during transaction commit.
  *
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0e0544e0e20b..86a11a902fcf 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4327,25 +4327,6 @@ again:
     return 0;
 }
 
-static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
-                       struct btrfs_fs_info *fs_info)
-{
-    struct btrfs_ordered_extent *ordered;
-
-    spin_lock(&fs_info->trans_lock);
-    while (!list_empty(&cur_trans->pending_ordered)) {
-        ordered = list_first_entry(&cur_trans->pending_ordered,
-                       struct btrfs_ordered_extent,
-                       trans_list);
-        list_del_init(&ordered->trans_list);
-        spin_unlock(&fs_info->trans_lock);
-
-        btrfs_put_ordered_extent(ordered);
-        spin_lock(&fs_info->trans_lock);
-    }
-    spin_unlock(&fs_info->trans_lock);
-}
-
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
                    struct btrfs_root *root)
 {
@@ -4357,7 +4338,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
     cur_trans->state = TRANS_STATE_UNBLOCKED;
     wake_up(&root->fs_info->transaction_wait);
 
-    btrfs_free_pending_ordered(cur_trans, root->fs_info);
     btrfs_destroy_delayed_inodes(root);
     btrfs_assert_delayed_root_empty(root);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 46609607789b..92fdbc6b89e7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
     kfree(ctl);
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static void fragment_free_space(struct btrfs_root *root,
+                struct btrfs_block_group_cache *block_group)
+{
+    u64 start = block_group->key.objectid;
+    u64 len = block_group->key.offset;
+    u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
+        root->nodesize : root->sectorsize;
+    u64 step = chunk << 1;
+
+    while (len > chunk) {
+        btrfs_remove_free_space(block_group, start, chunk);
+        start += step;
+        if (len < step)
+            len = 0;
+        else
+            len -= step;
+    }
+}
+#endif
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
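
The fragmentation helper above walks the block group in 2*chunk strides and deletes the first chunk of each stride, so exactly every other chunk of free space disappears. A minimal userspace sketch of the same stepping (kernel types and btrfs_remove_free_space() replaced with plain integers and printf; the 16 KiB block group and 4 KiB sector size are made-up values):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t start = 0;           /* block_group->key.objectid */
        uint64_t len = 16 * 1024;     /* block_group->key.offset */
        uint64_t chunk = 4 * 1024;    /* sectorsize for a data block group */
        uint64_t step = chunk << 1;

        while (len > chunk) {
            /* stands in for btrfs_remove_free_space(block_group, start, chunk) */
            printf("remove [%llu, %llu)\n", (unsigned long long)start,
                   (unsigned long long)(start + chunk));
            start += step;
            len = len < step ? 0 : len - step;
        }
        return 0;
    }

This prints "remove [0, 4096)" and "remove [8192, 12288)": half of the free space is punched out, which is why the callers below charge half of the block group's free bytes (bytes_used >> 1) to space_info->bytes_used before fragmenting.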
@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
     u64 last = 0;
     u32 nritems;
     int ret = -ENOMEM;
+    bool wakeup = true;
 
     caching_ctl = container_of(work, struct btrfs_caching_control, work);
     block_group = caching_ctl->block_group;
@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
 
     last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
+#ifdef CONFIG_BTRFS_DEBUG
+    /*
+     * If we're fragmenting we don't want to make anybody think we can
+     * allocate from this block group until we've had a chance to fragment
+     * the free space.
+     */
+    if (btrfs_should_fragment_free_space(extent_root, block_group))
+        wakeup = false;
+#endif
     /*
      * We don't want to deadlock with somebody trying to allocate a new
      * extent for the extent root while also trying to search the extent
@@ -441,7 +472,8 @@ next:
 
         if (need_resched() ||
             rwsem_is_contended(&fs_info->commit_root_sem)) {
-            caching_ctl->progress = last;
+            if (wakeup)
+                caching_ctl->progress = last;
             btrfs_release_path(path);
             up_read(&fs_info->commit_root_sem);
             mutex_unlock(&caching_ctl->mutex);
@@ -464,7 +496,8 @@ next:
             key.offset = 0;
             key.type = BTRFS_EXTENT_ITEM_KEY;
 
-            caching_ctl->progress = last;
+            if (wakeup)
+                caching_ctl->progress = last;
             btrfs_release_path(path);
             goto next;
         }
@@ -491,7 +524,8 @@ next:
 
             if (total_found > (1024 * 1024 * 2)) {
                 total_found = 0;
-                wake_up(&caching_ctl->wait);
+                if (wakeup)
+                    wake_up(&caching_ctl->wait);
             }
         }
         path->slots[0]++;
@@ -501,13 +535,27 @@ next:
     total_found += add_new_free_space(block_group, fs_info, last,
                       block_group->key.objectid +
                       block_group->key.offset);
-    caching_ctl->progress = (u64)-1;
-
     spin_lock(&block_group->lock);
     block_group->caching_ctl = NULL;
     block_group->cached = BTRFS_CACHE_FINISHED;
     spin_unlock(&block_group->lock);
 
+#ifdef CONFIG_BTRFS_DEBUG
+    if (btrfs_should_fragment_free_space(extent_root, block_group)) {
+        u64 bytes_used;
+
+        spin_lock(&block_group->space_info->lock);
+        spin_lock(&block_group->lock);
+        bytes_used = block_group->key.offset -
+            btrfs_block_group_used(&block_group->item);
+        block_group->space_info->bytes_used += bytes_used >> 1;
+        spin_unlock(&block_group->lock);
+        spin_unlock(&block_group->space_info->lock);
+        fragment_free_space(extent_root, block_group);
+    }
+#endif
+
+    caching_ctl->progress = (u64)-1;
 err:
     btrfs_free_path(path);
     up_read(&fs_info->commit_root_sem);
@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
         }
     }
     spin_unlock(&cache->lock);
+#ifdef CONFIG_BTRFS_DEBUG
+    if (ret == 1 &&
+        btrfs_should_fragment_free_space(fs_info->extent_root,
+                         cache)) {
+        u64 bytes_used;
+
+        spin_lock(&cache->space_info->lock);
+        spin_lock(&cache->lock);
+        bytes_used = cache->key.offset -
+            btrfs_block_group_used(&cache->item);
+        cache->space_info->bytes_used += bytes_used >> 1;
+        spin_unlock(&cache->lock);
+        spin_unlock(&cache->space_info->lock);
+        fragment_free_space(fs_info->extent_root, cache);
+    }
+#endif
     mutex_unlock(&caching_ctl->mutex);
 
     wake_up(&caching_ctl->wait);
@@ -3344,6 +3408,15 @@ again:
     spin_unlock(&block_group->lock);
 
     /*
+     * We hit an ENOSPC when setting up the cache in this transaction, just
+     * skip doing the setup, we've already cleared the cache so we're safe.
+     */
+    if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
+        ret = -ENOSPC;
+        goto out_put;
+    }
+
+    /*
      * Try to preallocate enough space based on how big the block group is.
      * Keep in mind this has to include any pinned space which could end up
      * taking up quite a bit since it's not folded into the other space
@@ -3363,8 +3436,18 @@ again:
     ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
                           num_pages, num_pages,
                           &alloc_hint);
+    /*
+     * Our cache requires contiguous chunks so that we don't modify a bunch
+     * of metadata or split extents when writing the cache out, which means
+     * we can enospc if we are heavily fragmented in addition to just normal
+     * out of space conditions. So if we hit this just skip setting up any
+     * other block groups for this transaction, maybe we'll unpin enough
+     * space the next time around.
+     */
     if (!ret)
         dcs = BTRFS_DC_SETUP;
+    else if (ret == -ENOSPC)
+        set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
     btrfs_free_reserved_data_space(inode, 0, num_pages);
 
 out_put:
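
BTRFS_TRANS_CACHE_ENOSPC effectively turns the first failed cache preallocation into a per-transaction latch: every later cache setup attempt in the same transaction bails out up front instead of repeating a search that is known to fail. A toy model of the latch (the transaction's flags word reduced to a plain bitmask; the kernel uses set_bit()/test_bit() on trans->transaction->flags):

    #include <stdio.h>

    #define CACHE_ENOSPC (1UL << 2)      /* stands in for BTRFS_TRANS_CACHE_ENOSPC */

    static unsigned long trans_flags;    /* one flags word per transaction */

    static int setup_one_cache(int prealloc_ok)
    {
        if (trans_flags & CACHE_ENOSPC)
            return -1;                   /* skip the setup entirely */
        if (!prealloc_ok) {
            trans_flags |= CACHE_ENOSPC; /* latch until the transaction ends */
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", setup_one_cache(1));   /* 0: cache set up */
        printf("%d\n", setup_one_cache(0));   /* -1: ENOSPC, latch set */
        printf("%d\n", setup_one_cache(1));   /* -1: skipped via the latch */
        return 0;
    }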
@@ -3751,6 +3834,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
     found->bytes_readonly = 0;
     found->bytes_may_use = 0;
     found->full = 0;
+    found->max_extent_size = 0;
     found->force_alloc = CHUNK_ALLOC_NO_FORCE;
     found->chunk_alloc = 0;
     found->flush = 0;
@@ -4003,7 +4087,8 @@ commit_trans:
         if (IS_ERR(trans))
             return PTR_ERR(trans);
         if (have_pinned_space >= 0 ||
-            trans->transaction->have_free_bgs ||
+            test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
+                 &trans->transaction->flags) ||
             need_commit > 0) {
             ret = btrfs_commit_transaction(trans, root);
             if (ret)
@@ -6112,6 +6197,34 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
     update_global_block_rsv(fs_info);
 }
 
+/*
+ * Returns the free cluster for the given space info and sets empty_cluster to
+ * what it should be based on the mount options.
+ */
+static struct btrfs_free_cluster *
+fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
+           u64 *empty_cluster)
+{
+    struct btrfs_free_cluster *ret = NULL;
+    bool ssd = btrfs_test_opt(root, SSD);
+
+    *empty_cluster = 0;
+    if (btrfs_mixed_space_info(space_info))
+        return ret;
+
+    if (ssd)
+        *empty_cluster = 2 * 1024 * 1024;
+    if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+        ret = &root->fs_info->meta_alloc_cluster;
+        if (!ssd)
+            *empty_cluster = 64 * 1024;
+    } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
+        ret = &root->fs_info->data_alloc_cluster;
+    }
+
+    return ret;
+}
+
 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
                   const bool return_free_space)
 {
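
For the common block group types, fetch_cluster_info() boils down to a small table; mixed (data+metadata) space infos always come back with no cluster and an empty_cluster of zero:

    space_info flags    SSD?   cluster returned               *empty_cluster
    metadata            yes    &fs_info->meta_alloc_cluster   2 MiB
    metadata            no     &fs_info->meta_alloc_cluster   64 KiB
    data                yes    &fs_info->data_alloc_cluster   2 MiB
    data                no     NULL                           0

Centralizing this lets unpin_extent_range() and find_free_extent() agree on the cluster and window size, instead of find_free_extent() open-coding the policy as it did before this series.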
@@ -6119,7 +6232,10 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
     struct btrfs_block_group_cache *cache = NULL;
     struct btrfs_space_info *space_info;
     struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+    struct btrfs_free_cluster *cluster = NULL;
     u64 len;
+    u64 total_unpinned = 0;
+    u64 empty_cluster = 0;
     bool readonly;
 
     while (start <= end) {
@@ -6128,8 +6244,14 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
             start >= cache->key.objectid + cache->key.offset) {
             if (cache)
                 btrfs_put_block_group(cache);
+            total_unpinned = 0;
             cache = btrfs_lookup_block_group(fs_info, start);
             BUG_ON(!cache); /* Logic error */
+
+            cluster = fetch_cluster_info(root,
+                             cache->space_info,
+                             &empty_cluster);
+            empty_cluster <<= 1;
         }
 
         len = cache->key.objectid + cache->key.offset - start;
@@ -6142,12 +6264,27 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
         }
 
         start += len;
+        total_unpinned += len;
         space_info = cache->space_info;
 
+        /*
+         * If this space cluster has been marked as fragmented and we've
+         * unpinned enough in this block group to potentially allow a
+         * cluster to be created inside of it go ahead and clear the
+         * fragmented check.
+         */
+        if (cluster && cluster->fragmented &&
+            total_unpinned > empty_cluster) {
+            spin_lock(&cluster->lock);
+            cluster->fragmented = 0;
+            spin_unlock(&cluster->lock);
+        }
+
         spin_lock(&space_info->lock);
         spin_lock(&cache->lock);
         cache->pinned -= len;
         space_info->bytes_pinned -= len;
+        space_info->max_extent_size = 0;
         percpu_counter_add(&space_info->total_bytes_pinned, -len);
         if (cache->ro) {
             space_info->bytes_readonly += len;
@@ -6880,7 +7017,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
     struct btrfs_block_group_cache *block_group = NULL;
     u64 search_start = 0;
     u64 max_extent_size = 0;
-    int empty_cluster = 2 * 1024 * 1024;
+    u64 empty_cluster = 0;
     struct btrfs_space_info *space_info;
     int loop = 0;
     int index = __get_raid_index(flags);
@@ -6890,6 +7027,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
     bool failed_alloc = false;
     bool use_cluster = true;
     bool have_caching_bg = false;
+    bool full_search = false;
 
     WARN_ON(num_bytes < root->sectorsize);
     ins->type = BTRFS_EXTENT_ITEM_KEY;
@@ -6905,36 +7043,47 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
     }
 
     /*
-     * If the space info is for both data and metadata it means we have a
-     * small filesystem and we can't use the clustering stuff.
+     * If our free space is heavily fragmented we may not be able to make
+     * big contiguous allocations, so instead of doing the expensive search
+     * for free space, simply return ENOSPC with our max_extent_size so we
+     * can go ahead and search for a more manageable chunk.
+     *
+     * If our max_extent_size is large enough for our allocation simply
+     * disable clustering since we will likely not be able to find enough
+     * space to create a cluster and induce latency trying.
      */
-    if (btrfs_mixed_space_info(space_info))
-        use_cluster = false;
-
-    if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
-        last_ptr = &root->fs_info->meta_alloc_cluster;
-        if (!btrfs_test_opt(root, SSD))
-            empty_cluster = 64 * 1024;
-    }
-
-    if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
-        btrfs_test_opt(root, SSD)) {
-        last_ptr = &root->fs_info->data_alloc_cluster;
-    }
+    if (unlikely(space_info->max_extent_size)) {
+        spin_lock(&space_info->lock);
+        if (space_info->max_extent_size &&
+            num_bytes > space_info->max_extent_size) {
+            ins->offset = space_info->max_extent_size;
+            spin_unlock(&space_info->lock);
+            return -ENOSPC;
+        } else if (space_info->max_extent_size) {
+            use_cluster = false;
+        }
+        spin_unlock(&space_info->lock);
+    }
 
+    last_ptr = fetch_cluster_info(orig_root, space_info, &empty_cluster);
     if (last_ptr) {
         spin_lock(&last_ptr->lock);
         if (last_ptr->block_group)
             hint_byte = last_ptr->window_start;
+        if (last_ptr->fragmented) {
+            /*
+             * We still set window_start so we can keep track of the
+             * last place we found an allocation to try and save
+             * some time.
+             */
+            hint_byte = last_ptr->window_start;
+            use_cluster = false;
+        }
         spin_unlock(&last_ptr->lock);
     }
 
     search_start = max(search_start, first_logical_byte(root, 0));
     search_start = max(search_start, hint_byte);
-
-    if (!last_ptr)
-        empty_cluster = 0;
-
     if (search_start == hint_byte) {
         block_group = btrfs_lookup_block_group(root->fs_info,
                                search_start);
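
The short-circuit at the top of this hunk relies on the contract at the out: label further down: whenever find_free_extent() fails with -ENOSPC, ins->offset carries the allocator's best guess at the largest extent that could still be found, and space_info->max_extent_size caches the same value. A toy model of that caller-visible contract (find_free_extent_model() and the 256 KiB figure are invented for illustration):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t max_extent = 256 * 1024;   /* models space_info->max_extent_size */

    static int find_free_extent_model(uint64_t num_bytes, uint64_t *ins_offset)
    {
        if (num_bytes > max_extent) {
            *ins_offset = max_extent;   /* tell the caller what would fit */
            return -1;                  /* -ENOSPC without any searching */
        }
        *ins_offset = num_bytes;
        return 0;
    }

    int main(void)
    {
        uint64_t want = 1024 * 1024, got;

        while (find_free_extent_model(want, &got)) {
            printf("ENOSPC at %llu, retrying at %llu\n",
                   (unsigned long long)want, (unsigned long long)got);
            want = got;                 /* shrink to the advertised maximum */
        }
        printf("allocated %llu\n", (unsigned long long)got);
        return 0;
    }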
@@ -6969,6 +7118,8 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
     }
 search:
     have_caching_bg = false;
+    if (index == 0 || index == __get_raid_index(flags))
+        full_search = true;
     down_read(&space_info->groups_sem);
     list_for_each_entry(block_group, &space_info->block_groups[index],
                 list) {
@@ -7002,6 +7153,7 @@ search:
 have_block_group:
         cached = block_group_cache_done(block_group);
         if (unlikely(!cached)) {
+            have_caching_bg = true;
             ret = cache_block_group(block_group, 0);
             BUG_ON(ret < 0);
             ret = 0;
@@ -7016,7 +7168,7 @@ have_block_group:
          * Ok we want to try and use the cluster allocator, so
          * lets look there
          */
-        if (last_ptr) {
+        if (last_ptr && use_cluster) {
             struct btrfs_block_group_cache *used_block_group;
             unsigned long aligned_cluster;
             /*
@@ -7142,6 +7294,16 @@ refill_cluster:
         }
 
 unclustered_alloc:
+        /*
+         * We are doing an unclustered alloc, set the fragmented flag so
+         * we don't bother trying to setup a cluster again until we get
+         * more space.
+         */
+        if (unlikely(last_ptr)) {
+            spin_lock(&last_ptr->lock);
+            last_ptr->fragmented = 1;
+            spin_unlock(&last_ptr->lock);
+        }
         spin_lock(&block_group->free_space_ctl->tree_lock);
         if (cached &&
             block_group->free_space_ctl->free_space <
@@ -7174,8 +7336,6 @@ unclustered_alloc:
             failed_alloc = true;
             goto have_block_group;
         } else if (!offset) {
-            if (!cached)
-                have_caching_bg = true;
             goto loop;
         }
 checks:
@@ -7232,7 +7392,20 @@ loop:
      */
     if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
         index = 0;
-        loop++;
+        if (loop == LOOP_CACHING_NOWAIT) {
+            /*
+             * We want to skip the LOOP_CACHING_WAIT step if we
+             * don't have any uncached bgs and we've already done a
+             * full search through.
+             */
+            if (have_caching_bg || !full_search)
+                loop = LOOP_CACHING_WAIT;
+            else
+                loop = LOOP_ALLOC_CHUNK;
+        } else {
+            loop++;
+        }
+
         if (loop == LOOP_ALLOC_CHUNK) {
             struct btrfs_trans_handle *trans;
             int exist = 0;
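
The loop variable steps through the allocator's escalation ladder, LOOP_CACHING_NOWAIT -> LOOP_CACHING_WAIT -> LOOP_ALLOC_CHUNK -> LOOP_NO_EMPTY_SIZE; the new branch lets a first pass that saw every block group fully cached jump straight to chunk allocation. A compact sketch of just that transition (the 0-3 values are assumed to match the LOOP_* defines in extent-tree.c):

    #include <stdio.h>

    enum loop_state {
        LOOP_CACHING_NOWAIT,    /* 0: only look at cached block groups */
        LOOP_CACHING_WAIT,      /* 1: wait for caching to make progress */
        LOOP_ALLOC_CHUNK,       /* 2: try to allocate a new chunk */
        LOOP_NO_EMPTY_SIZE,     /* 3: last resort, drop the padding */
    };

    static enum loop_state next_loop(enum loop_state loop,
                                     int have_caching_bg, int full_search)
    {
        if (loop == LOOP_CACHING_NOWAIT) {
            if (have_caching_bg || !full_search)
                return LOOP_CACHING_WAIT;
            return LOOP_ALLOC_CHUNK;    /* nothing uncached left to wait for */
        }
        return loop + 1;
    }

    int main(void)
    {
        printf("%d\n", next_loop(LOOP_CACHING_NOWAIT, 0, 1)); /* 2: skip the wait */
        printf("%d\n", next_loop(LOOP_CACHING_NOWAIT, 1, 1)); /* 1: must wait */
        return 0;
    }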
@@ -7250,6 +7423,15 @@ loop:
 
             ret = do_chunk_alloc(trans, root, flags,
                          CHUNK_ALLOC_FORCE);
+
+            /*
+             * If we can't allocate a new chunk we've already looped
+             * through at least once, move on to the NO_EMPTY_SIZE
+             * case.
+             */
+            if (ret == -ENOSPC)
+                loop = LOOP_NO_EMPTY_SIZE;
+
             /*
              * Do not bail out on ENOSPC since we
              * can do more things.
@@ -7266,6 +7448,15 @@ loop:
         }
 
         if (loop == LOOP_NO_EMPTY_SIZE) {
+            /*
+             * Don't loop again if we already have no empty_size and
+             * no empty_cluster.
+             */
+            if (empty_size == 0 &&
+                empty_cluster == 0) {
+                ret = -ENOSPC;
+                goto out;
+            }
             empty_size = 0;
             empty_cluster = 0;
         }
@@ -7274,11 +7465,20 @@ loop:
     } else if (!ins->objectid) {
         ret = -ENOSPC;
     } else if (ins->objectid) {
+        if (!use_cluster && last_ptr) {
+            spin_lock(&last_ptr->lock);
+            last_ptr->window_start = ins->objectid;
+            spin_unlock(&last_ptr->lock);
+        }
         ret = 0;
     }
 out:
-    if (ret == -ENOSPC)
+    if (ret == -ENOSPC) {
+        spin_lock(&space_info->lock);
+        space_info->max_extent_size = max_extent_size;
+        spin_unlock(&space_info->lock);
         ins->offset = max_extent_size;
+    }
     return ret;
 }
 
@@ -7327,7 +7527,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
              u64 empty_size, u64 hint_byte,
              struct btrfs_key *ins, int is_data, int delalloc)
 {
-    bool final_tried = false;
+    bool final_tried = num_bytes == min_alloc_size;
     u64 flags;
     int ret;
 
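
Starting final_tried at num_bytes == min_alloc_size closes a pointless retry: the function's -ENOSPC path (not shown in this hunk) roughly halves num_bytes, clamps it against ins->offset and min_alloc_size, and tries again, so a request that already equals the minimum would previously get one extra identical pass before giving up. A minimal model of that retry policy (the halving/clamping is paraphrased, not copied, from the retry path):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t min_alloc_size = 4096;
        uint64_t num_bytes = 4096;      /* caller already asked for the minimum */
        int final_tried = num_bytes == min_alloc_size;

        while (1) {
            /* pretend every allocation attempt fails with ENOSPC */
            if (final_tried) {
                printf("give up at %llu\n", (unsigned long long)num_bytes);
                break;
            }
            num_bytes >>= 1;
            if (num_bytes <= min_alloc_size) {
                num_bytes = min_alloc_size;
                final_tried = 1;
            }
            printf("retrying at %llu\n", (unsigned long long)num_bytes);
        }
        return 0;
    }

With num_bytes starting at the minimum this prints only "give up at 4096"; before the change the loop would have made one more attempt at the same size.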
@@ -8929,7 +9129,7 @@ again:
      * back off and let this transaction commit
      */
     mutex_lock(&root->fs_info->ro_block_group_mutex);
-    if (trans->transaction->dirty_bg_run) {
+    if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
         u64 transid = trans->transid;
 
         mutex_unlock(&root->fs_info->ro_block_group_mutex);
@@ -9679,6 +9879,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
     free_excluded_extents(root, cache);
 
+#ifdef CONFIG_BTRFS_DEBUG
+    if (btrfs_should_fragment_free_space(root, cache)) {
+        u64 new_bytes_used = size - bytes_used;
+
+        bytes_used += new_bytes_used >> 1;
+        fragment_free_space(root, cache);
+    }
+#endif
     /*
      * Call to ensure the corresponding space_info object is created and
      * assigned to our block group, but don't update its counters just yet.
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5a9431dda07f..0948d34cb84a 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1730,7 +1730,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
  */
 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
              struct btrfs_free_space *bitmap_info, u64 *offset,
-             u64 *bytes)
+             u64 *bytes, bool for_alloc)
 {
     unsigned long found_bits = 0;
     unsigned long max_bits = 0;
@@ -1738,11 +1738,26 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
     unsigned long next_zero;
     unsigned long extent_bits;
 
+    /*
+     * Skip searching the bitmap if we don't have a contiguous section that
+     * is large enough for this allocation.
+     */
+    if (for_alloc &&
+        bitmap_info->max_extent_size &&
+        bitmap_info->max_extent_size < *bytes) {
+        *bytes = bitmap_info->max_extent_size;
+        return -1;
+    }
+
     i = offset_to_bit(bitmap_info->offset, ctl->unit,
               max_t(u64, *offset, bitmap_info->offset));
     bits = bytes_to_bits(*bytes, ctl->unit);
 
     for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
+        if (for_alloc && bits == 1) {
+            found_bits = 1;
+            break;
+        }
         next_zero = find_next_zero_bit(bitmap_info->bitmap,
                            BITS_PER_BITMAP, i);
         extent_bits = next_zero - i;
@@ -1762,6 +1777,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
     }
 
     *bytes = (u64)(max_bits) * ctl->unit;
+    bitmap_info->max_extent_size = *bytes;
     return -1;
 }
 
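
A failed search_bitmap() pass now leaves behind the largest contiguous run it saw, so the next allocation-sized lookup can fail in O(1) via the check added at the top of the function. A userspace sketch of the cached quantity (a 32-bit word stands in for the page-sized bitmap, and the 4096-byte unit is illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static unsigned longest_free_run(uint32_t free_bits)
    {
        unsigned best = 0, run = 0;

        for (int i = 0; i < 32; i++) {
            if (free_bits & (1u << i)) {
                if (++run > best)
                    best = run;
            } else {
                run = 0;
            }
        }
        return best;
    }

    int main(void)
    {
        uint32_t free_bits = 0x0f0f;    /* two 4-bit runs of free space */
        uint64_t unit = 4096;
        uint64_t max_extent_size = longest_free_run(free_bits) * unit;

        /* An 8-block (32 KiB) request can now be refused without a scan. */
        printf("max_extent_size = %llu\n", (unsigned long long)max_extent_size);
        printf("32 KiB alloc: %s\n",
               32 * 1024 <= max_extent_size ? "scan the bitmap"
                                            : "skip, too fragmented");
        return 0;
    }

The cache is only trusted in one direction: whenever bits are set (space is freed back into the bitmap) the largest run may have grown past the cached value, which is why add_bytes_to_bitmap() below zeroes max_extent_size rather than trying to update it.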
@@ -1813,7 +1829,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
     if (entry->bitmap) {
         u64 size = *bytes;
 
-        ret = search_bitmap(ctl, entry, &tmp, &size);
+        ret = search_bitmap(ctl, entry, &tmp, &size, true);
         if (!ret) {
             *offset = tmp;
             *bytes = size;
@@ -1874,7 +1890,8 @@ again:
     search_start = *offset;
     search_bytes = ctl->unit;
     search_bytes = min(search_bytes, end - search_start + 1);
-    ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
+    ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
+                false);
     if (ret < 0 || search_start != *offset)
         return -EINVAL;
 
@@ -1919,7 +1936,7 @@ again:
         search_start = *offset;
         search_bytes = ctl->unit;
         ret = search_bitmap(ctl, bitmap_info, &search_start,
-                    &search_bytes);
+                    &search_bytes, false);
         if (ret < 0 || search_start != *offset)
             return -EAGAIN;
 
@@ -1943,6 +1960,12 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
 
     bitmap_set_bits(ctl, info, offset, bytes_to_set);
 
+    /*
+     * We set some bytes, we have no idea what the max extent size is
+     * anymore.
+     */
+    info->max_extent_size = 0;
+
     return bytes_to_set;
 
 }
@@ -1951,12 +1974,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
               struct btrfs_free_space *info)
 {
     struct btrfs_block_group_cache *block_group = ctl->private;
+    bool forced = false;
+
+#ifdef CONFIG_BTRFS_DEBUG
+    if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
+                         block_group))
+        forced = true;
+#endif
 
     /*
      * If we are below the extents threshold then we can add this as an
      * extent, and don't have to deal with the bitmap
      */
-    if (ctl->free_extents < ctl->extents_thresh) {
+    if (!forced && ctl->free_extents < ctl->extents_thresh) {
         /*
          * If this block group has some small extents we don't want to
          * use up all of our free slots in the cache with them, we want
@@ -2661,7 +2691,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
     search_start = min_start;
     search_bytes = bytes;
 
-    err = search_bitmap(ctl, entry, &search_start, &search_bytes);
+    err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
     if (err) {
         if (search_bytes > *max_extent_size)
             *max_extent_size = search_bytes;
@@ -2775,6 +2805,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
     unsigned long want_bits;
     unsigned long min_bits;
     unsigned long found_bits;
+    unsigned long max_bits = 0;
     unsigned long start = 0;
     unsigned long total_found = 0;
     int ret;
@@ -2784,6 +2815,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
     want_bits = bytes_to_bits(bytes, ctl->unit);
     min_bits = bytes_to_bits(min_bytes, ctl->unit);
 
+    /*
+     * Don't bother looking for a cluster in this bitmap if it's heavily
+     * fragmented.
+     */
+    if (entry->max_extent_size &&
+        entry->max_extent_size < cont1_bytes)
+        return -ENOSPC;
 again:
     found_bits = 0;
     for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
@@ -2791,13 +2829,19 @@ again:
                            BITS_PER_BITMAP, i);
         if (next_zero - i >= min_bits) {
             found_bits = next_zero - i;
+            if (found_bits > max_bits)
+                max_bits = found_bits;
             break;
         }
+        if (next_zero - i > max_bits)
+            max_bits = next_zero - i;
         i = next_zero;
     }
 
-    if (!found_bits)
+    if (!found_bits) {
+        entry->max_extent_size = (u64)max_bits * ctl->unit;
         return -ENOSPC;
+    }
 
     if (!total_found) {
         start = i;
@@ -3056,6 +3100,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
     spin_lock_init(&cluster->refill_lock);
     cluster->root = RB_ROOT;
     cluster->max_size = 0;
+    cluster->fragmented = false;
     INIT_LIST_HEAD(&cluster->block_group_list);
     cluster->block_group = NULL;
 }
@@ -3223,7 +3268,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
         }
 
         bytes = minlen;
-        ret2 = search_bitmap(ctl, entry, &start, &bytes);
+        ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
         if (ret2 || start >= end) {
             spin_unlock(&ctl->tree_lock);
             mutex_unlock(&ctl->cache_writeout_mutex);
@@ -3376,7 +3421,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
     u64 count = 1;
     int ret;
 
-    ret = search_bitmap(ctl, entry, &offset, &count);
+    ret = search_bitmap(ctl, entry, &offset, &count, true);
     /* Logic error; Should be empty if it can't find anything */
     ASSERT(!ret);
 
@@ -3532,6 +3577,7 @@ again:
         spin_lock(&ctl->tree_lock);
         info->offset = offset;
         info->bytes = bytes;
+        info->max_extent_size = 0;
         ret = link_free_space(ctl, info);
         spin_unlock(&ctl->tree_lock);
         if (ret)
@@ -3559,6 +3605,7 @@ again:
         }
 
         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+
         bytes -= bytes_added;
         offset += bytes_added;
         spin_unlock(&ctl->tree_lock);
@@ -3602,7 +3649,7 @@ have_info:
 
     bit_off = offset;
     bit_bytes = ctl->unit;
-    ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
+    ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
     if (!ret) {
         if (bit_off == offset) {
             ret = 1;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index a16a029ad3b1..f251865eb6f3 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -23,6 +23,7 @@ struct btrfs_free_space {
     struct rb_node offset_index;
     u64 offset;
     u64 bytes;
+    u64 max_extent_size;
     unsigned long *bitmap;
     struct list_head list;
 };
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index df6b93f6b393..a018e4707dac 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9745,6 +9745,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
     u64 cur_offset = start;
     u64 i_size;
     u64 cur_bytes;
+    u64 last_alloc = (u64)-1;
     int ret = 0;
     bool own_trans = true;
 
@@ -9761,6 +9762,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 
         cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
         cur_bytes = max(cur_bytes, min_size);
+        /*
+         * If we are severely fragmented we could end up with really
+         * small allocations, so if the allocator is returning small
+         * chunks lets make its job easier by only searching for those
+         * sized chunks.
+         */
+        cur_bytes = min(cur_bytes, last_alloc);
         ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
                        *alloc_hint, &ins, 1, 0);
         if (ret) {
@@ -9769,6 +9777,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
             break;
         }
 
+        last_alloc = ins.offset;
         ret = insert_reserved_file_extent(trans, inode,
                           cur_offset, ins.objectid,
                           ins.offset, ins.offset,
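
With last_alloc in place the preallocation loop adapts to a fragmented allocator instead of repeatedly asking for 256 MiB and being bounced. A toy run of the clamping (the returned[] sizes are invented stand-ins for the ins.offset values the allocator hands back):

    #include <stdio.h>
    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t returned[] = { 64ULL << 20, 8ULL << 20, 8ULL << 20 };
        uint64_t num_bytes = 256ULL << 20;    /* total left to preallocate */
        uint64_t last_alloc = (uint64_t)-1;

        for (int i = 0; i < 3; i++) {
            uint64_t cur_bytes = MIN(num_bytes, 256ULL << 20);

            cur_bytes = MIN(cur_bytes, last_alloc);   /* the new clamp */
            printf("ask %llu MiB, got %llu MiB\n",
                   (unsigned long long)(cur_bytes >> 20),
                   (unsigned long long)(returned[i] >> 20));
            last_alloc = returned[i];                 /* ins.offset */
            num_bytes -= returned[i];
        }
        return 0;
    }

After the allocator hands back 64 MiB the loop never asks for more than 64 MiB again, and after an 8 MiB return it drops to 8 MiB requests, sparing find_free_extent() the repeated oversized searches.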
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 071005f008c1..8c27292ea9ea 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -490,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 
     spin_lock_irq(&log->log_extents_lock[index]);
     while (!list_empty(&log->logged_list[index])) {
+        struct inode *inode;
         ordered = list_first_entry(&log->logged_list[index],
                        struct btrfs_ordered_extent,
                        log_list);
         list_del_init(&ordered->log_list);
+        inode = ordered->inode;
         spin_unlock_irq(&log->log_extents_lock[index]);
 
         if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
             !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-            struct inode *inode = ordered->inode;
             u64 start = ordered->file_offset;
             u64 end = ordered->file_offset + ordered->len - 1;
 
@@ -509,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
                        &ordered->flags));
 
         /*
-         * If our ordered extent completed it means it updated the
-         * fs/subvol and csum trees already, so no need to make the
-         * current transaction's commit wait for it, as we end up
-         * holding memory unnecessarily and delaying the inode's iput
-         * until the transaction commit (we schedule an iput for the
-         * inode when the ordered extent's refcount drops to 0), which
-         * prevents it from being evictable until the transaction
-         * commits.
+         * In order to keep us from losing our ordered extent
+         * information when committing the transaction we have to make
+         * sure that any logged extents are completed when we go to
+         * commit the transaction. To do this we simply increase the
+         * current transaction's pending_ordered counter and decrement
+         * it when the ordered extent completes.
          */
-        if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
-            btrfs_put_ordered_extent(ordered);
-        else
-            list_add_tail(&ordered->trans_list, &trans->ordered);
-
+        if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+            struct btrfs_ordered_inode_tree *tree;
+
+            tree = &BTRFS_I(inode)->ordered_tree;
+            spin_lock_irq(&tree->lock);
+            if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+                set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
+                atomic_inc(&trans->transaction->pending_ordered);
+            }
+            spin_unlock_irq(&tree->lock);
+        }
+        btrfs_put_ordered_extent(ordered);
         spin_lock_irq(&log->log_extents_lock[index]);
     }
     spin_unlock_irq(&log->log_extents_lock[index]);
@@ -584,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
     struct btrfs_ordered_inode_tree *tree;
     struct btrfs_root *root = BTRFS_I(inode)->root;
     struct rb_node *node;
+    bool dec_pending_ordered = false;
 
     tree = &BTRFS_I(inode)->ordered_tree;
     spin_lock_irq(&tree->lock);
@@ -593,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
     if (tree->last == node)
         tree->last = NULL;
     set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+    if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
+        dec_pending_ordered = true;
     spin_unlock_irq(&tree->lock);
 
+    /*
+     * The current running transaction is waiting on us, we need to let it
+     * know that we're complete and wake it up.
+     */
+    if (dec_pending_ordered) {
+        struct btrfs_transaction *trans;
+
+        /*
+         * The checks for trans are just a formality, it should be set,
+         * but if it isn't we don't want to deref/assert under the spin
+         * lock, so be nice and check if trans is set, but ASSERT() so
+         * if it isn't set a developer will notice.
+         */
+        spin_lock(&root->fs_info->trans_lock);
+        trans = root->fs_info->running_transaction;
+        if (trans)
+            atomic_inc(&trans->use_count);
+        spin_unlock(&root->fs_info->trans_lock);
+
+        ASSERT(trans);
+        if (trans) {
+            if (atomic_dec_and_test(&trans->pending_ordered))
+                wake_up(&trans->pending_wait);
+            btrfs_put_transaction(trans);
+        }
+    }
+
     spin_lock(&root->ordered_extent_lock);
     list_del_init(&entry->root_extent_list);
     root->nr_ordered_extents--;
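
Taken together with the btrfs_wait_logged_extents() hunk above, this implements a simple counting protocol: logging an incomplete ordered extent bumps the transaction's pending_ordered counter, each completion drops it, and the committer sleeps until it reaches zero (the wait itself lives in transaction.c below). A userspace sketch of the protocol, using a mutex/condvar pair where the kernel uses atomic_t plus a wait queue:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t pending_wait = PTHREAD_COND_INITIALIZER;
    static int pending_ordered;

    static void log_incomplete_ordered(void)    /* btrfs_wait_logged_extents() side */
    {
        pthread_mutex_lock(&lock);
        pending_ordered++;                      /* BTRFS_ORDERED_PENDING set */
        pthread_mutex_unlock(&lock);
    }

    static void *complete_ordered(void *arg)    /* btrfs_remove_ordered_extent() side */
    {
        pthread_mutex_lock(&lock);
        if (--pending_ordered == 0)
            pthread_cond_signal(&pending_wait); /* wake_up(&trans->pending_wait) */
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        log_incomplete_ordered();
        pthread_create(&t, NULL, complete_ordered, NULL);

        /* btrfs_wait_pending_ordered(): block until the counter hits zero */
        pthread_mutex_lock(&lock);
        while (pending_ordered != 0)
            pthread_cond_wait(&pending_wait, &lock);
        pthread_mutex_unlock(&lock);

        printf("commit can proceed\n");
        pthread_join(t, NULL);
        return 0;
    }

Unlike the old pending_ordered list, the counter needs no transaction-lifetime references to the ordered extents themselves, which is what allowed the merge to delete btrfs_free_pending_ordered() from disk-io.c.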
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 7176cc0fe43f..23c96059cef2 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -73,6 +73,8 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
                  * in the logging code. */
+#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+                  * complete in the current transaction. */
 struct btrfs_ordered_extent {
     /* logical offset in the file */
     u64 file_offset;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b23d49daa1a2..24154e422945 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -303,6 +303,9 @@ enum {
     Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
     Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
     Opt_datasum, Opt_treelog, Opt_noinode_cache,
+#ifdef CONFIG_BTRFS_DEBUG
+    Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
+#endif
     Opt_err,
 };
 
@@ -355,6 +358,11 @@ static match_table_t tokens = {
     {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
     {Opt_fatal_errors, "fatal_errors=%s"},
     {Opt_commit_interval, "commit=%d"},
+#ifdef CONFIG_BTRFS_DEBUG
+    {Opt_fragment_data, "fragment=data"},
+    {Opt_fragment_metadata, "fragment=metadata"},
+    {Opt_fragment_all, "fragment=all"},
+#endif
     {Opt_err, NULL},
 };
 
@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
             }
             break;
+#ifdef CONFIG_BTRFS_DEBUG
+        case Opt_fragment_all:
+            btrfs_info(root->fs_info, "fragmenting all space");
+            btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+            btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
+            break;
+        case Opt_fragment_metadata:
+            btrfs_info(root->fs_info, "fragmenting metadata");
+            btrfs_set_opt(info->mount_opt,
+                      FRAGMENT_METADATA);
+            break;
+        case Opt_fragment_data:
+            btrfs_info(root->fs_info, "fragmenting data");
+            btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+            break;
+#endif
         case Opt_err:
             btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
             ret = -EINVAL;
@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
         seq_puts(seq, ",fatal_errors=panic");
     if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
         seq_printf(seq, ",commit=%d", info->commit_interval);
+#ifdef CONFIG_BTRFS_DEBUG
+    if (btrfs_test_opt(root, FRAGMENT_DATA))
+        seq_puts(seq, ",fragment=data");
+    if (btrfs_test_opt(root, FRAGMENT_METADATA))
+        seq_puts(seq, ",fragment=metadata");
+#endif
     seq_printf(seq, ",subvolid=%llu",
            BTRFS_I(d_inode(dentry))->root->root_key.objectid);
     seq_puts(seq, ",subvol=");
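
With CONFIG_BTRFS_DEBUG enabled the new options can be exercised directly, e.g. mount -o fragment=all /dev/sdb /mnt. fragment=all is pure shorthand that sets both FRAGMENT_DATA and FRAGMENT_METADATA, and since btrfs_show_options() prints the two bits individually, such a mount reads back as fragment=data,fragment=metadata in /proc/mounts.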
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 2299bfde39ee..c8c3d70c31ff 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../disk-io.h"
 #include "../free-space-cache.h"
 
 #define BITS_PER_BITMAP        (PAGE_CACHE_SIZE * 8)
@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
         kfree(cache);
         return NULL;
     }
+    cache->fs_info = btrfs_alloc_dummy_fs_info();
+    if (!cache->fs_info) {
+        kfree(cache->free_space_ctl);
+        kfree(cache);
+        return NULL;
+    }
 
     cache->key.objectid = 0;
     cache->key.offset = 1024 * 1024 * 1024;
@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 int btrfs_test_free_space_cache(void)
 {
     struct btrfs_block_group_cache *cache;
-    int ret;
+    struct btrfs_root *root = NULL;
+    int ret = -ENOMEM;
 
     test_msg("Running btrfs free space cache tests\n");
 
@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
         return 0;
     }
 
+    root = btrfs_alloc_dummy_root();
+    if (!root)
+        goto out;
+
+    root->fs_info = btrfs_alloc_dummy_fs_info();
+    if (!root->fs_info)
+        goto out;
+
+    root->fs_info->extent_root = root;
+    cache->fs_info = root->fs_info;
+
     ret = test_extents(cache);
     if (ret)
         goto out;
@@ -904,6 +923,7 @@ out:
     __btrfs_remove_free_space_cache(cache->free_space_ctl);
     kfree(cache->free_space_ctl);
     kfree(cache);
+    btrfs_free_dummy_root(root);
     test_msg("Free space cache tests finished\n");
     return ret;
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e377d7bb454e..418c6a2ad7d8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -232,15 +232,16 @@ loop:
     extwriter_counter_init(cur_trans, type);
     init_waitqueue_head(&cur_trans->writer_wait);
     init_waitqueue_head(&cur_trans->commit_wait);
+    init_waitqueue_head(&cur_trans->pending_wait);
     cur_trans->state = TRANS_STATE_RUNNING;
     /*
      * One for this trans handle, one so it will live on until we
      * commit the transaction.
      */
     atomic_set(&cur_trans->use_count, 2);
-    cur_trans->have_free_bgs = 0;
+    atomic_set(&cur_trans->pending_ordered, 0);
+    cur_trans->flags = 0;
     cur_trans->start_time = get_seconds();
-    cur_trans->dirty_bg_run = 0;
 
     memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
 
@@ -266,7 +267,6 @@ loop:
     INIT_LIST_HEAD(&cur_trans->pending_snapshots);
     INIT_LIST_HEAD(&cur_trans->pending_chunks);
     INIT_LIST_HEAD(&cur_trans->switch_commits);
-    INIT_LIST_HEAD(&cur_trans->pending_ordered);
     INIT_LIST_HEAD(&cur_trans->dirty_bgs);
     INIT_LIST_HEAD(&cur_trans->io_bgs);
     INIT_LIST_HEAD(&cur_trans->dropped_roots);
@@ -549,7 +549,6 @@ again:
     h->can_flush_pending_bgs = true;
     INIT_LIST_HEAD(&h->qgroup_ref_list);
     INIT_LIST_HEAD(&h->new_bgs);
-    INIT_LIST_HEAD(&h->ordered);
 
     smp_mb();
     if (cur_trans->state >= TRANS_STATE_BLOCKED &&
@@ -780,12 +779,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
     if (!list_empty(&trans->new_bgs))
         btrfs_create_pending_block_groups(trans, root);
 
-    if (!list_empty(&trans->ordered)) {
-        spin_lock(&info->trans_lock);
-        list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
-        spin_unlock(&info->trans_lock);
-    }
-
     trans->delayed_ref_updates = 0;
     if (!trans->sync) {
         must_run_delayed_refs =
@@ -1776,25 +1769,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 }
 
 static inline void
-btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
-               struct btrfs_fs_info *fs_info)
+btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans)
 {
-    struct btrfs_ordered_extent *ordered;
-
-    spin_lock(&fs_info->trans_lock);
-    while (!list_empty(&cur_trans->pending_ordered)) {
-        ordered = list_first_entry(&cur_trans->pending_ordered,
-                       struct btrfs_ordered_extent,
-                       trans_list);
-        list_del_init(&ordered->trans_list);
-        spin_unlock(&fs_info->trans_lock);
-
-        wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
-                           &ordered->flags));
-        btrfs_put_ordered_extent(ordered);
-        spin_lock(&fs_info->trans_lock);
-    }
-    spin_unlock(&fs_info->trans_lock);
+    wait_event(cur_trans->pending_wait,
+           atomic_read(&cur_trans->pending_ordered) == 0);
 }
 
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1842,7 +1820,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         return ret;
     }
 
-    if (!cur_trans->dirty_bg_run) {
+    if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
         int run_it = 0;
 
         /* this mutex is also taken before trying to set
@@ -1851,18 +1829,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
          * after extents from that block group have been
          * allocated for cache files.  btrfs_set_block_group_ro
          * will wait for the transaction to commit if it
-         * finds dirty_bg_run = 1
+         * finds BTRFS_TRANS_DIRTY_BG_RUN set.
          *
-         * The dirty_bg_run flag is also used to make sure only
-         * one process starts all the block group IO.  It wouldn't
+         * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
+         * only one process starts all the block group IO.  It wouldn't
          * hurt to have more than one go through, but there's no
          * real advantage to it either.
          */
         mutex_lock(&root->fs_info->ro_block_group_mutex);
-        if (!cur_trans->dirty_bg_run) {
+        if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
+                      &cur_trans->flags))
             run_it = 1;
-            cur_trans->dirty_bg_run = 1;
-        }
         mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
         if (run_it)
@@ -1874,7 +1851,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
     }
 
     spin_lock(&root->fs_info->trans_lock);
-    list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
     if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
         spin_unlock(&root->fs_info->trans_lock);
         atomic_inc(&cur_trans->use_count);
@@ -1933,7 +1909,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
     btrfs_wait_delalloc_flush(root->fs_info);
 
-    btrfs_wait_pending_ordered(cur_trans, root->fs_info);
+    btrfs_wait_pending_ordered(cur_trans);
 
     btrfs_scrub_pause(root);
     /*
@@ -2133,7 +2109,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
     btrfs_finish_extent_commit(trans, root);
 
-    if (cur_trans->have_free_bgs)
+    if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
         btrfs_clear_space_info_full(root->fs_info);
 
     root->fs_info->last_trans_committed = cur_trans->transid;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 54b7dea74967..b05b2f64d913 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -32,6 +32,10 @@ enum btrfs_trans_state {
     TRANS_STATE_MAX = 6,
 };
 
+#define BTRFS_TRANS_HAVE_FREE_BGS    0
+#define BTRFS_TRANS_DIRTY_BG_RUN    1
+#define BTRFS_TRANS_CACHE_ENOSPC    2
+
 struct btrfs_transaction {
     u64 transid;
     /*
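
Folding have_free_bgs, dirty_bg_run and the new cache-ENOSPC state into one unsigned long means each flag costs a bit instead of an int, and the atomic bitops give test_and_set_bit() semantics for free, which transaction.c uses to pick the single winner that runs the dirty block group IO. A C11 sketch of that idiom (test_and_set_bit_ul() is a stand-in for the kernel's test_and_set_bit()):

    #include <stdatomic.h>
    #include <stdio.h>

    #define TRANS_DIRTY_BG_RUN 1        /* bit number, as in the defines above */

    static atomic_ulong trans_flags;    /* models cur_trans->flags */

    static int test_and_set_bit_ul(int nr, atomic_ulong *addr)
    {
        unsigned long old = atomic_fetch_or(addr, 1UL << nr);
        return (old >> nr) & 1;
    }

    int main(void)
    {
        if (!test_and_set_bit_ul(TRANS_DIRTY_BG_RUN, &trans_flags))
            printf("first caller runs the block group IO\n");
        if (!test_and_set_bit_ul(TRANS_DIRTY_BG_RUN, &trans_flags))
            printf("never reached, the bit is already set\n");
        printf("flags = %#lx\n", atomic_load(&trans_flags));
        return 0;
    }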
@@ -46,11 +50,9 @@ struct btrfs_transaction {
      */
     atomic_t num_writers;
     atomic_t use_count;
+    atomic_t pending_ordered;
 
-    /*
-     * true if there is free bgs operations in this transaction
-     */
-    int have_free_bgs;
+    unsigned long flags;
 
     /* Be protected by fs_info->trans_lock when we want to change it. */
     enum btrfs_trans_state state;
@@ -59,9 +61,9 @@ struct btrfs_transaction {
     unsigned long start_time;
     wait_queue_head_t writer_wait;
     wait_queue_head_t commit_wait;
+    wait_queue_head_t pending_wait;
     struct list_head pending_snapshots;
     struct list_head pending_chunks;
-    struct list_head pending_ordered;
     struct list_head switch_commits;
     struct list_head dirty_bgs;
     struct list_head io_bgs;
@@ -80,7 +82,6 @@ struct btrfs_transaction {
     spinlock_t dropped_roots_lock;
     struct btrfs_delayed_ref_root delayed_refs;
     int aborted;
-    int dirty_bg_run;
 };
 
 #define __TRANS_FREEZABLE    (1U << 0)
@@ -128,7 +129,6 @@ struct btrfs_trans_handle {
      */
     struct btrfs_root *root;
     struct seq_list delayed_ref_elem;
-    struct list_head ordered;
     struct list_head qgroup_ref_list;
     struct list_head new_bgs;
 };
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e875b6cc1e20..f86d83805b44 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1462,7 +1462,7 @@ again:
         btrfs_std_error(root->fs_info, ret,
                 "Failed to remove dev extent item");
     } else {
-        trans->transaction->have_free_bgs = 1;
+        set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
     }
 out:
     btrfs_free_path(path);