author     Chris Mason <clm@fb.com>    2015-10-21 22:00:38 -0400
committer  Chris Mason <clm@fb.com>    2015-10-21 22:00:38 -0400
commit     a9e6d153563d2ed69c6cd7fb4fa5ce4ca7c712eb (patch)
tree       43fab2a259934e1f1dde9607610424d6089dba5e
parent     56fa9d0762ed17153c1bdff3c0aeeecbe522b504 (diff)
parent     0584f718ed1f351fca5047a4b1ebba9b5ea41215 (diff)
Merge branch 'allocator-fixes' into for-linus-4.4
Signed-off-by: Chris Mason <clm@fb.com>
 fs/btrfs/backref.c                |   6
 fs/btrfs/ctree.h                  |  21
 fs/btrfs/disk-io.c                |  20
 fs/btrfs/extent-tree.c            | 272
 fs/btrfs/free-space-cache.c       |  67
 fs/btrfs/free-space-cache.h       |   1
 fs/btrfs/inode.c                  |   9
 fs/btrfs/ordered-data.c           |  64
 fs/btrfs/ordered-data.h           |   2
 fs/btrfs/super.c                  |  30
 fs/btrfs/tests/free-space-tests.c |  22
 fs/btrfs/transaction.c            |  52
 fs/btrfs/transaction.h            |  14
 fs/btrfs/volumes.c                |   2
 14 files changed, 459 insertions(+), 123 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ecbc63d3143e..2adc152a32e3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -362,6 +362,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 	}
 
+	if (btrfs_test_is_dummy_root(root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
+		ret = -ENOENT;
+		goto out;
+	}
+
 	if (path->search_commit_root)
 		root_level = btrfs_header_level(root->commit_root);
 	else if (time_seq == (u64)-1)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2135b82a6b61..bc3c711e82f2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1154,6 +1154,10 @@ struct btrfs_space_info {
 				   delalloc/allocations */
 	u64 bytes_readonly;	/* total bytes that are read only */
 
+	u64 max_extent_size;	/* This will hold the maximum extent size of
+				   the space info if we had an ENOSPC in the
+				   allocator. */
+
 	unsigned int full:1;	/* indicates that we cannot allocate any more
 				   chunks for this space */
 	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
@@ -1228,6 +1232,9 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;
 
+	/* We did a full search and couldn't create a cluster */
+	bool fragmented;
+
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -2148,6 +2155,8 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	(1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE	(1 << 23)
+#define BTRFS_MOUNT_FRAGMENT_DATA	(1 << 24)
+#define BTRFS_MOUNT_FRAGMENT_METADATA	(1 << 25)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(8192)
@@ -2172,6 +2181,18 @@ struct btrfs_ioctl_defrag_range_args {
 	btrfs_clear_opt(root->fs_info->mount_opt, opt);			\
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static inline int
+btrfs_should_fragment_free_space(struct btrfs_root *root,
+				 struct btrfs_block_group_cache *block_group)
+{
+	return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+		block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+	       (btrfs_test_opt(root, FRAGMENT_DATA) &&
+		block_group->flags & BTRFS_BLOCK_GROUP_DATA);
+}
+#endif
+
 /*
  * Requests for changes that need to be done during transaction commit.
  *
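The new btrfs_should_fragment_free_space() helper reduces to a pair of AND/OR flag tests: a block group is fragmented only when its type bit matches the corresponding fragment=* mount flag. A stand-alone sketch of that predicate (user-space C; the struct arguments are replaced by plain integers, and the block-group bit values shown are illustrative stand-ins for the btrfs header definitions):

```c
#include <stdio.h>

/* Mount-option bits, as added in this series. */
#define BTRFS_MOUNT_FRAGMENT_DATA	(1UL << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA	(1UL << 25)
/* Block-group type bits (stand-in values for the btrfs header flags). */
#define BTRFS_BLOCK_GROUP_DATA		(1UL << 0)
#define BTRFS_BLOCK_GROUP_METADATA	(1UL << 2)

/* Same shape as btrfs_should_fragment_free_space(), minus the structs. */
static int should_fragment(unsigned long mount_opt, unsigned long bg_flags)
{
	return ((mount_opt & BTRFS_MOUNT_FRAGMENT_METADATA) &&
		(bg_flags & BTRFS_BLOCK_GROUP_METADATA)) ||
	       ((mount_opt & BTRFS_MOUNT_FRAGMENT_DATA) &&
		(bg_flags & BTRFS_BLOCK_GROUP_DATA));
}

int main(void)
{
	unsigned long opt = BTRFS_MOUNT_FRAGMENT_METADATA; /* fragment=metadata */

	printf("metadata bg: %d\n",
	       should_fragment(opt, BTRFS_BLOCK_GROUP_METADATA)); /* 1 */
	printf("data bg:     %d\n",
	       should_fragment(opt, BTRFS_BLOCK_GROUP_DATA));     /* 0 */
	return 0;
}
```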
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0e0544e0e20b..86a11a902fcf 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4327,25 +4327,6 @@ again:
 	return 0;
 }
 
-static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
-				       struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_ordered_extent *ordered;
-
-	spin_lock(&fs_info->trans_lock);
-	while (!list_empty(&cur_trans->pending_ordered)) {
-		ordered = list_first_entry(&cur_trans->pending_ordered,
-					   struct btrfs_ordered_extent,
-					   trans_list);
-		list_del_init(&ordered->trans_list);
-		spin_unlock(&fs_info->trans_lock);
-
-		btrfs_put_ordered_extent(ordered);
-		spin_lock(&fs_info->trans_lock);
-	}
-	spin_unlock(&fs_info->trans_lock);
-}
-
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 				   struct btrfs_root *root)
 {
@@ -4357,7 +4338,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 	cur_trans->state = TRANS_STATE_UNBLOCKED;
 	wake_up(&root->fs_info->transaction_wait);
 
-	btrfs_free_pending_ordered(cur_trans, root->fs_info);
 	btrfs_destroy_delayed_inodes(root);
 	btrfs_assert_delayed_root_empty(root);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 46609607789b..92fdbc6b89e7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
 	kfree(ctl);
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static void fragment_free_space(struct btrfs_root *root,
+				struct btrfs_block_group_cache *block_group)
+{
+	u64 start = block_group->key.objectid;
+	u64 len = block_group->key.offset;
+	u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
+		root->nodesize : root->sectorsize;
+	u64 step = chunk << 1;
+
+	while (len > chunk) {
+		btrfs_remove_free_space(block_group, start, chunk);
+		start += step;
+		if (len < step)
+			len = 0;
+		else
+			len -= step;
+	}
+}
+#endif
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
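fragment_free_space() above alternates one removed chunk with one kept chunk across the whole block group, deliberately halving its usable free space into nodesize/sectorsize-sized islands. The same stepping logic in a stand-alone sketch with toy sizes:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 0;		/* block_group->key.objectid */
	uint64_t len = 64 * 1024;	/* block_group->key.offset (toy size) */
	uint64_t chunk = 16 * 1024;	/* nodesize or sectorsize (toy size) */
	uint64_t step = chunk << 1;	/* remove one chunk, keep one */

	while (len > chunk) {
		/* stands in for btrfs_remove_free_space(bg, start, chunk) */
		printf("remove free space [%llu, %llu)\n",
		       (unsigned long long)start,
		       (unsigned long long)(start + chunk));
		start += step;
		len = (len < step) ? 0 : len - step;
	}
	return 0;
}
```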
@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
 	u64 last = 0;
 	u32 nritems;
 	int ret = -ENOMEM;
+	bool wakeup = true;
 
 	caching_ctl = container_of(work, struct btrfs_caching_control, work);
 	block_group = caching_ctl->block_group;
@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
 
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
+#ifdef CONFIG_BTRFS_DEBUG
+	/*
+	 * If we're fragmenting we don't want to make anybody think we can
+	 * allocate from this block group until we've had a chance to fragment
+	 * the free space.
+	 */
+	if (btrfs_should_fragment_free_space(extent_root, block_group))
+		wakeup = false;
+#endif
 	/*
 	 * We don't want to deadlock with somebody trying to allocate a new
 	 * extent for the extent root while also trying to search the extent
@@ -441,7 +472,8 @@ next:
 
 		if (need_resched() ||
 		    rwsem_is_contended(&fs_info->commit_root_sem)) {
-			caching_ctl->progress = last;
+			if (wakeup)
+				caching_ctl->progress = last;
 			btrfs_release_path(path);
 			up_read(&fs_info->commit_root_sem);
 			mutex_unlock(&caching_ctl->mutex);
@@ -464,7 +496,8 @@ next:
 			key.offset = 0;
 			key.type = BTRFS_EXTENT_ITEM_KEY;
 
-			caching_ctl->progress = last;
+			if (wakeup)
+				caching_ctl->progress = last;
 			btrfs_release_path(path);
 			goto next;
 		}
@@ -491,7 +524,8 @@ next:
 
 		if (total_found > (1024 * 1024 * 2)) {
 			total_found = 0;
-			wake_up(&caching_ctl->wait);
+			if (wakeup)
+				wake_up(&caching_ctl->wait);
 		}
 	}
 	path->slots[0]++;
@@ -501,13 +535,27 @@ next:
 	total_found += add_new_free_space(block_group, fs_info, last,
 					  block_group->key.objectid +
 					  block_group->key.offset);
-	caching_ctl->progress = (u64)-1;
-
 	spin_lock(&block_group->lock);
 	block_group->caching_ctl = NULL;
 	block_group->cached = BTRFS_CACHE_FINISHED;
 	spin_unlock(&block_group->lock);
 
+#ifdef CONFIG_BTRFS_DEBUG
+	if (btrfs_should_fragment_free_space(extent_root, block_group)) {
+		u64 bytes_used;
+
+		spin_lock(&block_group->space_info->lock);
+		spin_lock(&block_group->lock);
+		bytes_used = block_group->key.offset -
+			btrfs_block_group_used(&block_group->item);
+		block_group->space_info->bytes_used += bytes_used >> 1;
+		spin_unlock(&block_group->lock);
+		spin_unlock(&block_group->space_info->lock);
+		fragment_free_space(extent_root, block_group);
+	}
+#endif
+
+	caching_ctl->progress = (u64)-1;
 err:
 	btrfs_free_path(path);
 	up_read(&fs_info->commit_root_sem);
@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 		}
 	}
 	spin_unlock(&cache->lock);
+#ifdef CONFIG_BTRFS_DEBUG
+	if (ret == 1 &&
+	    btrfs_should_fragment_free_space(fs_info->extent_root,
+					     cache)) {
+		u64 bytes_used;
+
+		spin_lock(&cache->space_info->lock);
+		spin_lock(&cache->lock);
+		bytes_used = cache->key.offset -
+			btrfs_block_group_used(&cache->item);
+		cache->space_info->bytes_used += bytes_used >> 1;
+		spin_unlock(&cache->lock);
+		spin_unlock(&cache->space_info->lock);
+		fragment_free_space(fs_info->extent_root, cache);
+	}
+#endif
 	mutex_unlock(&caching_ctl->mutex);
 
 	wake_up(&caching_ctl->wait);
@@ -3344,6 +3408,15 @@ again:
 	spin_unlock(&block_group->lock);
 
 	/*
+	 * We hit an ENOSPC when setting up the cache in this transaction, just
+	 * skip doing the setup, we've already cleared the cache so we're safe.
+	 */
+	if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
+		ret = -ENOSPC;
+		goto out_put;
+	}
+
+	/*
 	 * Try to preallocate enough space based on how big the block group is.
 	 * Keep in mind this has to include any pinned space which could end up
 	 * taking up quite a bit since it's not folded into the other space
@@ -3363,8 +3436,18 @@ again:
 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
 					      num_pages, num_pages,
 					      &alloc_hint);
+	/*
+	 * Our cache requires contiguous chunks so that we don't modify a bunch
+	 * of metadata or split extents when writing the cache out, which means
+	 * we can enospc if we are heavily fragmented in addition to just normal
+	 * out of space conditions. So if we hit this just skip setting up any
+	 * other block groups for this transaction, maybe we'll unpin enough
+	 * space the next time around.
+	 */
 	if (!ret)
 		dcs = BTRFS_DC_SETUP;
+	else if (ret == -ENOSPC)
+		set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
 	btrfs_free_reserved_data_space(inode, 0, num_pages);
 
 out_put:
@@ -3751,6 +3834,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->bytes_readonly = 0;
 	found->bytes_may_use = 0;
 	found->full = 0;
+	found->max_extent_size = 0;
 	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
 	found->chunk_alloc = 0;
 	found->flush = 0;
@@ -4003,7 +4087,8 @@ commit_trans:
 		if (IS_ERR(trans))
 			return PTR_ERR(trans);
 		if (have_pinned_space >= 0 ||
-		    trans->transaction->have_free_bgs ||
+		    test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
+			     &trans->transaction->flags) ||
 		    need_commit > 0) {
 			ret = btrfs_commit_transaction(trans, root);
 			if (ret)
@@ -6112,6 +6197,34 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 	update_global_block_rsv(fs_info);
 }
 
+/*
+ * Returns the free cluster for the given space info and sets empty_cluster to
+ * what it should be based on the mount options.
+ */
+static struct btrfs_free_cluster *
+fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
+		   u64 *empty_cluster)
+{
+	struct btrfs_free_cluster *ret = NULL;
+	bool ssd = btrfs_test_opt(root, SSD);
+
+	*empty_cluster = 0;
+	if (btrfs_mixed_space_info(space_info))
+		return ret;
+
+	if (ssd)
+		*empty_cluster = 2 * 1024 * 1024;
+	if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+		ret = &root->fs_info->meta_alloc_cluster;
+		if (!ssd)
+			*empty_cluster = 64 * 1024;
+	} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
+		ret = &root->fs_info->data_alloc_cluster;
+	}
+
+	return ret;
+}
+
 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
 			      const bool return_free_space)
 {
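fetch_cluster_info() centralizes what used to be open-coded in find_free_extent(): mixed block groups get no cluster; metadata always gets the metadata cluster (64K empty_cluster on rotational media, 2M on SSD); data gets a cluster only with the ssd option. A decision-table restatement as a hedged sketch (names are local stand-ins, constants quoted from the function above):

```c
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

enum cluster_kind { NO_CLUSTER, META_CLUSTER, DATA_CLUSTER };

static enum cluster_kind pick_cluster(bool mixed, bool metadata, bool ssd,
				      uint64_t *empty_cluster)
{
	*empty_cluster = 0;
	if (mixed)
		return NO_CLUSTER;
	if (ssd)
		*empty_cluster = 2 * 1024 * 1024;
	if (metadata) {
		if (!ssd)
			*empty_cluster = 64 * 1024;
		return META_CLUSTER;
	}
	/* Data only clusters on SSD; rotational data gets no cluster. */
	return ssd ? DATA_CLUSTER : NO_CLUSTER;
}

int main(void)
{
	uint64_t ec;
	enum cluster_kind kind = pick_cluster(false, true, false, &ec);

	printf("metadata, rotational: kind=%d empty_cluster=%llu\n",
	       kind, (unsigned long long)ec);
	return 0;
}
```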
@@ -6119,7 +6232,10 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
 	struct btrfs_block_group_cache *cache = NULL;
 	struct btrfs_space_info *space_info;
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+	struct btrfs_free_cluster *cluster = NULL;
 	u64 len;
+	u64 total_unpinned = 0;
+	u64 empty_cluster = 0;
 	bool readonly;
 
 	while (start <= end) {
@@ -6128,8 +6244,14 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
 		    start >= cache->key.objectid + cache->key.offset) {
 			if (cache)
 				btrfs_put_block_group(cache);
+			total_unpinned = 0;
 			cache = btrfs_lookup_block_group(fs_info, start);
 			BUG_ON(!cache); /* Logic error */
+
+			cluster = fetch_cluster_info(root,
+						     cache->space_info,
+						     &empty_cluster);
+			empty_cluster <<= 1;
 		}
 
 		len = cache->key.objectid + cache->key.offset - start;
@@ -6142,12 +6264,27 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
 		}
 
 		start += len;
+		total_unpinned += len;
 		space_info = cache->space_info;
 
+		/*
+		 * If this space cluster has been marked as fragmented and we've
+		 * unpinned enough in this block group to potentially allow a
+		 * cluster to be created inside of it go ahead and clear the
+		 * fragmented check.
+		 */
+		if (cluster && cluster->fragmented &&
+		    total_unpinned > empty_cluster) {
+			spin_lock(&cluster->lock);
+			cluster->fragmented = 0;
+			spin_unlock(&cluster->lock);
+		}
+
 		spin_lock(&space_info->lock);
 		spin_lock(&cache->lock);
 		cache->pinned -= len;
 		space_info->bytes_pinned -= len;
+		space_info->max_extent_size = 0;
 		percpu_counter_add(&space_info->total_bytes_pinned, -len);
 		if (cache->ro) {
 			space_info->bytes_readonly += len;
@@ -6880,7 +7017,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
 	struct btrfs_block_group_cache *block_group = NULL;
 	u64 search_start = 0;
 	u64 max_extent_size = 0;
-	int empty_cluster = 2 * 1024 * 1024;
+	u64 empty_cluster = 0;
 	struct btrfs_space_info *space_info;
 	int loop = 0;
 	int index = __get_raid_index(flags);
@@ -6890,6 +7027,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
 	bool failed_alloc = false;
 	bool use_cluster = true;
 	bool have_caching_bg = false;
+	bool full_search = false;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	ins->type = BTRFS_EXTENT_ITEM_KEY;
@@ -6905,36 +7043,47 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
 	}
 
 	/*
-	 * If the space info is for both data and metadata it means we have a
-	 * small filesystem and we can't use the clustering stuff.
+	 * If our free space is heavily fragmented we may not be able to make
+	 * big contiguous allocations, so instead of doing the expensive search
+	 * for free space, simply return ENOSPC with our max_extent_size so we
+	 * can go ahead and search for a more manageable chunk.
+	 *
+	 * If our max_extent_size is large enough for our allocation simply
+	 * disable clustering since we will likely not be able to find enough
+	 * space to create a cluster and induce latency trying.
 	 */
-	if (btrfs_mixed_space_info(space_info))
-		use_cluster = false;
-
-	if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
-		last_ptr = &root->fs_info->meta_alloc_cluster;
-		if (!btrfs_test_opt(root, SSD))
-			empty_cluster = 64 * 1024;
-	}
-
-	if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
-	    btrfs_test_opt(root, SSD)) {
-		last_ptr = &root->fs_info->data_alloc_cluster;
+	if (unlikely(space_info->max_extent_size)) {
+		spin_lock(&space_info->lock);
+		if (space_info->max_extent_size &&
+		    num_bytes > space_info->max_extent_size) {
+			ins->offset = space_info->max_extent_size;
+			spin_unlock(&space_info->lock);
+			return -ENOSPC;
+		} else if (space_info->max_extent_size) {
+			use_cluster = false;
+		}
+		spin_unlock(&space_info->lock);
 	}
 
+	last_ptr = fetch_cluster_info(orig_root, space_info, &empty_cluster);
 	if (last_ptr) {
 		spin_lock(&last_ptr->lock);
 		if (last_ptr->block_group)
 			hint_byte = last_ptr->window_start;
+		if (last_ptr->fragmented) {
+			/*
+			 * We still set window_start so we can keep track of the
+			 * last place we found an allocation to try and save
+			 * some time.
+			 */
+			hint_byte = last_ptr->window_start;
+			use_cluster = false;
+		}
 		spin_unlock(&last_ptr->lock);
 	}
 
 	search_start = max(search_start, first_logical_byte(root, 0));
 	search_start = max(search_start, hint_byte);
-
-	if (!last_ptr)
-		empty_cluster = 0;
-
 	if (search_start == hint_byte) {
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
@@ -6969,6 +7118,8 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
 	}
 search:
 	have_caching_bg = false;
+	if (index == 0 || index == __get_raid_index(flags))
+		full_search = true;
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups[index],
 			    list) {
@@ -7002,6 +7153,7 @@ have_block_group:
 have_block_group:
 		cached = block_group_cache_done(block_group);
 		if (unlikely(!cached)) {
+			have_caching_bg = true;
 			ret = cache_block_group(block_group, 0);
 			BUG_ON(ret < 0);
 			ret = 0;
@@ -7016,7 +7168,7 @@ have_block_group:
 		 * Ok we want to try and use the cluster allocator, so
 		 * lets look there
 		 */
-		if (last_ptr) {
+		if (last_ptr && use_cluster) {
 			struct btrfs_block_group_cache *used_block_group;
 			unsigned long aligned_cluster;
 			/*
@@ -7142,6 +7294,16 @@ refill_cluster:
 		}
 
 unclustered_alloc:
+		/*
+		 * We are doing an unclustered alloc, set the fragmented flag so
+		 * we don't bother trying to setup a cluster again until we get
+		 * more space.
+		 */
+		if (unlikely(last_ptr)) {
+			spin_lock(&last_ptr->lock);
+			last_ptr->fragmented = 1;
+			spin_unlock(&last_ptr->lock);
+		}
 		spin_lock(&block_group->free_space_ctl->tree_lock);
 		if (cached &&
 		    block_group->free_space_ctl->free_space <
@@ -7174,8 +7336,6 @@ unclustered_alloc:
 			failed_alloc = true;
 			goto have_block_group;
 		} else if (!offset) {
-			if (!cached)
-				have_caching_bg = true;
 			goto loop;
 		}
 checks:
@@ -7232,7 +7392,20 @@ loop:
 	 */
 	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
 		index = 0;
-		loop++;
+		if (loop == LOOP_CACHING_NOWAIT) {
+			/*
+			 * We want to skip the LOOP_CACHING_WAIT step if we
+			 * don't have any uncached bgs and we've already done a
+			 * full search through.
+			 */
+			if (have_caching_bg || !full_search)
+				loop = LOOP_CACHING_WAIT;
+			else
+				loop = LOOP_ALLOC_CHUNK;
+		} else {
+			loop++;
+		}
+
 		if (loop == LOOP_ALLOC_CHUNK) {
 			struct btrfs_trans_handle *trans;
 			int exist = 0;
@@ -7250,6 +7423,15 @@ loop:
 
 			ret = do_chunk_alloc(trans, root, flags,
 					     CHUNK_ALLOC_FORCE);
+
+			/*
+			 * If we can't allocate a new chunk we've already looped
+			 * through at least once, move on to the NO_EMPTY_SIZE
+			 * case.
+			 */
+			if (ret == -ENOSPC)
+				loop = LOOP_NO_EMPTY_SIZE;
+
 			/*
 			 * Do not bail out on ENOSPC since we
 			 * can do more things.
@@ -7266,6 +7448,15 @@ loop:
 		}
 
 		if (loop == LOOP_NO_EMPTY_SIZE) {
+			/*
+			 * Don't loop again if we already have no empty_size and
+			 * no empty_cluster.
+			 */
+			if (empty_size == 0 &&
+			    empty_cluster == 0) {
+				ret = -ENOSPC;
+				goto out;
+			}
 			empty_size = 0;
 			empty_cluster = 0;
 		}
@@ -7274,11 +7465,20 @@ loop:
 	} else if (!ins->objectid) {
 		ret = -ENOSPC;
 	} else if (ins->objectid) {
+		if (!use_cluster && last_ptr) {
+			spin_lock(&last_ptr->lock);
+			last_ptr->window_start = ins->objectid;
+			spin_unlock(&last_ptr->lock);
+		}
 		ret = 0;
 	}
 out:
-	if (ret == -ENOSPC)
+	if (ret == -ENOSPC) {
+		spin_lock(&space_info->lock);
+		space_info->max_extent_size = max_extent_size;
+		spin_unlock(&space_info->lock);
 		ins->offset = max_extent_size;
+	}
 	return ret;
 }
 
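The net effect of the find_free_extent() changes: every ENOSPC now publishes the largest contiguous extent the search saw into space_info->max_extent_size, and the next caller checks it before doing any work (unpinning space resets it). A toy model of that handshake, with all names as local stand-ins and -1 playing the role of -ENOSPC:

```c
#include <stdio.h>
#include <stdint.h>

struct toy_space_info {
	uint64_t max_extent_size;	/* 0 means "unknown" */
};

static int toy_find_free_extent(struct toy_space_info *si, uint64_t num_bytes,
				uint64_t largest_free, uint64_t *hint)
{
	/* Fast path: a previous failed search already bounded what fits. */
	if (si->max_extent_size && num_bytes > si->max_extent_size) {
		*hint = si->max_extent_size;
		return -1;
	}
	if (num_bytes > largest_free) {	/* the expensive search failed */
		si->max_extent_size = largest_free;
		*hint = largest_free;
		return -1;
	}
	*hint = num_bytes;
	return 0;
}

int main(void)
{
	struct toy_space_info si = { 0 };
	uint64_t hint;

	toy_find_free_extent(&si, 1 << 20, 64 << 10, &hint);	/* slow fail */
	/* Retry fails fast, telling the caller that 64K would fit. */
	int ret = toy_find_free_extent(&si, 1 << 20, 64 << 10, &hint);
	printf("ret=%d hint=%llu\n", ret, (unsigned long long)hint);
	return 0;
}
```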
@@ -7327,7 +7527,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
 			 u64 empty_size, u64 hint_byte,
 			 struct btrfs_key *ins, int is_data, int delalloc)
 {
-	bool final_tried = false;
+	bool final_tried = num_bytes == min_alloc_size;
 	u64 flags;
 	int ret;
 
@@ -8929,7 +9129,7 @@ again:
 	 * back off and let this transaction commit
 	 */
 	mutex_lock(&root->fs_info->ro_block_group_mutex);
-	if (trans->transaction->dirty_bg_run) {
+	if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
 		u64 transid = trans->transid;
 
 		mutex_unlock(&root->fs_info->ro_block_group_mutex);
@@ -9679,6 +9879,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	free_excluded_extents(root, cache);
 
+#ifdef CONFIG_BTRFS_DEBUG
+	if (btrfs_should_fragment_free_space(root, cache)) {
+		u64 new_bytes_used = size - bytes_used;
+
+		bytes_used += new_bytes_used >> 1;
+		fragment_free_space(root, cache);
+	}
+#endif
 	/*
 	 * Call to ensure the corresponding space_info object is created and
 	 * assigned to our block group, but don't update its counters just yet.
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5a9431dda07f..0948d34cb84a 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1730,7 +1730,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
  */
 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 			 struct btrfs_free_space *bitmap_info, u64 *offset,
-			 u64 *bytes)
+			 u64 *bytes, bool for_alloc)
 {
 	unsigned long found_bits = 0;
 	unsigned long max_bits = 0;
@@ -1738,11 +1738,26 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 	unsigned long next_zero;
 	unsigned long extent_bits;
 
+	/*
+	 * Skip searching the bitmap if we don't have a contiguous section that
+	 * is large enough for this allocation.
+	 */
+	if (for_alloc &&
+	    bitmap_info->max_extent_size &&
+	    bitmap_info->max_extent_size < *bytes) {
+		*bytes = bitmap_info->max_extent_size;
+		return -1;
+	}
+
 	i = offset_to_bit(bitmap_info->offset, ctl->unit,
 			  max_t(u64, *offset, bitmap_info->offset));
 	bits = bytes_to_bits(*bytes, ctl->unit);
 
 	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
+		if (for_alloc && bits == 1) {
+			found_bits = 1;
+			break;
+		}
 		next_zero = find_next_zero_bit(bitmap_info->bitmap,
 					       BITS_PER_BITMAP, i);
 		extent_bits = next_zero - i;
@@ -1762,6 +1777,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 	}
 
 	*bytes = (u64)(max_bits) * ctl->unit;
+	bitmap_info->max_extent_size = *bytes;
 	return -1;
 }
 
@@ -1813,7 +1829,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
 		if (entry->bitmap) {
 			u64 size = *bytes;
 
-			ret = search_bitmap(ctl, entry, &tmp, &size);
+			ret = search_bitmap(ctl, entry, &tmp, &size, true);
 			if (!ret) {
 				*offset = tmp;
 				*bytes = size;
@@ -1874,7 +1890,8 @@ again:
 	search_start = *offset;
 	search_bytes = ctl->unit;
 	search_bytes = min(search_bytes, end - search_start + 1);
-	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
+	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
+			    false);
 	if (ret < 0 || search_start != *offset)
 		return -EINVAL;
 
@@ -1919,7 +1936,7 @@ again:
 		search_start = *offset;
 		search_bytes = ctl->unit;
 		ret = search_bitmap(ctl, bitmap_info, &search_start,
-				    &search_bytes);
+				    &search_bytes, false);
 		if (ret < 0 || search_start != *offset)
 			return -EAGAIN;
 
@@ -1943,6 +1960,12 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
 
 	bitmap_set_bits(ctl, info, offset, bytes_to_set);
 
+	/*
+	 * We set some bytes, we have no idea what the max extent size is
+	 * anymore.
+	 */
+	info->max_extent_size = 0;
+
 	return bytes_to_set;
 
 }
@@ -1951,12 +1974,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 		      struct btrfs_free_space *info)
 {
 	struct btrfs_block_group_cache *block_group = ctl->private;
+	bool forced = false;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
+					     block_group))
+		forced = true;
+#endif
 
 	/*
 	 * If we are below the extents threshold then we can add this as an
 	 * extent, and don't have to deal with the bitmap
 	 */
-	if (ctl->free_extents < ctl->extents_thresh) {
+	if (!forced && ctl->free_extents < ctl->extents_thresh) {
 		/*
 		 * If this block group has some small extents we don't want to
 		 * use up all of our free slots in the cache with them, we want
@@ -2661,7 +2691,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 	search_start = min_start;
 	search_bytes = bytes;
 
-	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
+	err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
 	if (err) {
 		if (search_bytes > *max_extent_size)
 			*max_extent_size = search_bytes;
@@ -2775,6 +2805,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 	unsigned long want_bits;
 	unsigned long min_bits;
 	unsigned long found_bits;
+	unsigned long max_bits = 0;
 	unsigned long start = 0;
 	unsigned long total_found = 0;
 	int ret;
@@ -2784,6 +2815,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 	want_bits = bytes_to_bits(bytes, ctl->unit);
 	min_bits = bytes_to_bits(min_bytes, ctl->unit);
 
+	/*
+	 * Don't bother looking for a cluster in this bitmap if it's heavily
+	 * fragmented.
+	 */
+	if (entry->max_extent_size &&
+	    entry->max_extent_size < cont1_bytes)
+		return -ENOSPC;
 again:
 	found_bits = 0;
 	for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
@@ -2791,13 +2829,19 @@ again:
 					       BITS_PER_BITMAP, i);
 		if (next_zero - i >= min_bits) {
 			found_bits = next_zero - i;
+			if (found_bits > max_bits)
+				max_bits = found_bits;
 			break;
 		}
+		if (next_zero - i > max_bits)
+			max_bits = next_zero - i;
 		i = next_zero;
 	}
 
-	if (!found_bits)
+	if (!found_bits) {
+		entry->max_extent_size = (u64)max_bits * ctl->unit;
 		return -ENOSPC;
+	}
 
 	if (!total_found) {
 		start = i;
@@ -3056,6 +3100,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
 	spin_lock_init(&cluster->refill_lock);
 	cluster->root = RB_ROOT;
 	cluster->max_size = 0;
+	cluster->fragmented = false;
 	INIT_LIST_HEAD(&cluster->block_group_list);
 	cluster->block_group = NULL;
 }
@@ -3223,7 +3268,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
 		}
 
 		bytes = minlen;
-		ret2 = search_bitmap(ctl, entry, &start, &bytes);
+		ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
 		if (ret2 || start >= end) {
 			spin_unlock(&ctl->tree_lock);
 			mutex_unlock(&ctl->cache_writeout_mutex);
@@ -3376,7 +3421,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
 	u64 count = 1;
 	int ret;
 
-	ret = search_bitmap(ctl, entry, &offset, &count);
+	ret = search_bitmap(ctl, entry, &offset, &count, true);
 	/* Logic error; Should be empty if it can't find anything */
 	ASSERT(!ret);
 
@@ -3532,6 +3577,7 @@ again:
 		spin_lock(&ctl->tree_lock);
 		info->offset = offset;
 		info->bytes = bytes;
+		info->max_extent_size = 0;
 		ret = link_free_space(ctl, info);
 		spin_unlock(&ctl->tree_lock);
 		if (ret)
@@ -3559,6 +3605,7 @@ again:
 	}
 
 	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+
 	bytes -= bytes_added;
 	offset += bytes_added;
 	spin_unlock(&ctl->tree_lock);
@@ -3602,7 +3649,7 @@ have_info:
 
 	bit_off = offset;
 	bit_bytes = ctl->unit;
-	ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
+	ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
 	if (!ret) {
 		if (bit_off == offset) {
 			ret = 1;
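search_bitmap() now memoizes the longest free run it saw during a failed scan in btrfs_free_space::max_extent_size, so allocation-path callers (for_alloc == true) can reject a bitmap without rescanning it; setting bits invalidates the cache. The same idea on a single-word toy bitmap (this is not the kernel bitmap API; one set bit stands for one free unit):

```c
#include <stdio.h>
#include <stdint.h>

struct toy_bitmap {
	uint64_t bits;		/* 64 "units" of free space */
	unsigned max_run;	/* cached longest run of set bits, 0 = unknown */
};

static int toy_search(struct toy_bitmap *b, unsigned want)
{
	unsigned run = 0, best = 0;

	/* Fast path: a previous failed search recorded the best run. */
	if (b->max_run && b->max_run < want)
		return -1;

	for (int i = 0; i < 64; i++) {
		if (b->bits & (1ULL << i)) {
			if (++run >= want)
				return i - want + 1;	/* start of the run */
		} else {
			if (run > best)
				best = run;
			run = 0;
		}
	}
	if (run > best)
		best = run;
	b->max_run = best;	/* remember the best run for next time */
	return -1;
}

int main(void)
{
	struct toy_bitmap b = { .bits = 0x0f0f0f0f0f0f0f0fULL };

	printf("want 4 -> %d\n", toy_search(&b, 4));	/* found at 0 */
	printf("want 8 -> %d\n", toy_search(&b, 8));	/* scans, caches 4 */
	printf("want 8 -> %d\n", toy_search(&b, 8));	/* fails fast */
	return 0;
}
```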
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index a16a029ad3b1..f251865eb6f3 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -23,6 +23,7 @@ struct btrfs_free_space {
 	struct rb_node offset_index;
 	u64 offset;
 	u64 bytes;
+	u64 max_extent_size;
 	unsigned long *bitmap;
 	struct list_head list;
 };
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index df6b93f6b393..a018e4707dac 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9745,6 +9745,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 	u64 cur_offset = start;
 	u64 i_size;
 	u64 cur_bytes;
+	u64 last_alloc = (u64)-1;
 	int ret = 0;
 	bool own_trans = true;
 
@@ -9761,6 +9762,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 
 		cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
 		cur_bytes = max(cur_bytes, min_size);
+		/*
+		 * If we are severely fragmented we could end up with really
+		 * small allocations, so if the allocator is returning small
+		 * chunks, let's make its job easier by only searching for
+		 * those sized chunks.
+		 */
+		cur_bytes = min(cur_bytes, last_alloc);
 		ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
 					   *alloc_hint, &ins, 1, 0);
 		if (ret) {
@@ -9769,6 +9777,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 			break;
 		}
 
+		last_alloc = ins.offset;
 		ret = insert_reserved_file_extent(trans, inode,
 						  cur_offset, ins.objectid,
 						  ins.offset, ins.offset,
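The prealloc loop change makes each btrfs_reserve_extent() request no larger than what the previous iteration actually got back, so a fragmented allocator is never re-asked for a size it just failed to produce. A toy version of that clamping loop, where the "allocator" below is a stand-in that hands out ever-smaller extents, floored at 4K:

```c
#include <stdio.h>
#include <stdint.h>

static uint64_t toy_reserve(uint64_t want)
{
	static uint64_t largest = 16 * 1024;	/* simulated fragmentation */

	if (want > largest)
		want = largest;
	if (largest > 4096)
		largest /= 2;
	return want;
}

int main(void)
{
	uint64_t num_bytes = 64 * 1024;
	uint64_t min_size = 4096;
	uint64_t last_alloc = UINT64_MAX;	/* (u64)-1, as in the diff */

	while (num_bytes > 0) {
		uint64_t cur = num_bytes > min_size ? num_bytes : min_size;

		if (cur > last_alloc)
			cur = last_alloc;	/* the new clamp */
		last_alloc = toy_reserve(cur);
		printf("asked %6llu got %6llu\n",
		       (unsigned long long)cur,
		       (unsigned long long)last_alloc);
		/* avoid underflow when the grant overshoots the remainder */
		num_bytes -= last_alloc > num_bytes ? num_bytes : last_alloc;
	}
	return 0;
}
```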
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 071005f008c1..8c27292ea9ea 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -490,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 
 	spin_lock_irq(&log->log_extents_lock[index]);
 	while (!list_empty(&log->logged_list[index])) {
+		struct inode *inode;
 		ordered = list_first_entry(&log->logged_list[index],
 					   struct btrfs_ordered_extent,
 					   log_list);
 		list_del_init(&ordered->log_list);
+		inode = ordered->inode;
 		spin_unlock_irq(&log->log_extents_lock[index]);
 
 		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
 		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			struct inode *inode = ordered->inode;
 			u64 start = ordered->file_offset;
 			u64 end = ordered->file_offset + ordered->len - 1;
 
@@ -509,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 				       &ordered->flags));
 
 		/*
-		 * If our ordered extent completed it means it updated the
-		 * fs/subvol and csum trees already, so no need to make the
-		 * current transaction's commit wait for it, as we end up
-		 * holding memory unnecessarily and delaying the inode's iput
-		 * until the transaction commit (we schedule an iput for the
-		 * inode when the ordered extent's refcount drops to 0), which
-		 * prevents it from being evictable until the transaction
-		 * commits.
+		 * In order to keep us from losing our ordered extent
+		 * information when committing the transaction we have to make
+		 * sure that any logged extents are completed when we go to
+		 * commit the transaction. To do this we simply increase the
+		 * current transaction's pending_ordered counter and decrement
+		 * it when the ordered extent completes.
 		 */
-		if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
-			btrfs_put_ordered_extent(ordered);
-		else
-			list_add_tail(&ordered->trans_list, &trans->ordered);
-
+		if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+			struct btrfs_ordered_inode_tree *tree;
+
+			tree = &BTRFS_I(inode)->ordered_tree;
+			spin_lock_irq(&tree->lock);
+			if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+				set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
+				atomic_inc(&trans->transaction->pending_ordered);
+			}
+			spin_unlock_irq(&tree->lock);
+		}
+		btrfs_put_ordered_extent(ordered);
 		spin_lock_irq(&log->log_extents_lock[index]);
 	}
 	spin_unlock_irq(&log->log_extents_lock[index]);
@@ -584,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	struct btrfs_ordered_inode_tree *tree;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct rb_node *node;
+	bool dec_pending_ordered = false;
 
 	tree = &BTRFS_I(inode)->ordered_tree;
 	spin_lock_irq(&tree->lock);
@@ -593,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	if (tree->last == node)
 		tree->last = NULL;
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+	if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
+		dec_pending_ordered = true;
 	spin_unlock_irq(&tree->lock);
 
+	/*
+	 * The current running transaction is waiting on us, we need to let it
+	 * know that we're complete and wake it up.
+	 */
+	if (dec_pending_ordered) {
+		struct btrfs_transaction *trans;
+
+		/*
+		 * The checks for trans are just a formality, it should be set,
+		 * but if it isn't we don't want to deref/assert under the spin
+		 * lock, so be nice and check if trans is set, but ASSERT() so
+		 * if it isn't set a developer will notice.
+		 */
+		spin_lock(&root->fs_info->trans_lock);
+		trans = root->fs_info->running_transaction;
+		if (trans)
+			atomic_inc(&trans->use_count);
+		spin_unlock(&root->fs_info->trans_lock);
+
+		ASSERT(trans);
+		if (trans) {
+			if (atomic_dec_and_test(&trans->pending_ordered))
+				wake_up(&trans->pending_wait);
+			btrfs_put_transaction(trans);
+		}
+	}
+
 	spin_lock(&root->ordered_extent_lock);
 	list_del_init(&entry->root_extent_list);
 	root->nr_ordered_extents--;
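The replacement for the old pending_ordered list is a plain count-and-wake pattern: the logging path bumps the transaction's pending_ordered counter for each incomplete logged extent, and each completion decrements it, waking the committer when it reaches zero. A minimal pthread sketch of that pattern (pthreads stand in for the kernel's atomics and wait queues):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pending_wait = PTHREAD_COND_INITIALIZER;
static int pending_ordered;

static void ordered_extent_complete(void)
{
	pthread_mutex_lock(&lock);
	if (--pending_ordered == 0)
		pthread_cond_signal(&pending_wait);	/* the wake_up() */
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;
	ordered_extent_complete();
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	pending_ordered = 4;	/* four logged extents still in flight */
	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);

	/* Commit path: wait until every pending ordered extent finishes. */
	pthread_mutex_lock(&lock);
	while (pending_ordered)
		pthread_cond_wait(&pending_wait, &lock);
	pthread_mutex_unlock(&lock);
	printf("all ordered extents complete, committing\n");

	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
```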
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 7176cc0fe43f..23c96059cef2 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -73,6 +73,8 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
 				 * in the logging code. */
+#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+				  * complete in the current transaction. */
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
 	u64 file_offset;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b23d49daa1a2..24154e422945 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -303,6 +303,9 @@ enum { | |||
303 | Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, | 303 | Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, |
304 | Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, | 304 | Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, |
305 | Opt_datasum, Opt_treelog, Opt_noinode_cache, | 305 | Opt_datasum, Opt_treelog, Opt_noinode_cache, |
306 | #ifdef CONFIG_BTRFS_DEBUG | ||
307 | Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, | ||
308 | #endif | ||
306 | Opt_err, | 309 | Opt_err, |
307 | }; | 310 | }; |
308 | 311 | ||
@@ -355,6 +358,11 @@ static match_table_t tokens = { | |||
355 | {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, | 358 | {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, |
356 | {Opt_fatal_errors, "fatal_errors=%s"}, | 359 | {Opt_fatal_errors, "fatal_errors=%s"}, |
357 | {Opt_commit_interval, "commit=%d"}, | 360 | {Opt_commit_interval, "commit=%d"}, |
361 | #ifdef CONFIG_BTRFS_DEBUG | ||
362 | {Opt_fragment_data, "fragment=data"}, | ||
363 | {Opt_fragment_metadata, "fragment=metadata"}, | ||
364 | {Opt_fragment_all, "fragment=all"}, | ||
365 | #endif | ||
358 | {Opt_err, NULL}, | 366 | {Opt_err, NULL}, |
359 | }; | 367 | }; |
360 | 368 | ||
@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
721 | info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; | 729 | info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; |
722 | } | 730 | } |
723 | break; | 731 | break; |
732 | #ifdef CONFIG_BTRFS_DEBUG | ||
733 | case Opt_fragment_all: | ||
734 | btrfs_info(root->fs_info, "fragmenting all space"); | ||
735 | btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); | ||
736 | btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA); | ||
737 | break; | ||
738 | case Opt_fragment_metadata: | ||
739 | btrfs_info(root->fs_info, "fragmenting metadata"); | ||
740 | btrfs_set_opt(info->mount_opt, | ||
741 | FRAGMENT_METADATA); | ||
742 | break; | ||
743 | case Opt_fragment_data: | ||
744 | btrfs_info(root->fs_info, "fragmenting data"); | ||
745 | btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); | ||
746 | break; | ||
747 | #endif | ||
724 | case Opt_err: | 748 | case Opt_err: |
725 | btrfs_info(root->fs_info, "unrecognized mount option '%s'", p); | 749 | btrfs_info(root->fs_info, "unrecognized mount option '%s'", p); |
726 | ret = -EINVAL; | 750 | ret = -EINVAL; |
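The fragment=* options use the standard <linux/parser.h> machinery: a token enum, a match_table_t of patterns, and a switch in the option loop. A self-contained sketch of just that machinery (token and pattern names from the hunks above; the loop over comma-separated options is simplified away):

#include <linux/parser.h>

enum { Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, Opt_err };

static const match_table_t frag_tokens = {
	{Opt_fragment_data,	"fragment=data"},
	{Opt_fragment_metadata,	"fragment=metadata"},
	{Opt_fragment_all,	"fragment=all"},
	{Opt_err,		NULL},
};

/* Resolve one option string to a token; unmatched strings fall
 * through to Opt_err, which the caller reports as -EINVAL. */
static int match_frag_opt(char *p)
{
	substring_t args[MAX_OPT_ARGS];

	return match_token(p, frag_tokens, args);
}

Note that fragment=all simply sets both FRAGMENT_DATA and FRAGMENT_METADATA rather than introducing a third mount flag, so the rest of the code only ever tests two bits.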
@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1172 | seq_puts(seq, ",fatal_errors=panic"); | 1196 | seq_puts(seq, ",fatal_errors=panic"); |
1173 | if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) | 1197 | if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) |
1174 | seq_printf(seq, ",commit=%d", info->commit_interval); | 1198 | seq_printf(seq, ",commit=%d", info->commit_interval); |
1199 | #ifdef CONFIG_BTRFS_DEBUG | ||
1200 | if (btrfs_test_opt(root, FRAGMENT_DATA)) | ||
1201 | seq_puts(seq, ",fragment=data"); | ||
1202 | if (btrfs_test_opt(root, FRAGMENT_METADATA)) | ||
1203 | seq_puts(seq, ",fragment=metadata"); | ||
1204 | #endif | ||
1175 | seq_printf(seq, ",subvolid=%llu", | 1205 | seq_printf(seq, ",subvolid=%llu", |
1176 | BTRFS_I(d_inode(dentry))->root->root_key.objectid); | 1206 | BTRFS_I(d_inode(dentry))->root->root_key.objectid); |
1177 | seq_puts(seq, ",subvol="); | 1207 | seq_puts(seq, ",subvol="); |
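show_options is the mirror of the parser: every non-default option must be printed back so /proc/mounts round-trips. A sketch of the seq_file idiom used above, assuming the BTRFS_MOUNT_FRAGMENT_* bits that btrfs_test_opt() expands to (those defines live in ctree.h, outside this hunk):

#include <linux/seq_file.h>

/* Sketch: emit the debug fragment options in exactly the form the
 * parser accepts, and only when they are active. */
static void show_frag_opts(struct seq_file *seq, unsigned long mount_opt)
{
#ifdef CONFIG_BTRFS_DEBUG
	if (mount_opt & BTRFS_MOUNT_FRAGMENT_DATA)
		seq_puts(seq, ",fragment=data");
	if (mount_opt & BTRFS_MOUNT_FRAGMENT_METADATA)
		seq_puts(seq, ",fragment=metadata");
#endif
}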
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 2299bfde39ee..c8c3d70c31ff 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include "btrfs-tests.h" | 20 | #include "btrfs-tests.h" |
21 | #include "../ctree.h" | 21 | #include "../ctree.h" |
22 | #include "../disk-io.h" | ||
22 | #include "../free-space-cache.h" | 23 | #include "../free-space-cache.h" |
23 | 24 | ||
24 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 25 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void) | |||
35 | kfree(cache); | 36 | kfree(cache); |
36 | return NULL; | 37 | return NULL; |
37 | } | 38 | } |
39 | cache->fs_info = btrfs_alloc_dummy_fs_info(); | ||
40 | if (!cache->fs_info) { | ||
41 | kfree(cache->free_space_ctl); | ||
42 | kfree(cache); | ||
43 | return NULL; | ||
44 | } | ||
38 | 45 | ||
39 | cache->key.objectid = 0; | 46 | cache->key.objectid = 0; |
40 | cache->key.offset = 1024 * 1024 * 1024; | 47 | cache->key.offset = 1024 * 1024 * 1024; |
@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) | |||
879 | int btrfs_test_free_space_cache(void) | 886 | int btrfs_test_free_space_cache(void) |
880 | { | 887 | { |
881 | struct btrfs_block_group_cache *cache; | 888 | struct btrfs_block_group_cache *cache; |
882 | int ret; | 889 | struct btrfs_root *root = NULL; |
890 | int ret = -ENOMEM; | ||
883 | 891 | ||
884 | test_msg("Running btrfs free space cache tests\n"); | 892 | test_msg("Running btrfs free space cache tests\n"); |
885 | 893 | ||
@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void) | |||
889 | return 0; | 897 | return 0; |
890 | } | 898 | } |
891 | 899 | ||
900 | root = btrfs_alloc_dummy_root(); | ||
901 | if (!root) | ||
902 | goto out; | ||
903 | |||
904 | root->fs_info = btrfs_alloc_dummy_fs_info(); | ||
905 | if (!root->fs_info) | ||
906 | goto out; | ||
907 | |||
908 | root->fs_info->extent_root = root; | ||
909 | cache->fs_info = root->fs_info; | ||
910 | |||
892 | ret = test_extents(cache); | 911 | ret = test_extents(cache); |
893 | if (ret) | 912 | if (ret) |
894 | goto out; | 913 | goto out; |
@@ -904,6 +923,7 @@ out: | |||
904 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | 923 | __btrfs_remove_free_space_cache(cache->free_space_ctl); |
905 | kfree(cache->free_space_ctl); | 924 | kfree(cache->free_space_ctl); |
906 | kfree(cache); | 925 | kfree(cache); |
926 | btrfs_free_dummy_root(root); | ||
907 | test_msg("Free space cache tests finished\n"); | 927 | test_msg("Free space cache tests finished\n"); |
908 | return ret; | 928 | return ret; |
909 | } | 929 | } |
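The tests now need a dummy fs_info wherever the code under test dereferences one. The setup added above follows the usual allocate-and-unwind shape; as a sketch, pulled into a helper (helper name illustrative; the dummy allocators are the test-infrastructure functions used in the hunks):

/* Sketch: build the dummy root/fs_info pair the free-space tests
 * expect, failing with -ENOMEM if either allocation fails. */
static int setup_dummy_root(struct btrfs_root **rootp)
{
	struct btrfs_root *root;

	root = btrfs_alloc_dummy_root();
	if (!root)
		return -ENOMEM;

	root->fs_info = btrfs_alloc_dummy_fs_info();
	if (!root->fs_info) {
		btrfs_free_dummy_root(root);
		return -ENOMEM;
	}

	root->fs_info->extent_root = root;	/* tests point extent_root at itself */
	*rootp = root;
	return 0;
}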
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e377d7bb454e..418c6a2ad7d8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -232,15 +232,16 @@ loop: | |||
232 | extwriter_counter_init(cur_trans, type); | 232 | extwriter_counter_init(cur_trans, type); |
233 | init_waitqueue_head(&cur_trans->writer_wait); | 233 | init_waitqueue_head(&cur_trans->writer_wait); |
234 | init_waitqueue_head(&cur_trans->commit_wait); | 234 | init_waitqueue_head(&cur_trans->commit_wait); |
235 | init_waitqueue_head(&cur_trans->pending_wait); | ||
235 | cur_trans->state = TRANS_STATE_RUNNING; | 236 | cur_trans->state = TRANS_STATE_RUNNING; |
236 | /* | 237 | /* |
237 | * One for this trans handle, one so it will live on until we | 238 | * One for this trans handle, one so it will live on until we |
238 | * commit the transaction. | 239 | * commit the transaction. |
239 | */ | 240 | */ |
240 | atomic_set(&cur_trans->use_count, 2); | 241 | atomic_set(&cur_trans->use_count, 2); |
241 | cur_trans->have_free_bgs = 0; | 242 | atomic_set(&cur_trans->pending_ordered, 0); |
243 | cur_trans->flags = 0; | ||
242 | cur_trans->start_time = get_seconds(); | 244 | cur_trans->start_time = get_seconds(); |
243 | cur_trans->dirty_bg_run = 0; | ||
244 | 245 | ||
245 | memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); | 246 | memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); |
246 | 247 | ||
@@ -266,7 +267,6 @@ loop: | |||
266 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 267 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
267 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | 268 | INIT_LIST_HEAD(&cur_trans->pending_chunks); |
268 | INIT_LIST_HEAD(&cur_trans->switch_commits); | 269 | INIT_LIST_HEAD(&cur_trans->switch_commits); |
269 | INIT_LIST_HEAD(&cur_trans->pending_ordered); | ||
270 | INIT_LIST_HEAD(&cur_trans->dirty_bgs); | 270 | INIT_LIST_HEAD(&cur_trans->dirty_bgs); |
271 | INIT_LIST_HEAD(&cur_trans->io_bgs); | 271 | INIT_LIST_HEAD(&cur_trans->io_bgs); |
272 | INIT_LIST_HEAD(&cur_trans->dropped_roots); | 272 | INIT_LIST_HEAD(&cur_trans->dropped_roots); |
@@ -549,7 +549,6 @@ again: | |||
549 | h->can_flush_pending_bgs = true; | 549 | h->can_flush_pending_bgs = true; |
550 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 550 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
551 | INIT_LIST_HEAD(&h->new_bgs); | 551 | INIT_LIST_HEAD(&h->new_bgs); |
552 | INIT_LIST_HEAD(&h->ordered); | ||
553 | 552 | ||
554 | smp_mb(); | 553 | smp_mb(); |
555 | if (cur_trans->state >= TRANS_STATE_BLOCKED && | 554 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
@@ -780,12 +779,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
780 | if (!list_empty(&trans->new_bgs)) | 779 | if (!list_empty(&trans->new_bgs)) |
781 | btrfs_create_pending_block_groups(trans, root); | 780 | btrfs_create_pending_block_groups(trans, root); |
782 | 781 | ||
783 | if (!list_empty(&trans->ordered)) { | ||
784 | spin_lock(&info->trans_lock); | ||
785 | list_splice_init(&trans->ordered, &cur_trans->pending_ordered); | ||
786 | spin_unlock(&info->trans_lock); | ||
787 | } | ||
788 | |||
789 | trans->delayed_ref_updates = 0; | 782 | trans->delayed_ref_updates = 0; |
790 | if (!trans->sync) { | 783 | if (!trans->sync) { |
791 | must_run_delayed_refs = | 784 | must_run_delayed_refs = |
@@ -1776,25 +1769,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1776 | } | 1769 | } |
1777 | 1770 | ||
1778 | static inline void | 1771 | static inline void |
1779 | btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, | 1772 | btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans) |
1780 | struct btrfs_fs_info *fs_info) | ||
1781 | { | 1773 | { |
1782 | struct btrfs_ordered_extent *ordered; | 1774 | wait_event(cur_trans->pending_wait, |
1783 | 1775 | atomic_read(&cur_trans->pending_ordered) == 0); | |
1784 | spin_lock(&fs_info->trans_lock); | ||
1785 | while (!list_empty(&cur_trans->pending_ordered)) { | ||
1786 | ordered = list_first_entry(&cur_trans->pending_ordered, | ||
1787 | struct btrfs_ordered_extent, | ||
1788 | trans_list); | ||
1789 | list_del_init(&ordered->trans_list); | ||
1790 | spin_unlock(&fs_info->trans_lock); | ||
1791 | |||
1792 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, | ||
1793 | &ordered->flags)); | ||
1794 | btrfs_put_ordered_extent(ordered); | ||
1795 | spin_lock(&fs_info->trans_lock); | ||
1796 | } | ||
1797 | spin_unlock(&fs_info->trans_lock); | ||
1798 | } | 1776 | } |
1799 | 1777 | ||
1800 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1778 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
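This is the core of the pending-ordered rework: instead of splicing ordered extents onto a per-transaction list and waiting on each one under trans_lock, the commit path sleeps until an atomic counter drains. The protocol, reduced to its three parties (field names follow the patch; the increment site is elsewhere in the series, and the helper names are illustrative):

#include <linux/atomic.h>
#include <linux/wait.h>

/* Producer: account one more ordered extent against the transaction. */
static void pending_get(struct btrfs_transaction *t)
{
	atomic_inc(&t->pending_ordered);
}

/* Completion: drop the count; the last one wakes the committer. */
static void pending_put(struct btrfs_transaction *t)
{
	if (atomic_dec_and_test(&t->pending_ordered))
		wake_up(&t->pending_wait);
}

/* Committer: sleep until every accounted ordered extent completed.
 * wait_event() re-checks the condition after every wakeup, so a
 * spurious wake is harmless. */
static void pending_wait_all(struct btrfs_transaction *t)
{
	wait_event(t->pending_wait,
		   atomic_read(&t->pending_ordered) == 0);
}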
@@ -1842,7 +1820,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1842 | return ret; | 1820 | return ret; |
1843 | } | 1821 | } |
1844 | 1822 | ||
1845 | if (!cur_trans->dirty_bg_run) { | 1823 | if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) { |
1846 | int run_it = 0; | 1824 | int run_it = 0; |
1847 | 1825 | ||
1848 | /* this mutex is also taken before trying to set | 1826 | /* this mutex is also taken before trying to set |
@@ -1851,18 +1829,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1851 | * after extents from that block group have been | 1829 | * after extents from that block group have been |
1852 | * allocated for cache files. btrfs_set_block_group_ro | 1830 | * allocated for cache files. btrfs_set_block_group_ro |
1853 | * will wait for the transaction to commit if it | 1831 | * will wait for the transaction to commit if it |
1854 | * finds dirty_bg_run = 1 | 1832 | * finds BTRFS_TRANS_DIRTY_BG_RUN set. |
1855 | * | 1833 | * |
1856 | * The dirty_bg_run flag is also used to make sure only | 1834 | * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure |
1857 | * one process starts all the block group IO. It wouldn't | 1835 | * only one process starts all the block group IO. It wouldn't |
1858 | * hurt to have more than one go through, but there's no | 1836 | * hurt to have more than one go through, but there's no |
1859 | * real advantage to it either. | 1837 | * real advantage to it either. |
1860 | */ | 1838 | */ |
1861 | mutex_lock(&root->fs_info->ro_block_group_mutex); | 1839 | mutex_lock(&root->fs_info->ro_block_group_mutex); |
1862 | if (!cur_trans->dirty_bg_run) { | 1840 | if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN, |
1841 | &cur_trans->flags)) | ||
1863 | run_it = 1; | 1842 | run_it = 1; |
1864 | cur_trans->dirty_bg_run = 1; | ||
1865 | } | ||
1866 | mutex_unlock(&root->fs_info->ro_block_group_mutex); | 1843 | mutex_unlock(&root->fs_info->ro_block_group_mutex); |
1867 | 1844 | ||
1868 | if (run_it) | 1845 | if (run_it) |
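The dirty_bg_run int becomes a bit in cur_trans->flags, and the check-then-set is collapsed into test_and_set_bit(), which atomically elects exactly one runner; the ro_block_group_mutex remains for its ordering against btrfs_set_block_group_ro(), not to protect the flag. The idiom, as a sketch (helper name illustrative):

#include <linux/bitops.h>
#include <linux/types.h>

/* Exactly one of N concurrent committers sees the bit clear and
 * becomes the runner; test_and_set_bit() is atomic, so the flag
 * itself needs no lock. */
static bool claim_dirty_bg_run(struct btrfs_transaction *cur_trans)
{
	return !test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
				 &cur_trans->flags);
}

run_it then gates the single call that starts the block group IO, exactly as the old int did, minus the race window between the test and the store.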
@@ -1874,7 +1851,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1874 | } | 1851 | } |
1875 | 1852 | ||
1876 | spin_lock(&root->fs_info->trans_lock); | 1853 | spin_lock(&root->fs_info->trans_lock); |
1877 | list_splice_init(&trans->ordered, &cur_trans->pending_ordered); | ||
1878 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { | 1854 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
1879 | spin_unlock(&root->fs_info->trans_lock); | 1855 | spin_unlock(&root->fs_info->trans_lock); |
1880 | atomic_inc(&cur_trans->use_count); | 1856 | atomic_inc(&cur_trans->use_count); |
@@ -1933,7 +1909,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1933 | 1909 | ||
1934 | btrfs_wait_delalloc_flush(root->fs_info); | 1910 | btrfs_wait_delalloc_flush(root->fs_info); |
1935 | 1911 | ||
1936 | btrfs_wait_pending_ordered(cur_trans, root->fs_info); | 1912 | btrfs_wait_pending_ordered(cur_trans); |
1937 | 1913 | ||
1938 | btrfs_scrub_pause(root); | 1914 | btrfs_scrub_pause(root); |
1939 | /* | 1915 | /* |
@@ -2133,7 +2109,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
2133 | 2109 | ||
2134 | btrfs_finish_extent_commit(trans, root); | 2110 | btrfs_finish_extent_commit(trans, root); |
2135 | 2111 | ||
2136 | if (cur_trans->have_free_bgs) | 2112 | if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags)) |
2137 | btrfs_clear_space_info_full(root->fs_info); | 2113 | btrfs_clear_space_info_full(root->fs_info); |
2138 | 2114 | ||
2139 | root->fs_info->last_trans_committed = cur_trans->transid; | 2115 | root->fs_info->last_trans_committed = cur_trans->transid; |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 54b7dea74967..b05b2f64d913 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -32,6 +32,10 @@ enum btrfs_trans_state { | |||
32 | TRANS_STATE_MAX = 6, | 32 | TRANS_STATE_MAX = 6, |
33 | }; | 33 | }; |
34 | 34 | ||
35 | #define BTRFS_TRANS_HAVE_FREE_BGS 0 | ||
36 | #define BTRFS_TRANS_DIRTY_BG_RUN 1 | ||
37 | #define BTRFS_TRANS_CACHE_ENOSPC 2 | ||
38 | |||
35 | struct btrfs_transaction { | 39 | struct btrfs_transaction { |
36 | u64 transid; | 40 | u64 transid; |
37 | /* | 41 | /* |
@@ -46,11 +50,9 @@ struct btrfs_transaction { | |||
46 | */ | 50 | */ |
47 | atomic_t num_writers; | 51 | atomic_t num_writers; |
48 | atomic_t use_count; | 52 | atomic_t use_count; |
53 | atomic_t pending_ordered; | ||
49 | 54 | ||
50 | /* | 55 | unsigned long flags; |
51 | * true if there is free bgs operations in this transaction | ||
52 | */ | ||
53 | int have_free_bgs; | ||
54 | 56 | ||
55 | /* Protected by fs_info->trans_lock when we want to change it. */ | 57 | /* Protected by fs_info->trans_lock when we want to change it. */ |
56 | enum btrfs_trans_state state; | 58 | enum btrfs_trans_state state; |
@@ -59,9 +61,9 @@ struct btrfs_transaction { | |||
59 | unsigned long start_time; | 61 | unsigned long start_time; |
60 | wait_queue_head_t writer_wait; | 62 | wait_queue_head_t writer_wait; |
61 | wait_queue_head_t commit_wait; | 63 | wait_queue_head_t commit_wait; |
64 | wait_queue_head_t pending_wait; | ||
62 | struct list_head pending_snapshots; | 65 | struct list_head pending_snapshots; |
63 | struct list_head pending_chunks; | 66 | struct list_head pending_chunks; |
64 | struct list_head pending_ordered; | ||
65 | struct list_head switch_commits; | 67 | struct list_head switch_commits; |
66 | struct list_head dirty_bgs; | 68 | struct list_head dirty_bgs; |
67 | struct list_head io_bgs; | 69 | struct list_head io_bgs; |
@@ -80,7 +82,6 @@ struct btrfs_transaction { | |||
80 | spinlock_t dropped_roots_lock; | 82 | spinlock_t dropped_roots_lock; |
81 | struct btrfs_delayed_ref_root delayed_refs; | 83 | struct btrfs_delayed_ref_root delayed_refs; |
82 | int aborted; | 84 | int aborted; |
83 | int dirty_bg_run; | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | #define __TRANS_FREEZABLE (1U << 0) | 87 | #define __TRANS_FREEZABLE (1U << 0) |
@@ -128,7 +129,6 @@ struct btrfs_trans_handle { | |||
128 | */ | 129 | */ |
129 | struct btrfs_root *root; | 130 | struct btrfs_root *root; |
130 | struct seq_list delayed_ref_elem; | 131 | struct seq_list delayed_ref_elem; |
131 | struct list_head ordered; | ||
132 | struct list_head qgroup_ref_list; | 132 | struct list_head qgroup_ref_list; |
133 | struct list_head new_bgs; | 133 | struct list_head new_bgs; |
134 | }; | 134 | }; |
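Net effect on struct btrfs_transaction: two int booleans and a list head are replaced by one unsigned long flags word (driven by the atomic bitops via the BTRFS_TRANS_* bit indices), an atomic counter, and a waitqueue. A compressed before/after sketch (struct names illustrative; field set per the hunks above):

/* Before: one int per boolean, a list for pending ordered extents. */
struct trans_state_old {
	int have_free_bgs;
	int dirty_bg_run;
	struct list_head pending_ordered;
};

/* After: bit indices into a single word, counter + waitqueue.
 * set_bit()/test_bit()/test_and_set_bit() make every flag update
 * atomic without taking trans_lock. */
struct trans_state_new {
	unsigned long flags;		/* BTRFS_TRANS_HAVE_FREE_BGS, ... */
	atomic_t pending_ordered;
	wait_queue_head_t pending_wait;
};

BTRFS_TRANS_CACHE_ENOSPC is defined alongside the other two bits, but its users are outside the hunks shown here.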
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e875b6cc1e20..f86d83805b44 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1462,7 +1462,7 @@ again: | |||
1462 | btrfs_std_error(root->fs_info, ret, | 1462 | btrfs_std_error(root->fs_info, ret, |
1463 | "Failed to remove dev extent item"); | 1463 | "Failed to remove dev extent item"); |
1464 | } else { | 1464 | } else { |
1465 | trans->transaction->have_free_bgs = 1; | 1465 | set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); |
1466 | } | 1466 | } |
1467 | out: | 1467 | out: |
1468 | btrfs_free_path(path); | 1468 | btrfs_free_path(path); |