aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fb.com> 2015-09-23 14:54:14 -0400
committer Chris Mason <clm@fb.com> 2015-10-21 21:51:43 -0400
commit d0bd456074dca089579818312da7cbe726ad2ff9 (patch)
tree 6dcb3498d7b964a19562dccf8322483359ede3ef
parent d9ee522ba3ab51b7e3c6dfcf3743216371bc810f (diff)
Btrfs: add fragment=* debug mount option
In tracking down these weird bitmap problems it was helpful to artificially create an extremely fragmented file system. These mount options let us either fragment data or metadata or both. With these options I could reproduce all sorts of weird latencies and hangs that occur under extreme fragmentation and get them fixed. Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- fs/btrfs/ctree.h 14
-rw-r--r-- fs/btrfs/extent-tree.c 82
-rw-r--r-- fs/btrfs/free-space-cache.c 9
-rw-r--r-- fs/btrfs/super.c 30
-rw-r--r-- fs/btrfs/tests/free-space-tests.c 22
5 files changed, 150 insertions, 7 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 49bc792108b9..16384231db82 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2145,6 +2145,8 @@ struct btrfs_ioctl_defrag_range_args {
2145#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 2145#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
2146#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) 2146#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
2147#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) 2147#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
2148#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
2149#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
2148 2150
2149#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) 2151#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
2150#define BTRFS_DEFAULT_MAX_INLINE (8192) 2152#define BTRFS_DEFAULT_MAX_INLINE (8192)
@@ -2169,6 +2171,18 @@ struct btrfs_ioctl_defrag_range_args {
2169 btrfs_clear_opt(root->fs_info->mount_opt, opt); \ 2171 btrfs_clear_opt(root->fs_info->mount_opt, opt); \
2170} 2172}
2171 2173
2174#ifdef CONFIG_BTRFS_DEBUG
2175static inline int
2176btrfs_should_fragment_free_space(struct btrfs_root *root,
2177 struct btrfs_block_group_cache *block_group)
2178{
2179 return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
2180 block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
2181 (btrfs_test_opt(root, FRAGMENT_DATA) &&
2182 block_group->flags & BTRFS_BLOCK_GROUP_DATA);
2183}
2184#endif
2185
2172/* 2186/*
2173 * Requests for changes that need to be done during transaction commit. 2187 * Requests for changes that need to be done during transaction commit.
2174 * 2188 *
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2df4bc77f5b4..0e32abf53b5b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
332 kfree(ctl); 332 kfree(ctl);
333} 333}
334 334
335#ifdef CONFIG_BTRFS_DEBUG
336static void fragment_free_space(struct btrfs_root *root,
337 struct btrfs_block_group_cache *block_group)
338{
339 u64 start = block_group->key.objectid;
340 u64 len = block_group->key.offset;
341 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
342 root->nodesize : root->sectorsize;
343 u64 step = chunk << 1;
344
345 while (len > chunk) {
346 btrfs_remove_free_space(block_group, start, chunk);
347 start += step;
348 if (len < step)
349 len = 0;
350 else
351 len -= step;
352 }
353}
354#endif
355
335/* 356/*
336 * this is only called by cache_block_group, since we could have freed extents 357 * this is only called by cache_block_group, since we could have freed extents
337 * we need to check the pinned_extents for any extents that can't be used yet 358 * we need to check the pinned_extents for any extents that can't be used yet
@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
388 u64 last = 0; 409 u64 last = 0;
389 u32 nritems; 410 u32 nritems;
390 int ret = -ENOMEM; 411 int ret = -ENOMEM;
412 bool wakeup = true;
391 413
392 caching_ctl = container_of(work, struct btrfs_caching_control, work); 414 caching_ctl = container_of(work, struct btrfs_caching_control, work);
393 block_group = caching_ctl->block_group; 415 block_group = caching_ctl->block_group;
@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
400 422
401 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 423 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
402 424
425#ifdef CONFIG_BTRFS_DEBUG
426 /*
427 * If we're fragmenting we don't want to make anybody think we can
428 * allocate from this block group until we've had a chance to fragment
429 * the free space.
430 */
431 if (btrfs_should_fragment_free_space(extent_root, block_group))
432 wakeup = false;
433#endif
403 /* 434 /*
404 * We don't want to deadlock with somebody trying to allocate a new 435 * We don't want to deadlock with somebody trying to allocate a new
405 * extent for the extent root while also trying to search the extent 436 * extent for the extent root while also trying to search the extent
@@ -441,7 +472,8 @@ next:
441 472
442 if (need_resched() || 473 if (need_resched() ||
443 rwsem_is_contended(&fs_info->commit_root_sem)) { 474 rwsem_is_contended(&fs_info->commit_root_sem)) {
444 caching_ctl->progress = last; 475 if (wakeup)
476 caching_ctl->progress = last;
445 btrfs_release_path(path); 477 btrfs_release_path(path);
446 up_read(&fs_info->commit_root_sem); 478 up_read(&fs_info->commit_root_sem);
447 mutex_unlock(&caching_ctl->mutex); 479 mutex_unlock(&caching_ctl->mutex);
@@ -464,7 +496,8 @@ next:
464 key.offset = 0; 496 key.offset = 0;
465 key.type = BTRFS_EXTENT_ITEM_KEY; 497 key.type = BTRFS_EXTENT_ITEM_KEY;
466 498
467 caching_ctl->progress = last; 499 if (wakeup)
500 caching_ctl->progress = last;
468 btrfs_release_path(path); 501 btrfs_release_path(path);
469 goto next; 502 goto next;
470 } 503 }
@@ -491,7 +524,8 @@ next:
491 524
492 if (total_found > (1024 * 1024 * 2)) { 525 if (total_found > (1024 * 1024 * 2)) {
493 total_found = 0; 526 total_found = 0;
494 wake_up(&caching_ctl->wait); 527 if (wakeup)
528 wake_up(&caching_ctl->wait);
495 } 529 }
496 } 530 }
497 path->slots[0]++; 531 path->slots[0]++;
@@ -501,13 +535,27 @@ next:
501 total_found += add_new_free_space(block_group, fs_info, last, 535 total_found += add_new_free_space(block_group, fs_info, last,
502 block_group->key.objectid + 536 block_group->key.objectid +
503 block_group->key.offset); 537 block_group->key.offset);
504 caching_ctl->progress = (u64)-1;
505
506 spin_lock(&block_group->lock); 538 spin_lock(&block_group->lock);
507 block_group->caching_ctl = NULL; 539 block_group->caching_ctl = NULL;
508 block_group->cached = BTRFS_CACHE_FINISHED; 540 block_group->cached = BTRFS_CACHE_FINISHED;
509 spin_unlock(&block_group->lock); 541 spin_unlock(&block_group->lock);
510 542
543#ifdef CONFIG_BTRFS_DEBUG
544 if (btrfs_should_fragment_free_space(extent_root, block_group)) {
545 u64 bytes_used;
546
547 spin_lock(&block_group->space_info->lock);
548 spin_lock(&block_group->lock);
549 bytes_used = block_group->key.offset -
550 btrfs_block_group_used(&block_group->item);
551 block_group->space_info->bytes_used += bytes_used >> 1;
552 spin_unlock(&block_group->lock);
553 spin_unlock(&block_group->space_info->lock);
554 fragment_free_space(extent_root, block_group);
555 }
556#endif
557
558 caching_ctl->progress = (u64)-1;
511err: 559err:
512 btrfs_free_path(path); 560 btrfs_free_path(path);
513 up_read(&fs_info->commit_root_sem); 561 up_read(&fs_info->commit_root_sem);
@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
607 } 655 }
608 } 656 }
609 spin_unlock(&cache->lock); 657 spin_unlock(&cache->lock);
658#ifdef CONFIG_BTRFS_DEBUG
659 if (ret == 1 &&
660 btrfs_should_fragment_free_space(fs_info->extent_root,
661 cache)) {
662 u64 bytes_used;
663
664 spin_lock(&cache->space_info->lock);
665 spin_lock(&cache->lock);
666 bytes_used = cache->key.offset -
667 btrfs_block_group_used(&cache->item);
668 cache->space_info->bytes_used += bytes_used >> 1;
669 spin_unlock(&cache->lock);
670 spin_unlock(&cache->space_info->lock);
671 fragment_free_space(fs_info->extent_root, cache);
672 }
673#endif
610 mutex_unlock(&caching_ctl->mutex); 674 mutex_unlock(&caching_ctl->mutex);
611 675
612 wake_up(&caching_ctl->wait); 676 wake_up(&caching_ctl->wait);
@@ -9624,6 +9688,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
9624 9688
9625 free_excluded_extents(root, cache); 9689 free_excluded_extents(root, cache);
9626 9690
9691#ifdef CONFIG_BTRFS_DEBUG
9692 if (btrfs_should_fragment_free_space(root, cache)) {
9693 u64 new_bytes_used = size - bytes_used;
9694
9695 bytes_used += new_bytes_used >> 1;
9696 fragment_free_space(root, cache);
9697 }
9698#endif
9627 /* 9699 /*
9628 * Call to ensure the corresponding space_info object is created and 9700 * Call to ensure the corresponding space_info object is created and
9629 * assigned to our block group, but don't update its counters just yet. 9701 * assigned to our block group, but don't update its counters just yet.
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5a9431dda07f..c0eb84ecb78f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1951,12 +1951,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1951 struct btrfs_free_space *info) 1951 struct btrfs_free_space *info)
1952{ 1952{
1953 struct btrfs_block_group_cache *block_group = ctl->private; 1953 struct btrfs_block_group_cache *block_group = ctl->private;
1954 bool forced = false;
1955
1956#ifdef CONFIG_BTRFS_DEBUG
1957 if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
1958 block_group))
1959 forced = true;
1960#endif
1954 1961
1955 /* 1962 /*
1956 * If we are below the extents threshold then we can add this as an 1963 * If we are below the extents threshold then we can add this as an
1957 * extent, and don't have to deal with the bitmap 1964 * extent, and don't have to deal with the bitmap
1958 */ 1965 */
1959 if (ctl->free_extents < ctl->extents_thresh) { 1966 if (!forced && ctl->free_extents < ctl->extents_thresh) {
1960 /* 1967 /*
1961 * If this block group has some small extents we don't want to 1968 * If this block group has some small extents we don't want to
1962 * use up all of our free slots in the cache with them, we want 1969 * use up all of our free slots in the cache with them, we want
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b23d49daa1a2..24154e422945 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -303,6 +303,9 @@ enum {
303 Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, 303 Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
304 Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, 304 Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
305 Opt_datasum, Opt_treelog, Opt_noinode_cache, 305 Opt_datasum, Opt_treelog, Opt_noinode_cache,
306#ifdef CONFIG_BTRFS_DEBUG
307 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
308#endif
306 Opt_err, 309 Opt_err,
307}; 310};
308 311
@@ -355,6 +358,11 @@ static match_table_t tokens = {
355 {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, 358 {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
356 {Opt_fatal_errors, "fatal_errors=%s"}, 359 {Opt_fatal_errors, "fatal_errors=%s"},
357 {Opt_commit_interval, "commit=%d"}, 360 {Opt_commit_interval, "commit=%d"},
361#ifdef CONFIG_BTRFS_DEBUG
362 {Opt_fragment_data, "fragment=data"},
363 {Opt_fragment_metadata, "fragment=metadata"},
364 {Opt_fragment_all, "fragment=all"},
365#endif
358 {Opt_err, NULL}, 366 {Opt_err, NULL},
359}; 367};
360 368
@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
721 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 729 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
722 } 730 }
723 break; 731 break;
732#ifdef CONFIG_BTRFS_DEBUG
733 case Opt_fragment_all:
734 btrfs_info(root->fs_info, "fragmenting all space");
735 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
736 btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
737 break;
738 case Opt_fragment_metadata:
739 btrfs_info(root->fs_info, "fragmenting metadata");
740 btrfs_set_opt(info->mount_opt,
741 FRAGMENT_METADATA);
742 break;
743 case Opt_fragment_data:
744 btrfs_info(root->fs_info, "fragmenting data");
745 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
746 break;
747#endif
724 case Opt_err: 748 case Opt_err:
725 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p); 749 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
726 ret = -EINVAL; 750 ret = -EINVAL;
@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1172 seq_puts(seq, ",fatal_errors=panic"); 1196 seq_puts(seq, ",fatal_errors=panic");
1173 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) 1197 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1174 seq_printf(seq, ",commit=%d", info->commit_interval); 1198 seq_printf(seq, ",commit=%d", info->commit_interval);
1199#ifdef CONFIG_BTRFS_DEBUG
1200 if (btrfs_test_opt(root, FRAGMENT_DATA))
1201 seq_puts(seq, ",fragment=data");
1202 if (btrfs_test_opt(root, FRAGMENT_METADATA))
1203 seq_puts(seq, ",fragment=metadata");
1204#endif
1175 seq_printf(seq, ",subvolid=%llu", 1205 seq_printf(seq, ",subvolid=%llu",
1176 BTRFS_I(d_inode(dentry))->root->root_key.objectid); 1206 BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1177 seq_puts(seq, ",subvol="); 1207 seq_puts(seq, ",subvol=");
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 2299bfde39ee..c8c3d70c31ff 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -19,6 +19,7 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include "btrfs-tests.h" 20#include "btrfs-tests.h"
21#include "../ctree.h" 21#include "../ctree.h"
22#include "../disk-io.h"
22#include "../free-space-cache.h" 23#include "../free-space-cache.h"
23 24
24#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 25#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
35 kfree(cache); 36 kfree(cache);
36 return NULL; 37 return NULL;
37 } 38 }
39 cache->fs_info = btrfs_alloc_dummy_fs_info();
40 if (!cache->fs_info) {
41 kfree(cache->free_space_ctl);
42 kfree(cache);
43 return NULL;
44 }
38 45
39 cache->key.objectid = 0; 46 cache->key.objectid = 0;
40 cache->key.offset = 1024 * 1024 * 1024; 47 cache->key.offset = 1024 * 1024 * 1024;
@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
879int btrfs_test_free_space_cache(void) 886int btrfs_test_free_space_cache(void)
880{ 887{
881 struct btrfs_block_group_cache *cache; 888 struct btrfs_block_group_cache *cache;
882 int ret; 889 struct btrfs_root *root = NULL;
890 int ret = -ENOMEM;
883 891
884 test_msg("Running btrfs free space cache tests\n"); 892 test_msg("Running btrfs free space cache tests\n");
885 893
@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
889 return 0; 897 return 0;
890 } 898 }
891 899
900 root = btrfs_alloc_dummy_root();
901 if (!root)
902 goto out;
903
904 root->fs_info = btrfs_alloc_dummy_fs_info();
905 if (!root->fs_info)
906 goto out;
907
908 root->fs_info->extent_root = root;
909 cache->fs_info = root->fs_info;
910
892 ret = test_extents(cache); 911 ret = test_extents(cache);
893 if (ret) 912 if (ret)
894 goto out; 913 goto out;
@@ -904,6 +923,7 @@ out:
904 __btrfs_remove_free_space_cache(cache->free_space_ctl); 923 __btrfs_remove_free_space_cache(cache->free_space_ctl);
905 kfree(cache->free_space_ctl); 924 kfree(cache->free_space_ctl);
906 kfree(cache); 925 kfree(cache);
926 btrfs_free_dummy_root(root);
907 test_msg("Free space cache tests finished\n"); 927 test_msg("Free space cache tests finished\n");
908 return ret; 928 return ret;
909} 929}