diff options
author | Josef Bacik <jbacik@fusionio.com> | 2013-06-19 15:00:04 -0400 |
---|---|---|
committer | Josef Bacik <jbacik@fusionio.com> | 2013-07-02 11:50:42 -0400 |
commit | b150a4f10d8786a204db1ae3dccada17f950cf54 (patch) | |
tree | 3d1d6d471e8dafecb765435b87d24f22482acda5 /fs | |
parent | f23b5a59955c0ea13c6da211fb06f39348e3c794 (diff) |
Btrfs: use a percpu to keep track of possibly pinned bytes
There are all of these checks in the ENOSPC code to see if committing the
transaction would free up enough space to make the allocation. This is because
early on we just committed the transaction and hoped and prayed, which resulted
in cases where it took _forever_ to get an ENOSPC when we really were out of
space. So we check space_info->bytes_pinned, except this isn't completely accurate
because it doesn't account for space we may free that is still stuck in delayed refs.
So tests like xfstests 226 would fail because we wouldn't commit the transaction
to free up the data space. So instead add a percpu counter that will be a
little fuzzier: it will add bytes as soon as we try to free up the space, and
remove any space it doesn't actually free up when we get around to doing the
actual free. We then 0 out this counter every transaction period so we have a
better idea of how much space we will actually free up by committing this
transaction. With this patch we now pass xfstests 226. Thanks,
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.h | 12 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 59 |
2 files changed, 66 insertions, 5 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 76e4983b39ea..b528a5509cb8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1102,6 +1102,18 @@ struct btrfs_space_info { | |||
1102 | account */ | 1102 | account */ |
1103 | 1103 | ||
1104 | /* | 1104 | /* |
1105 | * bytes_pinned is kept in line with what is actually pinned, as in | ||
1106 | * we've called update_block_group and dropped the bytes_used counter | ||
1107 | * and increased the bytes_pinned counter. However this means that | ||
1108 | * bytes_pinned does not reflect the bytes that will be pinned once the | ||
1109 | * delayed refs are flushed, so this counter is inc'ed everytime we call | ||
1110 | * btrfs_free_extent so it is a realtime count of what will be freed | ||
1111 | * once the transaction is committed. It will be zero'ed everytime the | ||
1112 | * transaction commits. | ||
1113 | */ | ||
1114 | struct percpu_counter total_bytes_pinned; | ||
1115 | |||
1116 | /* | ||
1105 | * we bump reservation progress every time we decrement | 1117 | * we bump reservation progress every time we decrement |
1106 | * bytes_reserved. This way people waiting for reservations | 1118 | * bytes_reserved. This way people waiting for reservations |
1107 | * know something good has happened and they can check | 1119 | * know something good has happened and they can check |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6d5c5f73ad64..bbd3db7d0833 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
27 | #include <linux/percpu_counter.h> | ||
27 | #include "compat.h" | 28 | #include "compat.h" |
28 | #include "hash.h" | 29 | #include "hash.h" |
29 | #include "ctree.h" | 30 | #include "ctree.h" |
@@ -3357,6 +3358,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3357 | struct btrfs_space_info *found; | 3358 | struct btrfs_space_info *found; |
3358 | int i; | 3359 | int i; |
3359 | int factor; | 3360 | int factor; |
3361 | int ret; | ||
3360 | 3362 | ||
3361 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | 3363 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
3362 | BTRFS_BLOCK_GROUP_RAID10)) | 3364 | BTRFS_BLOCK_GROUP_RAID10)) |
@@ -3380,6 +3382,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3380 | if (!found) | 3382 | if (!found) |
3381 | return -ENOMEM; | 3383 | return -ENOMEM; |
3382 | 3384 | ||
3385 | ret = percpu_counter_init(&found->total_bytes_pinned, 0); | ||
3386 | if (ret) { | ||
3387 | kfree(found); | ||
3388 | return ret; | ||
3389 | } | ||
3390 | |||
3383 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 3391 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
3384 | INIT_LIST_HEAD(&found->block_groups[i]); | 3392 | INIT_LIST_HEAD(&found->block_groups[i]); |
3385 | init_rwsem(&found->groups_sem); | 3393 | init_rwsem(&found->groups_sem); |
@@ -3612,10 +3620,11 @@ alloc: | |||
3612 | } | 3620 | } |
3613 | 3621 | ||
3614 | /* | 3622 | /* |
3615 | * If we have less pinned bytes than we want to allocate then | 3623 | * If we don't have enough pinned space to deal with this |
3616 | * don't bother committing the transaction, it won't help us. | 3624 | * allocation don't bother committing the transaction. |
3617 | */ | 3625 | */ |
3618 | if (data_sinfo->bytes_pinned < bytes) | 3626 | if (percpu_counter_compare(&data_sinfo->total_bytes_pinned, |
3627 | bytes) < 0) | ||
3619 | committed = 1; | 3628 | committed = 1; |
3620 | spin_unlock(&data_sinfo->lock); | 3629 | spin_unlock(&data_sinfo->lock); |
3621 | 3630 | ||
@@ -3624,6 +3633,7 @@ commit_trans: | |||
3624 | if (!committed && | 3633 | if (!committed && |
3625 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | 3634 | !atomic_read(&root->fs_info->open_ioctl_trans)) { |
3626 | committed = 1; | 3635 | committed = 1; |
3636 | |||
3627 | trans = btrfs_join_transaction(root); | 3637 | trans = btrfs_join_transaction(root); |
3628 | if (IS_ERR(trans)) | 3638 | if (IS_ERR(trans)) |
3629 | return PTR_ERR(trans); | 3639 | return PTR_ERR(trans); |
@@ -4044,7 +4054,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4044 | 4054 | ||
4045 | /* See if there is enough pinned space to make this reservation */ | 4055 | /* See if there is enough pinned space to make this reservation */ |
4046 | spin_lock(&space_info->lock); | 4056 | spin_lock(&space_info->lock); |
4047 | if (space_info->bytes_pinned >= bytes) { | 4057 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4058 | bytes) >= 0) { | ||
4048 | spin_unlock(&space_info->lock); | 4059 | spin_unlock(&space_info->lock); |
4049 | goto commit; | 4060 | goto commit; |
4050 | } | 4061 | } |
@@ -4059,7 +4070,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4059 | 4070 | ||
4060 | spin_lock(&space_info->lock); | 4071 | spin_lock(&space_info->lock); |
4061 | spin_lock(&delayed_rsv->lock); | 4072 | spin_lock(&delayed_rsv->lock); |
4062 | if (space_info->bytes_pinned + delayed_rsv->size < bytes) { | 4073 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4074 | bytes - delayed_rsv->size) >= 0) { | ||
4063 | spin_unlock(&delayed_rsv->lock); | 4075 | spin_unlock(&delayed_rsv->lock); |
4064 | spin_unlock(&space_info->lock); | 4076 | spin_unlock(&space_info->lock); |
4065 | return -ENOSPC; | 4077 | return -ENOSPC; |
@@ -5397,6 +5409,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
5397 | struct btrfs_caching_control *next; | 5409 | struct btrfs_caching_control *next; |
5398 | struct btrfs_caching_control *caching_ctl; | 5410 | struct btrfs_caching_control *caching_ctl; |
5399 | struct btrfs_block_group_cache *cache; | 5411 | struct btrfs_block_group_cache *cache; |
5412 | struct btrfs_space_info *space_info; | ||
5400 | 5413 | ||
5401 | down_write(&fs_info->extent_commit_sem); | 5414 | down_write(&fs_info->extent_commit_sem); |
5402 | 5415 | ||
@@ -5419,6 +5432,9 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
5419 | 5432 | ||
5420 | up_write(&fs_info->extent_commit_sem); | 5433 | up_write(&fs_info->extent_commit_sem); |
5421 | 5434 | ||
5435 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) | ||
5436 | percpu_counter_set(&space_info->total_bytes_pinned, 0); | ||
5437 | |||
5422 | update_global_block_rsv(fs_info); | 5438 | update_global_block_rsv(fs_info); |
5423 | } | 5439 | } |
5424 | 5440 | ||
@@ -5516,6 +5532,27 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
5516 | return 0; | 5532 | return 0; |
5517 | } | 5533 | } |
5518 | 5534 | ||
5535 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes, | ||
5536 | u64 owner, u64 root_objectid) | ||
5537 | { | ||
5538 | struct btrfs_space_info *space_info; | ||
5539 | u64 flags; | ||
5540 | |||
5541 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
5542 | if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
5543 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
5544 | else | ||
5545 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
5546 | } else { | ||
5547 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
5548 | } | ||
5549 | |||
5550 | space_info = __find_space_info(fs_info, flags); | ||
5551 | BUG_ON(!space_info); /* Logic bug */ | ||
5552 | percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); | ||
5553 | } | ||
5554 | |||
5555 | |||
5519 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 5556 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
5520 | struct btrfs_root *root, | 5557 | struct btrfs_root *root, |
5521 | u64 bytenr, u64 num_bytes, u64 parent, | 5558 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -5736,6 +5773,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5736 | goto out; | 5773 | goto out; |
5737 | } | 5774 | } |
5738 | } | 5775 | } |
5776 | add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid, | ||
5777 | root_objectid); | ||
5739 | } else { | 5778 | } else { |
5740 | if (found_extent) { | 5779 | if (found_extent) { |
5741 | BUG_ON(is_data && refs_to_drop != | 5780 | BUG_ON(is_data && refs_to_drop != |
@@ -5859,6 +5898,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5859 | u64 parent, int last_ref) | 5898 | u64 parent, int last_ref) |
5860 | { | 5899 | { |
5861 | struct btrfs_block_group_cache *cache = NULL; | 5900 | struct btrfs_block_group_cache *cache = NULL; |
5901 | int pin = 1; | ||
5862 | int ret; | 5902 | int ret; |
5863 | 5903 | ||
5864 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 5904 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
@@ -5891,8 +5931,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5891 | 5931 | ||
5892 | btrfs_add_free_space(cache, buf->start, buf->len); | 5932 | btrfs_add_free_space(cache, buf->start, buf->len); |
5893 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); | 5933 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
5934 | pin = 0; | ||
5894 | } | 5935 | } |
5895 | out: | 5936 | out: |
5937 | if (pin) | ||
5938 | add_pinned_bytes(root->fs_info, buf->len, | ||
5939 | btrfs_header_level(buf), | ||
5940 | root->root_key.objectid); | ||
5941 | |||
5896 | /* | 5942 | /* |
5897 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | 5943 | * Deleting the buffer, clear the corrupt flag since it doesn't matter |
5898 | * anymore. | 5944 | * anymore. |
@@ -5909,6 +5955,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
5909 | int ret; | 5955 | int ret; |
5910 | struct btrfs_fs_info *fs_info = root->fs_info; | 5956 | struct btrfs_fs_info *fs_info = root->fs_info; |
5911 | 5957 | ||
5958 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | ||
5959 | |||
5912 | /* | 5960 | /* |
5913 | * tree log blocks never actually go into the extent allocation | 5961 | * tree log blocks never actually go into the extent allocation |
5914 | * tree, just update pinning info and exit early. | 5962 | * tree, just update pinning info and exit early. |
@@ -8152,6 +8200,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8152 | dump_space_info(space_info, 0, 0); | 8200 | dump_space_info(space_info, 0, 0); |
8153 | } | 8201 | } |
8154 | } | 8202 | } |
8203 | percpu_counter_destroy(&space_info->total_bytes_pinned); | ||
8155 | list_del(&space_info->list); | 8204 | list_del(&space_info->list); |
8156 | kfree(space_info); | 8205 | kfree(space_info); |
8157 | } | 8206 | } |