aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2013-06-19 15:00:04 -0400
committerJosef Bacik <jbacik@fusionio.com>2013-07-02 11:50:42 -0400
commitb150a4f10d8786a204db1ae3dccada17f950cf54 (patch)
tree3d1d6d471e8dafecb765435b87d24f22482acda5 /fs
parentf23b5a59955c0ea13c6da211fb06f39348e3c794 (diff)
Btrfs: use a percpu to keep track of possibly pinned bytes
There are all of these checks in the ENOSPC code to see if committing the transaction would free up enough space to make the allocation. This is because early on we just committed the transaction and hoped and prayed, which resulted in cases where it took _forever_ to get an ENOSPC when we really were out of space. So we check space_info->bytes_pinned, except this isn't completely true because it doesn't account for space we may free but are stuck in delayed refs. So tests like xfstests 226 would fail because we wouldn't commit the transaction to free up the data space. So instead add a percpu counter that will be a little fuzzier: it will add bytes as soon as we try to free up the space, and remove any space it doesn't actually free up when we get around to doing the actual free. We then zero out this counter every transaction period so we have a better idea of how much space we will actually free up by committing this transaction. With this patch we now pass xfstests 226. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/extent-tree.c59
2 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 76e4983b39ea..b528a5509cb8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1102,6 +1102,18 @@ struct btrfs_space_info {
 			       account */
 
 	/*
+	 * bytes_pinned is kept in line with what is actually pinned, as in
+	 * we've called update_block_group and dropped the bytes_used counter
+	 * and increased the bytes_pinned counter.  However this means that
+	 * bytes_pinned does not reflect the bytes that will be pinned once the
+	 * delayed refs are flushed, so this counter is inc'ed everytime we call
+	 * btrfs_free_extent so it is a realtime count of what will be freed
+	 * once the transaction is committed.  It will be zero'ed everytime the
+	 * transaction commits.
+	 */
+	struct percpu_counter total_bytes_pinned;
+
+	/*
 	 * we bump reservation progress every time we decrement
 	 * bytes_reserved.  This way people waiting for reservations
 	 * know something good has happened and they can check
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6d5c5f73ad64..bbd3db7d0833 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -24,6 +24,7 @@
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/percpu_counter.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -3357,6 +3358,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	struct btrfs_space_info *found;
 	int i;
 	int factor;
+	int ret;
 
 	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
 		     BTRFS_BLOCK_GROUP_RAID10))
@@ -3380,6 +3382,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (!found)
 		return -ENOMEM;
 
+	ret = percpu_counter_init(&found->total_bytes_pinned, 0);
+	if (ret) {
+		kfree(found);
+		return ret;
+	}
+
 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
 		INIT_LIST_HEAD(&found->block_groups[i]);
 	init_rwsem(&found->groups_sem);
@@ -3612,10 +3620,11 @@ alloc:
 		}
 
 		/*
-		 * If we have less pinned bytes than we want to allocate then
-		 * don't bother committing the transaction, it won't help us.
+		 * If we don't have enough pinned space to deal with this
+		 * allocation don't bother committing the transaction.
 		 */
-		if (data_sinfo->bytes_pinned < bytes)
+		if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
+					   bytes) < 0)
 			committed = 1;
 		spin_unlock(&data_sinfo->lock);
 
@@ -3624,6 +3633,7 @@ commit_trans:
 		if (!committed &&
 		    !atomic_read(&root->fs_info->open_ioctl_trans)) {
 			committed = 1;
+
 			trans = btrfs_join_transaction(root);
 			if (IS_ERR(trans))
 				return PTR_ERR(trans);
@@ -4044,7 +4054,8 @@ static int may_commit_transaction(struct btrfs_root *root,
 
 	/* See if there is enough pinned space to make this reservation */
 	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned >= bytes) {
+	if (percpu_counter_compare(&space_info->total_bytes_pinned,
+				   bytes) >= 0) {
 		spin_unlock(&space_info->lock);
 		goto commit;
 	}
@@ -4059,7 +4070,8 @@ static int may_commit_transaction(struct btrfs_root *root,
 
 	spin_lock(&space_info->lock);
 	spin_lock(&delayed_rsv->lock);
-	if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
+	if (percpu_counter_compare(&space_info->total_bytes_pinned,
+				   bytes - delayed_rsv->size) >= 0) {
 		spin_unlock(&delayed_rsv->lock);
 		spin_unlock(&space_info->lock);
 		return -ENOSPC;
@@ -5397,6 +5409,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 	struct btrfs_caching_control *next;
 	struct btrfs_caching_control *caching_ctl;
 	struct btrfs_block_group_cache *cache;
+	struct btrfs_space_info *space_info;
 
 	down_write(&fs_info->extent_commit_sem);
 
@@ -5419,6 +5432,9 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 
 	up_write(&fs_info->extent_commit_sem);
 
+	list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
+		percpu_counter_set(&space_info->total_bytes_pinned, 0);
+
 	update_global_block_rsv(fs_info);
 }
 
@@ -5516,6 +5532,27 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
+			     u64 owner, u64 root_objectid)
+{
+	struct btrfs_space_info *space_info;
+	u64 flags;
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
+			flags = BTRFS_BLOCK_GROUP_SYSTEM;
+		else
+			flags = BTRFS_BLOCK_GROUP_METADATA;
+	} else {
+		flags = BTRFS_BLOCK_GROUP_DATA;
+	}
+
+	space_info = __find_space_info(fs_info, flags);
+	BUG_ON(!space_info); /* Logic bug */
+	percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
+}
+
+
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -5736,6 +5773,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 				goto out;
 			}
 		}
+		add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
+				 root_objectid);
 	} else {
 		if (found_extent) {
 			BUG_ON(is_data && refs_to_drop !=
@@ -5859,6 +5898,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 			   u64 parent, int last_ref)
 {
 	struct btrfs_block_group_cache *cache = NULL;
+	int pin = 1;
 	int ret;
 
 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -5891,8 +5931,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 		btrfs_add_free_space(cache, buf->start, buf->len);
 		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+		pin = 0;
 	}
 out:
+	if (pin)
+		add_pinned_bytes(root->fs_info, buf->len,
+				 btrfs_header_level(buf),
+				 root->root_key.objectid);
+
 	/*
 	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
 	 * anymore.
@@ -5909,6 +5955,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
+	add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
+
 	/*
 	 * tree log blocks never actually go into the extent allocation
 	 * tree, just update pinning info and exit early.
@@ -8152,6 +8200,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 			dump_space_info(space_info, 0, 0);
 		}
 	}
+	percpu_counter_destroy(&space_info->total_bytes_pinned);
 	list_del(&space_info->list);
 	kfree(space_info);
 }