aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Josef Bacik <josef@redhat.com> 2011-07-15 11:16:44 -0400
committer: Chris Mason <chris.mason@oracle.com> 2011-07-27 12:46:44 -0400
commit: 9e0baf60dea69f31ac3b1adeb35b03b02a53e8e1 (patch)
tree: 0fb899e1fa78b599d22389ca3befc8ab51ff5049 /fs/btrfs
parent: a5991428064e98c7367fe1c1686ea6a23fb6a4b3 (diff)
Btrfs: fix enospc problems with delalloc
So I had this brilliant idea to use atomic counters for outstanding and reserved
extents, but this turned out to be a bad idea. Consider this case, where we have
1 outstanding extent and 1 reserved extent:

Reserver                                  Releaser
                                          atomic_dec(outstanding) now 0
atomic_read(outstanding)+1 get 1
atomic_read(reserved) get 1
don't actually reserve anything because
they are the same
                                          atomic_cmpxchg(reserved, 1, 0)
atomic_inc(outstanding)
atomic_add(0, reserved)
                                          free reserved space for 1 extent

Then the reserver now has no actual space reserved for it, and when it goes to
finish the ordered IO it won't have enough space to do its allocation and you
get those lovely warnings.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/btrfs_inode.h 7
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 90
-rw-r--r-- fs/btrfs/file.c 8
-rw-r--r-- fs/btrfs/inode.c 35
-rw-r--r-- fs/btrfs/ioctl.c 4
6 files changed, 86 insertions, 60 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7bf..03dce3f40ce0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode {
34 */ 34 */
35 struct btrfs_key location; 35 struct btrfs_key location;
36 36
37 /* Lock for counters */
38 spinlock_t lock;
39
37 /* the extent_tree has caches of all the extent mappings to disk */ 40 /* the extent_tree has caches of all the extent mappings to disk */
38 struct extent_map_tree extent_tree; 41 struct extent_map_tree extent_tree;
39 42
@@ -134,8 +137,8 @@ struct btrfs_inode {
134 * items we think we'll end up using, and reserved_extents is the number 137 * items we think we'll end up using, and reserved_extents is the number
135 * of extent items we've reserved metadata for. 138 * of extent items we've reserved metadata for.
136 */ 139 */
137 atomic_t outstanding_extents; 140 unsigned outstanding_extents;
138 atomic_t reserved_extents; 141 unsigned reserved_extents;
139 142
140 /* 143 /*
141 * ordered_data_close is set by truncate when a file that used 144 * ordered_data_close is set by truncate when a file that used
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9f6f342900c9..3063f21d3fc6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2134,7 +2134,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2134 2134
2135/* extent-tree.c */ 2135/* extent-tree.c */
2136static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, 2136static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
2137 int num_items) 2137 unsigned num_items)
2138{ 2138{
2139 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 2139 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
2140 3 * num_items; 2140 3 * num_items;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0a5bd67e2894..d7031e7dfd76 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3726,7 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3726 if (commit_trans) { 3726 if (commit_trans) {
3727 if (trans) 3727 if (trans)
3728 return -EAGAIN; 3728 return -EAGAIN;
3729
3730 trans = btrfs_join_transaction(root); 3729 trans = btrfs_join_transaction(root);
3731 BUG_ON(IS_ERR(trans)); 3730 BUG_ON(IS_ERR(trans));
3732 ret = btrfs_commit_transaction(trans, root); 3731 ret = btrfs_commit_transaction(trans, root);
@@ -3946,6 +3945,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3946 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3945 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3947} 3946}
3948 3947
3948static unsigned drop_outstanding_extent(struct inode *inode)
3949{
3950 unsigned dropped_extents = 0;
3951
3952 spin_lock(&BTRFS_I(inode)->lock);
3953 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
3954 BTRFS_I(inode)->outstanding_extents--;
3955
3956 /*
3957 * If we have more or the same amount of outstanding extents than we have
3958 * reserved then we need to leave the reserved extents count alone.
3959 */
3960 if (BTRFS_I(inode)->outstanding_extents >=
3961 BTRFS_I(inode)->reserved_extents)
3962 goto out;
3963
3964 dropped_extents = BTRFS_I(inode)->reserved_extents -
3965 BTRFS_I(inode)->outstanding_extents;
3966 BTRFS_I(inode)->reserved_extents -= dropped_extents;
3967out:
3968 spin_unlock(&BTRFS_I(inode)->lock);
3969 return dropped_extents;
3970}
3971
3949static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 3972static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
3950{ 3973{
3951 return num_bytes >>= 3; 3974 return num_bytes >>= 3;
@@ -3955,9 +3978,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3955{ 3978{
3956 struct btrfs_root *root = BTRFS_I(inode)->root; 3979 struct btrfs_root *root = BTRFS_I(inode)->root;
3957 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3980 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3958 u64 to_reserve; 3981 u64 to_reserve = 0;
3959 int nr_extents; 3982 unsigned nr_extents = 0;
3960 int reserved_extents;
3961 int ret; 3983 int ret;
3962 3984
3963 if (btrfs_transaction_in_commit(root->fs_info)) 3985 if (btrfs_transaction_in_commit(root->fs_info))
@@ -3965,24 +3987,31 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3965 3987
3966 num_bytes = ALIGN(num_bytes, root->sectorsize); 3988 num_bytes = ALIGN(num_bytes, root->sectorsize);
3967 3989
3968 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3990 spin_lock(&BTRFS_I(inode)->lock);
3969 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 3991 BTRFS_I(inode)->outstanding_extents++;
3992
3993 if (BTRFS_I(inode)->outstanding_extents >
3994 BTRFS_I(inode)->reserved_extents) {
3995 nr_extents = BTRFS_I(inode)->outstanding_extents -
3996 BTRFS_I(inode)->reserved_extents;
3997 BTRFS_I(inode)->reserved_extents += nr_extents;
3970 3998
3971 if (nr_extents > reserved_extents) {
3972 nr_extents -= reserved_extents;
3973 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 3999 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
3974 } else {
3975 nr_extents = 0;
3976 to_reserve = 0;
3977 } 4000 }
4001 spin_unlock(&BTRFS_I(inode)->lock);
3978 4002
3979 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4003 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3980 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4004 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
3981 if (ret) 4005 if (ret) {
4006 unsigned dropped;
4007 /*
4008 * We don't need the return value since our reservation failed,
4009 * we just need to clean up our counter.
4010 */
4011 dropped = drop_outstanding_extent(inode);
4012 WARN_ON(dropped > 1);
3982 return ret; 4013 return ret;
3983 4014 }
3984 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
3985 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3986 4015
3987 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4016 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3988 4017
@@ -3992,36 +4021,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3992void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 4021void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3993{ 4022{
3994 struct btrfs_root *root = BTRFS_I(inode)->root; 4023 struct btrfs_root *root = BTRFS_I(inode)->root;
3995 u64 to_free; 4024 u64 to_free = 0;
3996 int nr_extents; 4025 unsigned dropped;
3997 int reserved_extents;
3998 4026
3999 num_bytes = ALIGN(num_bytes, root->sectorsize); 4027 num_bytes = ALIGN(num_bytes, root->sectorsize);
4000 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4028 dropped = drop_outstanding_extent(inode);
4001 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4002
4003 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
4004 do {
4005 int old, new;
4006
4007 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
4008 if (nr_extents >= reserved_extents) {
4009 nr_extents = 0;
4010 break;
4011 }
4012 old = reserved_extents;
4013 nr_extents = reserved_extents - nr_extents;
4014 new = reserved_extents - nr_extents;
4015 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
4016 reserved_extents, new);
4017 if (likely(old == reserved_extents))
4018 break;
4019 reserved_extents = old;
4020 } while (1);
4021 4029
4022 to_free = calc_csum_metadata_size(inode, num_bytes); 4030 to_free = calc_csum_metadata_size(inode, num_bytes);
4023 if (nr_extents > 0) 4031 if (dropped > 0)
4024 to_free += btrfs_calc_trans_metadata_size(root, nr_extents); 4032 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4025 4033
4026 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4034 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4027 to_free); 4035 to_free);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bd6bbb877ff2..6e56a468d1f5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1239,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1239 * managed to copy. 1239 * managed to copy.
1240 */ 1240 */
1241 if (num_pages > dirty_pages) { 1241 if (num_pages > dirty_pages) {
1242 if (copied > 0) 1242 if (copied > 0) {
1243 atomic_inc( 1243 spin_lock(&BTRFS_I(inode)->lock);
1244 &BTRFS_I(inode)->outstanding_extents); 1244 BTRFS_I(inode)->outstanding_extents++;
1245 spin_unlock(&BTRFS_I(inode)->lock);
1246 }
1245 btrfs_delalloc_release_space(inode, 1247 btrfs_delalloc_release_space(inode,
1246 (num_pages - dirty_pages) << 1248 (num_pages - dirty_pages) <<
1247 PAGE_CACHE_SHIFT); 1249 PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bf4d2b97d54..55d68ea1866b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1298,7 +1298,9 @@ static int btrfs_split_extent_hook(struct inode *inode,
1298 if (!(orig->state & EXTENT_DELALLOC)) 1298 if (!(orig->state & EXTENT_DELALLOC))
1299 return 0; 1299 return 0;
1300 1300
1301 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 1301 spin_lock(&BTRFS_I(inode)->lock);
1302 BTRFS_I(inode)->outstanding_extents++;
1303 spin_unlock(&BTRFS_I(inode)->lock);
1302 return 0; 1304 return 0;
1303} 1305}
1304 1306
@@ -1316,7 +1318,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1316 if (!(other->state & EXTENT_DELALLOC)) 1318 if (!(other->state & EXTENT_DELALLOC))
1317 return 0; 1319 return 0;
1318 1320
1319 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 1321 spin_lock(&BTRFS_I(inode)->lock);
1322 BTRFS_I(inode)->outstanding_extents--;
1323 spin_unlock(&BTRFS_I(inode)->lock);
1320 return 0; 1324 return 0;
1321} 1325}
1322 1326
@@ -1339,10 +1343,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
1339 u64 len = state->end + 1 - state->start; 1343 u64 len = state->end + 1 - state->start;
1340 bool do_list = !is_free_space_inode(root, inode); 1344 bool do_list = !is_free_space_inode(root, inode);
1341 1345
1342 if (*bits & EXTENT_FIRST_DELALLOC) 1346 if (*bits & EXTENT_FIRST_DELALLOC) {
1343 *bits &= ~EXTENT_FIRST_DELALLOC; 1347 *bits &= ~EXTENT_FIRST_DELALLOC;
1344 else 1348 } else {
1345 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 1349 spin_lock(&BTRFS_I(inode)->lock);
1350 BTRFS_I(inode)->outstanding_extents++;
1351 spin_unlock(&BTRFS_I(inode)->lock);
1352 }
1346 1353
1347 spin_lock(&root->fs_info->delalloc_lock); 1354 spin_lock(&root->fs_info->delalloc_lock);
1348 BTRFS_I(inode)->delalloc_bytes += len; 1355 BTRFS_I(inode)->delalloc_bytes += len;
@@ -1372,10 +1379,13 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1372 u64 len = state->end + 1 - state->start; 1379 u64 len = state->end + 1 - state->start;
1373 bool do_list = !is_free_space_inode(root, inode); 1380 bool do_list = !is_free_space_inode(root, inode);
1374 1381
1375 if (*bits & EXTENT_FIRST_DELALLOC) 1382 if (*bits & EXTENT_FIRST_DELALLOC) {
1376 *bits &= ~EXTENT_FIRST_DELALLOC; 1383 *bits &= ~EXTENT_FIRST_DELALLOC;
1377 else if (!(*bits & EXTENT_DO_ACCOUNTING)) 1384 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1378 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 1385 spin_lock(&BTRFS_I(inode)->lock);
1386 BTRFS_I(inode)->outstanding_extents--;
1387 spin_unlock(&BTRFS_I(inode)->lock);
1388 }
1379 1389
1380 if (*bits & EXTENT_DO_ACCOUNTING) 1390 if (*bits & EXTENT_DO_ACCOUNTING)
1381 btrfs_delalloc_release_metadata(inode, len); 1391 btrfs_delalloc_release_metadata(inode, len);
@@ -6735,8 +6745,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6735 ei->index_cnt = (u64)-1; 6745 ei->index_cnt = (u64)-1;
6736 ei->last_unlink_trans = 0; 6746 ei->last_unlink_trans = 0;
6737 6747
6738 atomic_set(&ei->outstanding_extents, 0); 6748 spin_lock_init(&ei->lock);
6739 atomic_set(&ei->reserved_extents, 0); 6749 ei->outstanding_extents = 0;
6750 ei->reserved_extents = 0;
6740 6751
6741 ei->ordered_data_close = 0; 6752 ei->ordered_data_close = 0;
6742 ei->orphan_meta_reserved = 0; 6753 ei->orphan_meta_reserved = 0;
@@ -6774,8 +6785,8 @@ void btrfs_destroy_inode(struct inode *inode)
6774 6785
6775 WARN_ON(!list_empty(&inode->i_dentry)); 6786 WARN_ON(!list_empty(&inode->i_dentry));
6776 WARN_ON(inode->i_data.nrpages); 6787 WARN_ON(inode->i_data.nrpages);
6777 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); 6788 WARN_ON(BTRFS_I(inode)->outstanding_extents);
6778 WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); 6789 WARN_ON(BTRFS_I(inode)->reserved_extents);
6779 6790
6780 /* 6791 /*
6781 * This can happen where we create an inode, but somebody else also 6792 * This can happen where we create an inode, but somebody else also
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 09c9a8d26ee9..fd252fff4c66 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -938,7 +938,9 @@ again:
938 GFP_NOFS); 938 GFP_NOFS);
939 939
940 if (i_done != num_pages) { 940 if (i_done != num_pages) {
941 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 941 spin_lock(&BTRFS_I(inode)->lock);
942 BTRFS_I(inode)->outstanding_extents++;
943 spin_unlock(&BTRFS_I(inode)->lock);
942 btrfs_delalloc_release_space(inode, 944 btrfs_delalloc_release_space(inode,
943 (num_pages - i_done) << PAGE_CACHE_SHIFT); 945 (num_pages - i_done) << PAGE_CACHE_SHIFT);
944 } 946 }