diff options
author | Josef Bacik <josef@redhat.com> | 2011-07-15 11:16:44 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2011-07-27 12:46:44 -0400 |
commit | 9e0baf60dea69f31ac3b1adeb35b03b02a53e8e1 (patch) | |
tree | 0fb899e1fa78b599d22389ca3befc8ab51ff5049 /fs/btrfs | |
parent | a5991428064e98c7367fe1c1686ea6a23fb6a4b3 (diff) |
Btrfs: fix enospc problems with delalloc
So I had this brilliant idea to use atomic counters for outstanding and reserved
extents, but this turned out to be a bad idea. Consider this where we have 1
outstanding extent and 1 reserved extent
Reserver                                    Releaser
                                            atomic_dec(outstanding) now 0
atomic_read(outstanding)+1 get 1
atomic_read(reserved) get 1
don't actually reserve anything because
they are the same
                                            atomic_cmpxchg(reserved, 1, 0)
atomic_inc(outstanding)
atomic_add(0, reserved)
                                            free reserved space for 1 extent
Then the reserver now has no actual space reserved for it, and when it goes to
finish the ordered IO it won't have enough space to do its allocation and you
get those lovely warnings.
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 7 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 90 | ||||
-rw-r--r-- | fs/btrfs/file.c | 8 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 35 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 4 |
6 files changed, 86 insertions, 60 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 52d7eca8c7bf..03dce3f40ce0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -34,6 +34,9 @@ struct btrfs_inode { | |||
34 | */ | 34 | */ |
35 | struct btrfs_key location; | 35 | struct btrfs_key location; |
36 | 36 | ||
37 | /* Lock for counters */ | ||
38 | spinlock_t lock; | ||
39 | |||
37 | /* the extent_tree has caches of all the extent mappings to disk */ | 40 | /* the extent_tree has caches of all the extent mappings to disk */ |
38 | struct extent_map_tree extent_tree; | 41 | struct extent_map_tree extent_tree; |
39 | 42 | ||
@@ -134,8 +137,8 @@ struct btrfs_inode { | |||
134 | * items we think we'll end up using, and reserved_extents is the number | 137 | * items we think we'll end up using, and reserved_extents is the number |
135 | * of extent items we've reserved metadata for. | 138 | * of extent items we've reserved metadata for. |
136 | */ | 139 | */ |
137 | atomic_t outstanding_extents; | 140 | unsigned outstanding_extents; |
138 | atomic_t reserved_extents; | 141 | unsigned reserved_extents; |
139 | 142 | ||
140 | /* | 143 | /* |
141 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9f6f342900c9..3063f21d3fc6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -2134,7 +2134,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | |||
2134 | 2134 | ||
2135 | /* extent-tree.c */ | 2135 | /* extent-tree.c */ |
2136 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 2136 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
2137 | int num_items) | 2137 | unsigned num_items) |
2138 | { | 2138 | { |
2139 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 2139 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * |
2140 | 3 * num_items; | 2140 | 3 * num_items; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0a5bd67e2894..d7031e7dfd76 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3726,7 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3726 | if (commit_trans) { | 3726 | if (commit_trans) { |
3727 | if (trans) | 3727 | if (trans) |
3728 | return -EAGAIN; | 3728 | return -EAGAIN; |
3729 | |||
3730 | trans = btrfs_join_transaction(root); | 3729 | trans = btrfs_join_transaction(root); |
3731 | BUG_ON(IS_ERR(trans)); | 3730 | BUG_ON(IS_ERR(trans)); |
3732 | ret = btrfs_commit_transaction(trans, root); | 3731 | ret = btrfs_commit_transaction(trans, root); |
@@ -3946,6 +3945,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3946 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3945 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3947 | } | 3946 | } |
3948 | 3947 | ||
3948 | static unsigned drop_outstanding_extent(struct inode *inode) | ||
3949 | { | ||
3950 | unsigned dropped_extents = 0; | ||
3951 | |||
3952 | spin_lock(&BTRFS_I(inode)->lock); | ||
3953 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | ||
3954 | BTRFS_I(inode)->outstanding_extents--; | ||
3955 | |||
3956 | /* | ||
3957 | * If we have more or the same amount of outsanding extents than we have | ||
3958 | * reserved then we need to leave the reserved extents count alone. | ||
3959 | */ | ||
3960 | if (BTRFS_I(inode)->outstanding_extents >= | ||
3961 | BTRFS_I(inode)->reserved_extents) | ||
3962 | goto out; | ||
3963 | |||
3964 | dropped_extents = BTRFS_I(inode)->reserved_extents - | ||
3965 | BTRFS_I(inode)->outstanding_extents; | ||
3966 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | ||
3967 | out: | ||
3968 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3969 | return dropped_extents; | ||
3970 | } | ||
3971 | |||
3949 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 3972 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3950 | { | 3973 | { |
3951 | return num_bytes >>= 3; | 3974 | return num_bytes >>= 3; |
@@ -3955,9 +3978,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3955 | { | 3978 | { |
3956 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3979 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3957 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3980 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3958 | u64 to_reserve; | 3981 | u64 to_reserve = 0; |
3959 | int nr_extents; | 3982 | unsigned nr_extents = 0; |
3960 | int reserved_extents; | ||
3961 | int ret; | 3983 | int ret; |
3962 | 3984 | ||
3963 | if (btrfs_transaction_in_commit(root->fs_info)) | 3985 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3965,24 +3987,31 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3965 | 3987 | ||
3966 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 3988 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3967 | 3989 | ||
3968 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 3990 | spin_lock(&BTRFS_I(inode)->lock); |
3969 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | 3991 | BTRFS_I(inode)->outstanding_extents++; |
3992 | |||
3993 | if (BTRFS_I(inode)->outstanding_extents > | ||
3994 | BTRFS_I(inode)->reserved_extents) { | ||
3995 | nr_extents = BTRFS_I(inode)->outstanding_extents - | ||
3996 | BTRFS_I(inode)->reserved_extents; | ||
3997 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3970 | 3998 | ||
3971 | if (nr_extents > reserved_extents) { | ||
3972 | nr_extents -= reserved_extents; | ||
3973 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 3999 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
3974 | } else { | ||
3975 | nr_extents = 0; | ||
3976 | to_reserve = 0; | ||
3977 | } | 4000 | } |
4001 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3978 | 4002 | ||
3979 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4003 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3980 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4004 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3981 | if (ret) | 4005 | if (ret) { |
4006 | unsigned dropped; | ||
4007 | /* | ||
4008 | * We don't need the return value since our reservation failed, | ||
4009 | * we just need to clean up our counter. | ||
4010 | */ | ||
4011 | dropped = drop_outstanding_extent(inode); | ||
4012 | WARN_ON(dropped > 1); | ||
3982 | return ret; | 4013 | return ret; |
3983 | 4014 | } | |
3984 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); | ||
3985 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3986 | 4015 | ||
3987 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4016 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3988 | 4017 | ||
@@ -3992,36 +4021,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3992 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4021 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3993 | { | 4022 | { |
3994 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4023 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3995 | u64 to_free; | 4024 | u64 to_free = 0; |
3996 | int nr_extents; | 4025 | unsigned dropped; |
3997 | int reserved_extents; | ||
3998 | 4026 | ||
3999 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4027 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4000 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4028 | dropped = drop_outstanding_extent(inode); |
4001 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4002 | |||
4003 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | ||
4004 | do { | ||
4005 | int old, new; | ||
4006 | |||
4007 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4008 | if (nr_extents >= reserved_extents) { | ||
4009 | nr_extents = 0; | ||
4010 | break; | ||
4011 | } | ||
4012 | old = reserved_extents; | ||
4013 | nr_extents = reserved_extents - nr_extents; | ||
4014 | new = reserved_extents - nr_extents; | ||
4015 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4016 | reserved_extents, new); | ||
4017 | if (likely(old == reserved_extents)) | ||
4018 | break; | ||
4019 | reserved_extents = old; | ||
4020 | } while (1); | ||
4021 | 4029 | ||
4022 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4030 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4023 | if (nr_extents > 0) | 4031 | if (dropped > 0) |
4024 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); | 4032 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4025 | 4033 | ||
4026 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4034 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4027 | to_free); | 4035 | to_free); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bd6bbb877ff2..6e56a468d1f5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1239,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1239 | * managed to copy. | 1239 | * managed to copy. |
1240 | */ | 1240 | */ |
1241 | if (num_pages > dirty_pages) { | 1241 | if (num_pages > dirty_pages) { |
1242 | if (copied > 0) | 1242 | if (copied > 0) { |
1243 | atomic_inc( | 1243 | spin_lock(&BTRFS_I(inode)->lock); |
1244 | &BTRFS_I(inode)->outstanding_extents); | 1244 | BTRFS_I(inode)->outstanding_extents++; |
1245 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1246 | } | ||
1245 | btrfs_delalloc_release_space(inode, | 1247 | btrfs_delalloc_release_space(inode, |
1246 | (num_pages - dirty_pages) << | 1248 | (num_pages - dirty_pages) << |
1247 | PAGE_CACHE_SHIFT); | 1249 | PAGE_CACHE_SHIFT); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bf4d2b97d54..55d68ea1866b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1298,7 +1298,9 @@ static int btrfs_split_extent_hook(struct inode *inode, | |||
1298 | if (!(orig->state & EXTENT_DELALLOC)) | 1298 | if (!(orig->state & EXTENT_DELALLOC)) |
1299 | return 0; | 1299 | return 0; |
1300 | 1300 | ||
1301 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1301 | spin_lock(&BTRFS_I(inode)->lock); |
1302 | BTRFS_I(inode)->outstanding_extents++; | ||
1303 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1302 | return 0; | 1304 | return 0; |
1303 | } | 1305 | } |
1304 | 1306 | ||
@@ -1316,7 +1318,9 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1316 | if (!(other->state & EXTENT_DELALLOC)) | 1318 | if (!(other->state & EXTENT_DELALLOC)) |
1317 | return 0; | 1319 | return 0; |
1318 | 1320 | ||
1319 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1321 | spin_lock(&BTRFS_I(inode)->lock); |
1322 | BTRFS_I(inode)->outstanding_extents--; | ||
1323 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1320 | return 0; | 1324 | return 0; |
1321 | } | 1325 | } |
1322 | 1326 | ||
@@ -1339,10 +1343,13 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1339 | u64 len = state->end + 1 - state->start; | 1343 | u64 len = state->end + 1 - state->start; |
1340 | bool do_list = !is_free_space_inode(root, inode); | 1344 | bool do_list = !is_free_space_inode(root, inode); |
1341 | 1345 | ||
1342 | if (*bits & EXTENT_FIRST_DELALLOC) | 1346 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1343 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1347 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1344 | else | 1348 | } else { |
1345 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1349 | spin_lock(&BTRFS_I(inode)->lock); |
1350 | BTRFS_I(inode)->outstanding_extents++; | ||
1351 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1352 | } | ||
1346 | 1353 | ||
1347 | spin_lock(&root->fs_info->delalloc_lock); | 1354 | spin_lock(&root->fs_info->delalloc_lock); |
1348 | BTRFS_I(inode)->delalloc_bytes += len; | 1355 | BTRFS_I(inode)->delalloc_bytes += len; |
@@ -1372,10 +1379,13 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1372 | u64 len = state->end + 1 - state->start; | 1379 | u64 len = state->end + 1 - state->start; |
1373 | bool do_list = !is_free_space_inode(root, inode); | 1380 | bool do_list = !is_free_space_inode(root, inode); |
1374 | 1381 | ||
1375 | if (*bits & EXTENT_FIRST_DELALLOC) | 1382 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1376 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1383 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1377 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) | 1384 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { |
1378 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1385 | spin_lock(&BTRFS_I(inode)->lock); |
1386 | BTRFS_I(inode)->outstanding_extents--; | ||
1387 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1388 | } | ||
1379 | 1389 | ||
1380 | if (*bits & EXTENT_DO_ACCOUNTING) | 1390 | if (*bits & EXTENT_DO_ACCOUNTING) |
1381 | btrfs_delalloc_release_metadata(inode, len); | 1391 | btrfs_delalloc_release_metadata(inode, len); |
@@ -6735,8 +6745,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6735 | ei->index_cnt = (u64)-1; | 6745 | ei->index_cnt = (u64)-1; |
6736 | ei->last_unlink_trans = 0; | 6746 | ei->last_unlink_trans = 0; |
6737 | 6747 | ||
6738 | atomic_set(&ei->outstanding_extents, 0); | 6748 | spin_lock_init(&ei->lock); |
6739 | atomic_set(&ei->reserved_extents, 0); | 6749 | ei->outstanding_extents = 0; |
6750 | ei->reserved_extents = 0; | ||
6740 | 6751 | ||
6741 | ei->ordered_data_close = 0; | 6752 | ei->ordered_data_close = 0; |
6742 | ei->orphan_meta_reserved = 0; | 6753 | ei->orphan_meta_reserved = 0; |
@@ -6774,8 +6785,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6774 | 6785 | ||
6775 | WARN_ON(!list_empty(&inode->i_dentry)); | 6786 | WARN_ON(!list_empty(&inode->i_dentry)); |
6776 | WARN_ON(inode->i_data.nrpages); | 6787 | WARN_ON(inode->i_data.nrpages); |
6777 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6788 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6778 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); | 6789 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6779 | 6790 | ||
6780 | /* | 6791 | /* |
6781 | * This can happen where we create an inode, but somebody else also | 6792 | * This can happen where we create an inode, but somebody else also |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 09c9a8d26ee9..fd252fff4c66 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -938,7 +938,9 @@ again: | |||
938 | GFP_NOFS); | 938 | GFP_NOFS); |
939 | 939 | ||
940 | if (i_done != num_pages) { | 940 | if (i_done != num_pages) { |
941 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 941 | spin_lock(&BTRFS_I(inode)->lock); |
942 | BTRFS_I(inode)->outstanding_extents++; | ||
943 | spin_unlock(&BTRFS_I(inode)->lock); | ||
942 | btrfs_delalloc_release_space(inode, | 944 | btrfs_delalloc_release_space(inode, |
943 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | 945 | (num_pages - i_done) << PAGE_CACHE_SHIFT); |
944 | } | 946 | } |