aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
author Filipe Manana <fdmanana@suse.com> 2015-05-20 09:01:54 -0400
committer Chris Mason <clm@fb.com> 2015-06-03 07:03:04 -0400
commit 4fbcdf6694544fd9d2aedbc1e73e52b90a4fcc20 (patch)
tree 2d5979c0e29cc6f21a04fa521b261bfb75b04168 /fs/btrfs/extent-tree.c
parent 0d2b2372e097cd3b4150d3ec91e79ac3c5cc750e (diff)
Btrfs: fix -ENOSPC when finishing block group creation
While creating a block group, we often end up getting ENOSPC while updating the chunk tree, which leads to a transaction abort that produces a trace like the following:

[30670.116368] WARNING: CPU: 4 PID: 20735 at fs/btrfs/super.c:260 __btrfs_abort_transaction+0x52/0x106 [btrfs]()
[30670.117777] BTRFS: Transaction aborted (error -28)
(...)
[30670.163567] Call Trace:
[30670.163906] [<ffffffff8142fa46>] dump_stack+0x4f/0x7b
[30670.164522] [<ffffffff8108b6a2>] ? console_unlock+0x361/0x3ad
[30670.165171] [<ffffffff81045ea5>] warn_slowpath_common+0xa1/0xbb
[30670.166323] [<ffffffffa035daa7>] ? __btrfs_abort_transaction+0x52/0x106 [btrfs]
[30670.167213] [<ffffffff81045f05>] warn_slowpath_fmt+0x46/0x48
[30670.167862] [<ffffffffa035daa7>] __btrfs_abort_transaction+0x52/0x106 [btrfs]
[30670.169116] [<ffffffffa03743d7>] btrfs_create_pending_block_groups+0x101/0x130 [btrfs]
[30670.170593] [<ffffffffa038426a>] __btrfs_end_transaction+0x84/0x366 [btrfs]
[30670.171960] [<ffffffffa038455c>] btrfs_end_transaction+0x10/0x12 [btrfs]
[30670.174649] [<ffffffffa036eb6b>] btrfs_check_data_free_space+0x11f/0x27c [btrfs]
[30670.176092] [<ffffffffa039450d>] btrfs_fallocate+0x7c8/0xb96 [btrfs]
[30670.177218] [<ffffffff812459f2>] ? __this_cpu_preempt_check+0x13/0x15
[30670.178622] [<ffffffff81152447>] vfs_fallocate+0x14c/0x1de
[30670.179642] [<ffffffff8116b915>] ? __fget_light+0x2d/0x4f
[30670.180692] [<ffffffff81152863>] SyS_fallocate+0x47/0x62
[30670.186737] [<ffffffff81435b32>] system_call_fastpath+0x12/0x17
[30670.187792] ---[ end trace 0373e6b491c4a8cc ]---

This is because we don't do proper space reservation for the chunk block reserve when we have multiple tasks allocating chunks in parallel.

So block group creation has 2 phases, and the first phase essentially checks if there is enough space in the system space_info, allocating a new system chunk if there isn't, while the second phase updates the device, extent and chunk trees.
However, because the updates to the chunk tree happen in the second phase, if we have N tasks, each with its own transaction handle, allocating new chunks in parallel and if there is only enough space in the system space_info to allocate M chunks, where M < N, none of the tasks ends up allocating a new system chunk in the first phase and N - M tasks will get -ENOSPC when attempting to update the chunk tree in phase 2 if they need to COW any nodes/leafs from the chunk tree.

Fix this by doing proper reservation in the chunk block reserve.

The issue could be reproduced by running fstests generic/038 in a loop, which eventually triggered the problem.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 44
1 files changed, 42 insertions, 2 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1cbc71d8cb96..4e08e47ace30 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4116,11 +4116,19 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
4116 struct btrfs_space_info *info; 4116 struct btrfs_space_info *info;
4117 u64 left; 4117 u64 left;
4118 u64 thresh; 4118 u64 thresh;
4119 int ret = 0;
4120
4121 /*
4122 * Needed because we can end up allocating a system chunk and for an
4123 * atomic and race free space reservation in the chunk block reserve.
4124 */
4125 ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
4119 4126
4120 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 4127 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4121 spin_lock(&info->lock); 4128 spin_lock(&info->lock);
4122 left = info->total_bytes - info->bytes_used - info->bytes_pinned - 4129 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
4123 info->bytes_reserved - info->bytes_readonly; 4130 info->bytes_reserved - info->bytes_readonly -
4131 info->bytes_may_use;
4124 spin_unlock(&info->lock); 4132 spin_unlock(&info->lock);
4125 4133
4126 thresh = get_system_chunk_thresh(root, type); 4134 thresh = get_system_chunk_thresh(root, type);
@@ -4134,7 +4142,21 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
4134 u64 flags; 4142 u64 flags;
4135 4143
4136 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0); 4144 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
4137 btrfs_alloc_chunk(trans, root, flags); 4145 /*
4146 * Ignore failure to create system chunk. We might end up not
4147 * needing it, as we might not need to COW all nodes/leafs from
4148 * the paths we visit in the chunk tree (they were already COWed
4149 * or created in the current transaction for example).
4150 */
4151 ret = btrfs_alloc_chunk(trans, root, flags);
4152 }
4153
4154 if (!ret) {
4155 ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
4156 &root->fs_info->chunk_block_rsv,
4157 thresh, BTRFS_RESERVE_NO_FLUSH);
4158 if (!ret)
4159 trans->chunk_bytes_reserved += thresh;
4138 } 4160 }
4139} 4161}
4140 4162
@@ -5192,6 +5214,24 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
5192 trans->bytes_reserved = 0; 5214 trans->bytes_reserved = 0;
5193} 5215}
5194 5216
5217/*
5218 * Release the bytes this transaction handle accounted in trans->chunk_bytes_reserved from fs_info->chunk_block_rsv and reset that counter to zero; a no-op when the counter is already zero.
5219 * To be called after all the new block groups attached to the transaction handle have been created (btrfs_create_pending_block_groups()).
5220 */
5221void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5222{
5223 struct btrfs_fs_info *fs_info = trans->root->fs_info;
5224
5225 if (!trans->chunk_bytes_reserved) /* nothing accounted on this handle, nothing to release */
5226 return;
5227
5228 WARN_ON_ONCE(!list_empty(&trans->new_bgs)); /* per the contract above, new_bgs is expected to be empty by now */
5229
5230 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, /* NOTE(review): NULL third argument presumably means "no destination reserve" - confirm against block_rsv_release_bytes() */
5231 trans->chunk_bytes_reserved);
5232 trans->chunk_bytes_reserved = 0; /* reset so a repeated call returns early instead of releasing twice */
5233}
5234
5195/* Can only return 0 or -ENOSPC */ 5235/* Can only return 0 or -ENOSPC */
5196int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 5236int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5197 struct inode *inode) 5237 struct inode *inode)