diff options
author | Filipe Manana <fdmanana@suse.com> | 2015-11-19 05:57:20 -0500 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-11-25 08:19:51 -0500 |
commit | 020d5b7366fc03e4bf84142ae6f63031ac504e33 (patch) | |
tree | 8f2c36c78f0d213d379e172f3a824c9d90ab1b2b | |
parent | 31388ab2edac833defa4193172edc1d409868bfb (diff) |
Btrfs: fix race between scrub and block group deletion
Scrub can race with the cleaner kthread deleting block groups that are
unused (and with relocation too) leading to a failure with error -EINVAL
that gets returned to user space.
The following diagram illustrates how it happens:
```
            CPU 1                                   CPU 2

                                        cleaner kthread
                                          btrfs_delete_unused_bgs()

                                            gets block group X from
                                            fs_info->unused_bgs

                                            sets block group to RO

                                            btrfs_remove_chunk(bg X)
                                              deletes device extents

scrub_enumerate_chunks()

  searches device tree using
  its commit root

  finds device extent for
  block group X

  gets block group X from the tree
  fs_info->block_group_cache_tree
  (via btrfs_lookup_block_group())

  sets bg X to RO (again)

                                            btrfs_remove_block_group(bg X)

                                              deletes block group from
                                              fs_info->block_group_cache_tree

                                              removes extent map from
                                              fs_info->mapping_tree

  scrub_chunk(offset X)

    searches fs_info->mapping_tree
    for extent map starting at
    offset X

      --> doesn't find any such
          extent map

      --> returns -EINVAL and scrub
          errors out to userspace
          with -EINVAL
```
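Fix this by treating an extent map lookup failure as an indication that the
block group was deleted: scrub_chunk() now checks, under the block group's
lock, whether the block group is flagged as removed, and returns -EINVAL
only if it is not.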
Issue reproduced with fstest btrfs/071.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/scrub.c | 20 |
1 files changed, 16 insertions, 4 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6b3fd51d9a99..68af3169d527 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -3432,7 +3432,9 @@ out: | |||
3432 | static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, | 3432 | static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, |
3433 | struct btrfs_device *scrub_dev, | 3433 | struct btrfs_device *scrub_dev, |
3434 | u64 chunk_offset, u64 length, | 3434 | u64 chunk_offset, u64 length, |
3435 | u64 dev_offset, int is_dev_replace) | 3435 | u64 dev_offset, |
3436 | struct btrfs_block_group_cache *cache, | ||
3437 | int is_dev_replace) | ||
3436 | { | 3438 | { |
3437 | struct btrfs_mapping_tree *map_tree = | 3439 | struct btrfs_mapping_tree *map_tree = |
3438 | &sctx->dev_root->fs_info->mapping_tree; | 3440 | &sctx->dev_root->fs_info->mapping_tree; |
@@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, | |||
3445 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 3447 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
3446 | read_unlock(&map_tree->map_tree.lock); | 3448 | read_unlock(&map_tree->map_tree.lock); |
3447 | 3449 | ||
3448 | if (!em) | 3450 | if (!em) { |
3449 | return -EINVAL; | 3451 | /* |
3452 | * Might have been an unused block group deleted by the cleaner | ||
3453 | * kthread or relocation. | ||
3454 | */ | ||
3455 | spin_lock(&cache->lock); | ||
3456 | if (!cache->removed) | ||
3457 | ret = -EINVAL; | ||
3458 | spin_unlock(&cache->lock); | ||
3459 | |||
3460 | return ret; | ||
3461 | } | ||
3450 | 3462 | ||
3451 | map = (struct map_lookup *)em->bdev; | 3463 | map = (struct map_lookup *)em->bdev; |
3452 | if (em->start != chunk_offset) | 3464 | if (em->start != chunk_offset) |
@@ -3592,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3592 | dev_replace->cursor_left = found_key.offset; | 3604 | dev_replace->cursor_left = found_key.offset; |
3593 | dev_replace->item_needs_writeback = 1; | 3605 | dev_replace->item_needs_writeback = 1; |
3594 | ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, | 3606 | ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, |
3595 | found_key.offset, is_dev_replace); | 3607 | found_key.offset, cache, is_dev_replace); |
3596 | 3608 | ||
3597 | /* | 3609 | /* |
3598 | * flush, submit all pending read and write bios, afterwards | 3610 | * flush, submit all pending read and write bios, afterwards |