diff options
author | Zhaolei <zhaolei@cn.fujitsu.com> | 2015-11-17 05:46:17 -0500 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-11-25 08:19:51 -0500 |
commit | 76a8efa171bf6cf37ffb83d3f62fed2e47e2abc8 (patch) | |
tree | 4039ddb49bef2996add0327a8d63d6df1338d826 | |
parent | da02c6898952a2bc251dd51ed9f897e0a72a853e (diff) |
btrfs: Continue replace when set_block_ro failed
xfstests/011 fails on a node with a small filesystem.
It can be reproduced with the following script:
# Reproducer for the dev-replace failure described in this commit.
# NOTE: SCRATCH_MNT is not defined in this script; it must be set by the
# caller's environment before running.

# Devices the filesystem is created on. This must be a bash array, because
# do_test expands it with "${DEV_LIST[@]}" and "${DEV_LIST[0]}".
DEV_LIST=(/dev/vdd /dev/vde)
# Target device handed to "btrfs replace start".
DEV_REPLACE="/dev/vdf"

# do_test MKFS_OPTS SIZE
#
# Creates a btrfs filesystem on DEV_LIST with MKFS_OPTS, mounts the first
# device on SCRATCH_MNT, writes a 1MiB file plus SIZE copies of it, and then
# replaces the first device with DEV_REPLACE.
#
# Returns 0 on success and 1 if mkfs, mount, a copy, or the replace fails
# (dmesg is printed when the replace fails).
do_test()
{
	local mkfs_opt="$1"
	local size="$2"
	local i

	# Clear the kernel log so a later dmesg shows only this run.
	dmesg -c >/dev/null
	umount "$SCRATCH_MNT" &>/dev/null

	echo mkfs.btrfs -f $mkfs_opt "${DEV_LIST[*]}"
	# $mkfs_opt is deliberately unquoted: it carries several options that
	# must be word-split into separate arguments.
	mkfs.btrfs -f $mkfs_opt "${DEV_LIST[@]}" || return 1
	# Bail out if the mount fails, otherwise the files below would be
	# written into the underlying directory instead of the test filesystem.
	mount "${DEV_LIST[0]}" "$SCRATCH_MNT" || return 1

	echo -n "Writing big files"
	dd if=/dev/urandom of="$SCRATCH_MNT/t0" bs=1M count=1 >/dev/null 2>&1
	for ((i = 1; i <= size; i++)); do
		echo -n .
		/bin/cp "$SCRATCH_MNT/t0" "$SCRATCH_MNT/t$i" || return 1
	done
	echo

	echo Start replace
	btrfs replace start -Bf "${DEV_LIST[0]}" "$DEV_REPLACE" "$SCRATCH_MNT" || {
		dmesg
		return 1
	}
	return 0
}

# Set size to a value near the filesystem size;
# for example, 1897 can trigger this bug on a 2.6G device.
#
# do_test is a shell function, so it is called by name rather than as ./do_test.
do_test "-d raid1 -m raid1" 1897
The system reports that the replace failed, with the following warning in dmesg:
[ 134.710853] BTRFS: dev_replace from /dev/vdd (devid 1) to /dev/vdf started
[ 135.542390] BTRFS: btrfs_scrub_dev(/dev/vdd, 1, /dev/vdf) failed -28
[ 135.543505] ------------[ cut here ]------------
[ 135.544127] WARNING: CPU: 0 PID: 4080 at fs/btrfs/dev-replace.c:428 btrfs_dev_replace_start+0x398/0x440()
[ 135.545276] Modules linked in:
[ 135.545681] CPU: 0 PID: 4080 Comm: btrfs Not tainted 4.3.0 #256
[ 135.546439] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
[ 135.547798] ffffffff81c5bfcf ffff88003cbb3d28 ffffffff817fe7b5 0000000000000000
[ 135.548774] ffff88003cbb3d60 ffffffff810a88f1 ffff88002b030000 00000000ffffffe4
[ 135.549774] ffff88003c080000 ffff88003c082588 ffff88003c28ab60 ffff88003cbb3d70
[ 135.550758] Call Trace:
[ 135.551086] [<ffffffff817fe7b5>] dump_stack+0x44/0x55
[ 135.551737] [<ffffffff810a88f1>] warn_slowpath_common+0x81/0xc0
[ 135.552487] [<ffffffff810a89e5>] warn_slowpath_null+0x15/0x20
[ 135.553211] [<ffffffff81448c88>] btrfs_dev_replace_start+0x398/0x440
[ 135.554051] [<ffffffff81412c3e>] btrfs_ioctl+0x1d2e/0x25c0
[ 135.554722] [<ffffffff8114c7ba>] ? __audit_syscall_entry+0xaa/0xf0
[ 135.555506] [<ffffffff8111ab36>] ? current_kernel_time64+0x56/0xa0
[ 135.556304] [<ffffffff81201e3d>] do_vfs_ioctl+0x30d/0x580
[ 135.557009] [<ffffffff8114c7ba>] ? __audit_syscall_entry+0xaa/0xf0
[ 135.557855] [<ffffffff810011d1>] ? do_audit_syscall_entry+0x61/0x70
[ 135.558669] [<ffffffff8120d1c1>] ? __fget_light+0x61/0x90
[ 135.559374] [<ffffffff81202124>] SyS_ioctl+0x74/0x80
[ 135.559987] [<ffffffff81809857>] entry_SYSCALL_64_fastpath+0x12/0x6f
[ 135.560842] ---[ end trace 2a5c1fc3205abbdd ]---
Reason:
When a large amount of data is written to the fs, all of the free space
is allocated to data chunks.
Operations such as scrub need to call set_block_ro(), and when there is
only one metadata chunk in the system (or all other metadata chunks
are full), that function tries to allocate a new chunk
and fails because there is no space left on the device.
Fix:
When set_block_ro fails for a metadata chunk, it is not a problem,
because scrub_lock pauses commit_transaction at the same time, and
metadata is always COWed, so in-flight writepages will not
write data to the same place as scrub/replace.
Letting replace continue in this case is not a problem.
Tested with the above script and xfstests/011, plus 100 runs of xfstests/070.
Changelog v1->v2:
1: Add detail comments in source and commit-message.
2: Add dmesg detail into commit-message.
3: Limit return value of -ENOSPC to be passed.
All suggested by: Filipe Manana <fdmanana@gmail.com>
Suggested-by: Filipe Manana <fdmanana@gmail.com>
Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/scrub.c | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2907a77fb1f6..6b3fd51d9a99 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -3483,6 +3483,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3483 | u64 length; | 3483 | u64 length; |
3484 | u64 chunk_offset; | 3484 | u64 chunk_offset; |
3485 | int ret = 0; | 3485 | int ret = 0; |
3486 | int ro_set; | ||
3486 | int slot; | 3487 | int slot; |
3487 | struct extent_buffer *l; | 3488 | struct extent_buffer *l; |
3488 | struct btrfs_key key; | 3489 | struct btrfs_key key; |
@@ -3568,7 +3569,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3568 | scrub_pause_on(fs_info); | 3569 | scrub_pause_on(fs_info); |
3569 | ret = btrfs_inc_block_group_ro(root, cache); | 3570 | ret = btrfs_inc_block_group_ro(root, cache); |
3570 | scrub_pause_off(fs_info); | 3571 | scrub_pause_off(fs_info); |
3571 | if (ret) { | 3572 | |
3573 | if (ret == 0) { | ||
3574 | ro_set = 1; | ||
3575 | } else if (ret == -ENOSPC) { | ||
3576 | /* | ||
3577 | * btrfs_inc_block_group_ro return -ENOSPC when it | ||
3578 | * failed in creating new chunk for metadata. | ||
3579 | * It is not a problem for scrub/replace, because | ||
3580 | * metadata are always cowed, and our scrub paused | ||
3581 | * commit_transactions. | ||
3582 | */ | ||
3583 | ro_set = 0; | ||
3584 | } else { | ||
3585 | btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", | ||
3586 | ret); | ||
3572 | btrfs_put_block_group(cache); | 3587 | btrfs_put_block_group(cache); |
3573 | break; | 3588 | break; |
3574 | } | 3589 | } |
@@ -3611,7 +3626,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
3611 | 3626 | ||
3612 | scrub_pause_off(fs_info); | 3627 | scrub_pause_off(fs_info); |
3613 | 3628 | ||
3614 | btrfs_dec_block_group_ro(root, cache); | 3629 | if (ro_set) |
3630 | btrfs_dec_block_group_ro(root, cache); | ||
3615 | 3631 | ||
3616 | btrfs_put_block_group(cache); | 3632 | btrfs_put_block_group(cache); |
3617 | if (ret) | 3633 | if (ret) |