summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/send.c
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2019-07-17 08:23:39 -0400
committerDavid Sterba <dsterba@suse.com>2019-07-30 12:25:11 -0400
commitb4f9a1a87a48c255bb90d8a6c3d555a1abb88130 (patch)
tree4b457fe67c568c1d2c0bd65dc8bed80ed180fb74 /fs/btrfs/send.c
parenta3b46b86ca76d7f9d487e6a0b594fd1984e0796e (diff)
Btrfs: fix incremental send failure after deduplication
When doing an incremental send operation we can fail if we previously did deduplication operations against a file that exists in both snapshots. In that case we will fail the send operation with -EIO and print a message to dmesg/syslog like the following: BTRFS error (device sdc): Send: inconsistent snapshot, found updated \ extent for inode 257 without updated inode item, send root is 258, \ parent root is 257 This requires that we deduplicate into the same file in both snapshots the same number of times in each snapshot. The issue happens because a deduplication only updates the iversion of an inode and does not update any other field of the inode, therefore if we deduplicate the file in each snapshot the same number of times, the inode will have the same iversion value (stored as the "sequence" field on the inode item) in both snapshots, therefore it will be seen as unchanged in the send snapshot while there are new/updated/deleted extent items when comparing to the parent snapshot. This makes the send operation return -EIO and print an error message. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt # Create our first file. The first half of the file has several 64Kb # extents while the second half has a single 512Kb extent. $ xfs_io -f -s -c "pwrite -S 0xb8 -b 64K 0 512K" /mnt/foo $ xfs_io -c "pwrite -S 0xb8 512K 512K" /mnt/foo # Create the base snapshot and the parent send stream from it. $ btrfs subvolume snapshot -r /mnt /mnt/mysnap1 $ btrfs send -f /tmp/1.snap /mnt/mysnap1 # Create our second file, that has exactly the same data as the first # file. $ xfs_io -f -c "pwrite -S 0xb8 0 1M" /mnt/bar # Create the second snapshot, used for the incremental send, before # doing the file deduplication. $ btrfs subvolume snapshot -r /mnt /mnt/mysnap2 # Now before creating the incremental send stream: # # 1) Deduplicate into a subrange of file foo in snapshot mysnap1. 
This # will drop several extent items and add a new one, also updating # the inode's iversion (sequence field in inode item) by 1, but not # any other field of the inode; # # 2) Deduplicate into a different subrange of file foo in snapshot # mysnap2. This will replace an extent item with a new one, also # updating the inode's iversion by 1 but not any other field of the # inode. # # After these two deduplication operations, the inode items, for file # foo, are identical in both snapshots, but we have different extent # items for this inode in both snapshots. We want to check this doesn't # cause send to fail with an error or produce an incorrect stream. $ xfs_io -r -c "dedupe /mnt/bar 0 0 512K" /mnt/mysnap1/foo $ xfs_io -r -c "dedupe /mnt/bar 512K 512K 512K" /mnt/mysnap2/foo # Create the incremental send stream. $ btrfs send -p /mnt/mysnap1 -f /tmp/2.snap /mnt/mysnap2 ERROR: send ioctl failed with -5: Input/output error This issue started happening back in 2015 when deduplication was updated to not update the inode's ctime and mtime and update only the iversion. Back then we would hit a BUG_ON() in send, but later in 2016 send was updated to return -EIO and print the error message instead of doing the BUG_ON(). A test case for fstests follows soon. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203933 Fixes: 1c919a5e13702c ("btrfs: don't update mtime/ctime on deduped inodes") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r--fs/btrfs/send.c77
1 files changed, 15 insertions, 62 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 69b59bf75882..c3c0c064c25d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx,
6322{ 6322{
6323 int ret = 0; 6323 int ret = 0;
6324 6324
6325 if (sctx->cur_ino != sctx->cmp_key->objectid) { 6325 /*
6326 6326 * We have found an extent item that changed without the inode item
6327 if (result == BTRFS_COMPARE_TREE_CHANGED) { 6327 * having changed. This can happen either after relocation (where the
6328 struct extent_buffer *leaf_l; 6328 * disk_bytenr of an extent item is replaced at
6329 struct extent_buffer *leaf_r; 6329 * relocation.c:replace_file_extents()) or after deduplication into a
6330 struct btrfs_file_extent_item *ei_l; 6330 * file in both the parent and send snapshots (where an extent item can
6331 struct btrfs_file_extent_item *ei_r; 6331 * get modified or replaced with a new one). Note that deduplication
6332 6332 * updates the inode item, but it only changes the iversion (sequence
6333 leaf_l = sctx->left_path->nodes[0]; 6333 * field in the inode item) of the inode, so if a file is deduplicated
6334 leaf_r = sctx->right_path->nodes[0]; 6334 * the same amount of times in both the parent and send snapshots, its
6335 ei_l = btrfs_item_ptr(leaf_l, 6335 * iversion becomes the same in both snapshots, whence the inode item is
6336 sctx->left_path->slots[0], 6336 * the same on both snapshots.
6337 struct btrfs_file_extent_item); 6337 */
6338 ei_r = btrfs_item_ptr(leaf_r, 6338 if (sctx->cur_ino != sctx->cmp_key->objectid)
6339 sctx->right_path->slots[0], 6339 return 0;
6340 struct btrfs_file_extent_item);
6341
6342 /*
6343 * We may have found an extent item that has changed
6344 * only its disk_bytenr field and the corresponding
6345 * inode item was not updated. This case happens due to
6346 * very specific timings during relocation when a leaf
6347 * that contains file extent items is COWed while
6348 * relocation is ongoing and its in the stage where it
6349 * updates data pointers. So when this happens we can
6350 * safely ignore it since we know it's the same extent,
6351 * but just at different logical and physical locations
6352 * (when an extent is fully replaced with a new one, we
6353 * know the generation number must have changed too,
6354 * since snapshot creation implies committing the current
6355 * transaction, and the inode item must have been updated
6356 * as well).
6357 * This replacement of the disk_bytenr happens at
6358 * relocation.c:replace_file_extents() through
6359 * relocation.c:btrfs_reloc_cow_block().
6360 */
6361 if (btrfs_file_extent_generation(leaf_l, ei_l) ==
6362 btrfs_file_extent_generation(leaf_r, ei_r) &&
6363 btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
6364 btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
6365 btrfs_file_extent_compression(leaf_l, ei_l) ==
6366 btrfs_file_extent_compression(leaf_r, ei_r) &&
6367 btrfs_file_extent_encryption(leaf_l, ei_l) ==
6368 btrfs_file_extent_encryption(leaf_r, ei_r) &&
6369 btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
6370 btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
6371 btrfs_file_extent_type(leaf_l, ei_l) ==
6372 btrfs_file_extent_type(leaf_r, ei_r) &&
6373 btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
6374 btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
6375 btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
6376 btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
6377 btrfs_file_extent_offset(leaf_l, ei_l) ==
6378 btrfs_file_extent_offset(leaf_r, ei_r) &&
6379 btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
6380 btrfs_file_extent_num_bytes(leaf_r, ei_r))
6381 return 0;
6382 }
6383
6384 inconsistent_snapshot_error(sctx, result, "extent");
6385 return -EIO;
6386 }
6387 6340
6388 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 6341 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6389 if (result != BTRFS_COMPARE_TREE_DELETED) 6342 if (result != BTRFS_COMPARE_TREE_DELETED)