Btrfs: incremental send, fix unnecessary hole writes for sparse files

When using the NO_HOLES feature, during an incremental send we often issue
write operations for holes when we should not, because that range is already
a hole in the destination snapshot. While that does not change the contents
of the file at the receiver, it prevents the preservation of file holes,
leading to wasted disk space and extra IO during send/receive.

A couple of examples where the holes are not preserved follow.

  $ mkfs.btrfs -O no-holes -f /dev/sdb
  $ mount /dev/sdb /mnt
  $ xfs_io -f -c "pwrite -S 0xaa 0 4K" /mnt/foo
  $ xfs_io -f -c "pwrite -S 0xaa 0 4K" -c "pwrite -S 0xbb 1028K 4K" /mnt/bar

  $ btrfs subvolume snapshot -r /mnt /mnt/snap1

  # Now add one new extent to our first test file, increasing its size and
  # leaving a 1 MiB hole between the first extent and this new extent.
  $ xfs_io -c "pwrite -S 0xbb 1028K 4K" /mnt/foo

  # Now overwrite the last extent of our second test file.
  $ xfs_io -c "pwrite -S 0xcc 1028K 4K" /mnt/bar

  $ btrfs subvolume snapshot -r /mnt /mnt/snap2

  $ xfs_io -r -c "fiemap -v" /mnt/snap2/foo
  /mnt/snap2/foo:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..7]:          25088..25095         8 0x2000
     1: [8..2055]:       hole              2048
     2: [2056..2063]:    24576..24583         8 0x2001

  $ xfs_io -r -c "fiemap -v" /mnt/snap2/bar
  /mnt/snap2/bar:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..7]:          25096..25103         8 0x2000
     1: [8..2055]:       hole              2048
     2: [2056..2063]:    24584..24591         8 0x2001

  $ btrfs send /mnt/snap1 -f /tmp/1.snap
  $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap

  $ umount /mnt
  # It's not relevant to enable no-holes in the new filesystem.
  $ mkfs.btrfs -O no-holes -f /dev/sdc
  $ mount /dev/sdc /mnt

  $ btrfs receive /mnt -f /tmp/1.snap
  $ btrfs receive /mnt -f /tmp/2.snap

  $ xfs_io -r -c "fiemap -v" /mnt/snap2/foo
  /mnt/snap2/foo:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..7]:          24576..24583         8 0x2000
     1: [8..2063]:       25624..27679      2056 0x1

  $ xfs_io -r -c "fiemap -v" /mnt/snap2/bar
  /mnt/snap2/bar:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..7]:          24584..24591         8 0x2000
     1: [8..2063]:       27680..29735      2056 0x1

The holes do not exist in the second filesystem and they were replaced with
extents filled with the byte 0x00, making each file take 1032 KiB of space
instead of 8 KiB.

So fix this by not issuing the write operations consisting of buffers filled
with the byte 0x00 when the destination snapshot already has a hole for the
respective range.

A test case for fstests will follow soon.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
author: Filipe Manana <fdmanana@suse.com> 2017-02-14 12:56:32 -0500
committer: Filipe Manana <fdmanana@suse.com> 2017-02-23 19:39:21 -0500
commit: 82bfb2e7b645c8f228dc3b6d3b27b0b10125ca4f (patch)
tree: 9ef2c4aa759b10d7da32bed5f9d46e6ae78c203a
parent: a9b9477db2937934e469db800317ec3ef7e81b51 (diff)
1 files changed, 86 insertions, 2 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 712922ea64d2..456c8901489b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5306,6 +5306,81 @@ out:
        return ret;
 }
+static int range_is_hole_in_parent(struct send_ctx *sctx,
+                                   const u64 start,
+                                   const u64 end)
+{       /*
+         * Decide whether the file range [start, end) of inode sctx->cur_ino
+         * holds no allocated extent in sctx->parent_root, so that the caller
+         * can skip sending a hole for that range.
+         *
+         * Returns 1 when no file extent item with a non-zero disk_bytenr
+         * overlaps the range (items may be absent altogether), 0 as soon as
+         * one such item is found, and a negative errno when the path
+         * allocation, the tree search or the move to the next leaf fails.
+         *
+         * NOTE(review): disk_bytenr is read for every overlapping item,
+         * including those of type BTRFS_FILE_EXTENT_INLINE - confirm that
+         * the field is meaningful for inline items.
+         */
+        struct btrfs_path *path;
+        struct btrfs_key key;
+        struct btrfs_root *root = sctx->parent_root;
+        u64 search_start = start; /* moves past each hole extent found */
+        int ret;
+        path = alloc_path_for_send();
+        if (!path)
+                return -ENOMEM;
+        key.objectid = sctx->cur_ino; /* first extent item at or after start */
+        key.type = BTRFS_EXTENT_DATA_KEY;
+        key.offset = search_start;
+        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+        if (ret < 0)
+                goto out;
+        if (ret > 0 && path->slots[0] > 0) /* presumably no exact match */
+                path->slots[0]--; /* previous item may cover start */
+        while (search_start < end) {
+                struct extent_buffer *leaf = path->nodes[0];
+                int slot = path->slots[0];
+                struct btrfs_file_extent_item *fi;
+                u64 extent_end;
+                if (slot >= btrfs_header_nritems(leaf)) { /* past leaf end */
+                        ret = btrfs_next_leaf(root, path);
+                        if (ret < 0)
+                                goto out;
+                        else if (ret > 0) /* presumably no further leaf */
+                                break;
+                        continue; /* re-read leaf and slot from the path */
+                }
+                btrfs_item_key_to_cpu(leaf, &key, slot);
+                if (key.objectid < sctx->cur_ino ||
+                    key.type < BTRFS_EXTENT_DATA_KEY)
+                        goto next; /* sorts before the extent items we want */
+                if (key.objectid > sctx->cur_ino ||
+                    key.type > BTRFS_EXTENT_DATA_KEY ||
+                    key.offset >= end)
+                        break; /* beyond this inode's extents or the range */
+                fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+                if (btrfs_file_extent_type(leaf, fi) ==
+                    BTRFS_FILE_EXTENT_INLINE) {
+                        u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+                        extent_end = ALIGN(key.offset + size, /* round up */
+                                           root->fs_info->sectorsize);
+                } else {
+                        extent_end = key.offset +
+                                btrfs_file_extent_num_bytes(leaf, fi);
+                }
+                if (extent_end <= start) /* ends before the range begins */
+                        goto next;
+                if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
+                        search_start = extent_end; /* treated as a hole */
+                        goto next;
+                }
+                ret = 0; /* an allocated extent overlaps the range */
+                goto out;
+next:
+                path->slots[0]++;
+        }
+        ret = 1; /* only holes, or no items at all, in the range */
+out:
+        btrfs_free_path(path);
+        return ret;
+}
 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
                           struct btrfs_key *key)
 {
@@ -5350,8 +5425,17 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
                        return ret;
        }
-        if (sctx->cur_inode_last_extent < key->offset)
+        if (sctx->cur_inode_last_extent < key->offset) {
-                ret = send_hole(sctx, key->offset);
+                ret = range_is_hole_in_parent(sctx,
+                                              sctx->cur_inode_last_extent,
+                                              key->offset);
+                if (ret < 0)
+                        return ret;
+                else if (ret == 0)
+                        ret = send_hole(sctx, key->offset);
+                else
+                        ret = 0;
+        }
        sctx->cur_inode_last_extent = extent_end;
        return ret;
 }
author	Filipe Manana <fdmanana@suse.com>	2017-02-14 12:56:32 -0500
committer	Filipe Manana <fdmanana@suse.com>	2017-02-23 19:39:21 -0500
commit	82bfb2e7b645c8f228dc3b6d3b27b0b10125ca4f (patch)
tree	9ef2c4aa759b10d7da32bed5f9d46e6ae78c203a
parent	a9b9477db2937934e469db800317ec3ef7e81b51 (diff)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 712922ea64d2..456c8901489b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c
@@ -5306,6 +5306,81 @@ out:
5306	return ret;	5306	return ret;
5307	}	5307	}
5308		5308
		5309	static int range_is_hole_in_parent(struct send_ctx *sctx,
		5310	const u64 start,
		5311	const u64 end)
		5312	{
		5313	struct btrfs_path *path;
		5314	struct btrfs_key key;
		5315	struct btrfs_root *root = sctx->parent_root;
		5316	u64 search_start = start;
		5317	int ret;
		5318
		5319	path = alloc_path_for_send();
		5320	if (!path)
		5321	return -ENOMEM;
		5322
		5323	key.objectid = sctx->cur_ino;
		5324	key.type = BTRFS_EXTENT_DATA_KEY;
		5325	key.offset = search_start;
		5326	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		5327	if (ret < 0)
		5328	goto out;
		5329	if (ret > 0 && path->slots[0] > 0)
		5330	path->slots[0]--;
		5331
		5332	while (search_start < end) {
		5333	struct extent_buffer *leaf = path->nodes[0];
		5334	int slot = path->slots[0];
		5335	struct btrfs_file_extent_item *fi;
		5336	u64 extent_end;
		5337
		5338	if (slot >= btrfs_header_nritems(leaf)) {
		5339	ret = btrfs_next_leaf(root, path);
		5340	if (ret < 0)
		5341	goto out;
		5342	else if (ret > 0)
		5343	break;
		5344	continue;
		5345	}
		5346
		5347	btrfs_item_key_to_cpu(leaf, &key, slot);
		5348	if (key.objectid < sctx->cur_ino ||
		5349	key.type < BTRFS_EXTENT_DATA_KEY)
		5350	goto next;
		5351	if (key.objectid > sctx->cur_ino ||
		5352	key.type > BTRFS_EXTENT_DATA_KEY ||
		5353	key.offset >= end)
		5354	break;
		5355
		5356	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
		5357	if (btrfs_file_extent_type(leaf, fi) ==
		5358	BTRFS_FILE_EXTENT_INLINE) {
		5359	u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
		5360
		5361	extent_end = ALIGN(key.offset + size,
		5362	root->fs_info->sectorsize);
		5363	} else {
		5364	extent_end = key.offset +
		5365	btrfs_file_extent_num_bytes(leaf, fi);
		5366	}
		5367	if (extent_end <= start)
		5368	goto next;
		5369	if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
		5370	search_start = extent_end;
		5371	goto next;
		5372	}
		5373	ret = 0;
		5374	goto out;
		5375	next:
		5376	path->slots[0]++;
		5377	}
		5378	ret = 1;
		5379	out:
		5380	btrfs_free_path(path);
		5381	return ret;
		5382	}
		5383
5309	static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,	5384	static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
5310	struct btrfs_key *key)	5385	struct btrfs_key *key)
5311	{	5386	{
@@ -5350,8 +5425,17 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
5350	return ret;	5425	return ret;
5351	}	5426	}
5352		5427
5353	if (sctx->cur_inode_last_extent < key->offset)	5428	if (sctx->cur_inode_last_extent < key->offset) {
5354	ret = send_hole(sctx, key->offset);	5429	ret = range_is_hole_in_parent(sctx,
		5430	sctx->cur_inode_last_extent,
		5431	key->offset);
		5432	if (ret < 0)
		5433	return ret;
		5434	else if (ret == 0)
		5435	ret = send_hole(sctx, key->offset);
		5436	else
		5437	ret = 0;
		5438	}
5355	sctx->cur_inode_last_extent = extent_end;	5439	sctx->cur_inode_last_extent = extent_end;
5356	return ret;	5440	return ret;
5357	}	5441	}