aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2018-07-24 06:54:04 -0400
committerDavid Sterba <dsterba@suse.com>2018-08-06 07:12:59 -0400
commit46b2f4590aab71d31088a265c86026b1e96c9de4 (patch)
tree2fb710b223424ae1a78e676e3d1142ae11a18c95
parent0d836392cadd5535f4184d46d901a82eb276ed62 (diff)
Btrfs: fix send failure when root has deleted files still open
The more common use case of send involves creating a RO snapshot and then using it for a send operation. In this case it's not possible to have inodes in the snapshot that have a link count of zero (inode with an orphan item) since during snapshot creation we do the orphan cleanup. However, other less common use cases for send can end up seeing inodes with a link count of zero and in this case the send operation fails with an ENOENT error because any attempt to generate a path for the inode, with the purpose of creating it or updating it at the receiver, fails since there are no inode reference items. One use case is to use a regular subvolume for a send operation after turning it to RO mode or turning a RW snapshot into RO mode and then using it for a send operation. In both cases, if a file gets all its hard links deleted while there is an open file descriptor before turning the subvolume/snapshot into RO mode, the send operation will encounter an inode with a link count of zero and then fail with errno ENOENT. Example using a full send with a subvolume: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar # keep an open file descriptor on file bar $ exec 73</mnt/sv1/bar $ unlink /mnt/sv1/bar # Turn the subvolume to RO mode and use it for a full send, while # holding the open file descriptor. $ btrfs property set /mnt/sv1 ro true $ btrfs send -f /tmp/full.send /mnt/sv1 At subvol /mnt/sv1 ERROR: send ioctl failed with -2: No such file or directory Example using an incremental send with snapshots: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1 $ echo "hello world" >> /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2 # Turn the second snapshot to RW mode and delete file foo while # holding an open file descriptor on it. 
$ btrfs property set /mnt/snap2 ro false $ exec 73</mnt/snap2/foo $ unlink /mnt/snap2/foo # Set the second snapshot back to RO mode and do an incremental send. $ btrfs property set /mnt/snap2 ro true $ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2 At subvol /mnt/snap2 ERROR: send ioctl failed with -2: No such file or directory So fix this by ignoring inodes with a link count of zero if we are either doing a full send or if they do not exist in the parent snapshot (they are new in the send snapshot), and unlink all paths found in the parent snapshot when doing an incremental send (and ignoring all other inode items, such as xattrs and extents). A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Reported-by: Martin Wilck <martin.wilck@suse.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/send.c137
1 files changed, 129 insertions, 8 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 42e04cd3cd95..551294a6c9e2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
100 u64 cur_inode_rdev; 100 u64 cur_inode_rdev;
101 u64 cur_inode_last_extent; 101 u64 cur_inode_last_extent;
102 u64 cur_inode_next_write_offset; 102 u64 cur_inode_next_write_offset;
103 bool ignore_cur_inode;
103 104
104 u64 send_progress; 105 u64 send_progress;
105 106
@@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
5796 int pending_move = 0; 5797 int pending_move = 0;
5797 int refs_processed = 0; 5798 int refs_processed = 0;
5798 5799
5800 if (sctx->ignore_cur_inode)
5801 return 0;
5802
5799 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5803 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
5800 &refs_processed); 5804 &refs_processed);
5801 if (ret < 0) 5805 if (ret < 0)
@@ -5914,6 +5918,93 @@ out:
5914 return ret; 5918 return ret;
5915} 5919}
5916 5920
/*
 * Argument bundle handed to record_parent_ref() through the opaque callback
 * pointer of iterate_inode_ref().
 */
struct parent_paths_ctx {
	/* List onto which record_ref() is asked to put each reference found. */
	struct list_head *refs;
	/* Send context; supplies the parent_root passed to record_ref(). */
	struct send_ctx *sctx;
};
5925
5926static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
5927 void *ctx)
5928{
5929 struct parent_paths_ctx *ppctx = ctx;
5930
5931 return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
5932 ppctx->refs);
5933}
5934
5935/*
5936 * Issue unlink operations for all paths of the current inode found in the
5937 * parent snapshot.
5938 */
5939static int btrfs_unlink_all_paths(struct send_ctx *sctx)
5940{
5941 LIST_HEAD(deleted_refs);
5942 struct btrfs_path *path;
5943 struct btrfs_key key;
5944 struct parent_paths_ctx ctx;
5945 int ret;
5946
5947 path = alloc_path_for_send();
5948 if (!path)
5949 return -ENOMEM;
5950
5951 key.objectid = sctx->cur_ino;
5952 key.type = BTRFS_INODE_REF_KEY;
5953 key.offset = 0;
5954 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
5955 if (ret < 0)
5956 goto out;
5957
5958 ctx.refs = &deleted_refs;
5959 ctx.sctx = sctx;
5960
5961 while (true) {
5962 struct extent_buffer *eb = path->nodes[0];
5963 int slot = path->slots[0];
5964
5965 if (slot >= btrfs_header_nritems(eb)) {
5966 ret = btrfs_next_leaf(sctx->parent_root, path);
5967 if (ret < 0)
5968 goto out;
5969 else if (ret > 0)
5970 break;
5971 continue;
5972 }
5973
5974 btrfs_item_key_to_cpu(eb, &key, slot);
5975 if (key.objectid != sctx->cur_ino)
5976 break;
5977 if (key.type != BTRFS_INODE_REF_KEY &&
5978 key.type != BTRFS_INODE_EXTREF_KEY)
5979 break;
5980
5981 ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
5982 record_parent_ref, &ctx);
5983 if (ret < 0)
5984 goto out;
5985
5986 path->slots[0]++;
5987 }
5988
5989 while (!list_empty(&deleted_refs)) {
5990 struct recorded_ref *ref;
5991
5992 ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
5993 ret = send_unlink(sctx, ref->full_path);
5994 if (ret < 0)
5995 goto out;
5996 fs_path_free(ref->full_path);
5997 list_del(&ref->list);
5998 kfree(ref);
5999 }
6000 ret = 0;
6001out:
6002 btrfs_free_path(path);
6003 if (ret)
6004 __free_recorded_refs(&deleted_refs);
6005 return ret;
6006}
6007
5917static int changed_inode(struct send_ctx *sctx, 6008static int changed_inode(struct send_ctx *sctx,
5918 enum btrfs_compare_tree_result result) 6009 enum btrfs_compare_tree_result result)
5919{ 6010{
@@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx,
5928 sctx->cur_inode_new_gen = 0; 6019 sctx->cur_inode_new_gen = 0;
5929 sctx->cur_inode_last_extent = (u64)-1; 6020 sctx->cur_inode_last_extent = (u64)-1;
5930 sctx->cur_inode_next_write_offset = 0; 6021 sctx->cur_inode_next_write_offset = 0;
6022 sctx->ignore_cur_inode = false;
5931 6023
5932 /* 6024 /*
5933 * Set send_progress to current inode. This will tell all get_cur_xxx 6025 * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx,
5968 sctx->cur_inode_new_gen = 1; 6060 sctx->cur_inode_new_gen = 1;
5969 } 6061 }
5970 6062
6063 /*
6064 * Normally we do not find inodes with a link count of zero (orphans)
6065 * because the most common case is to create a snapshot and use it
6066 * for a send operation. However other less common use cases involve
6067 * using a subvolume and send it after turning it to RO mode just
6068 * after deleting all hard links of a file while holding an open
6069 * file descriptor against it or turning a RO snapshot into RW mode,
6070 * keep an open file descriptor against a file, delete it and then
6071 * turn the snapshot back to RO mode before using it for a send
6072 * operation. So if we find such cases, ignore the inode and all its
6073 * items completely if it's a new inode, or if it's a changed inode
6074 * make sure all its previous paths (from the parent snapshot) are all
6075 * unlinked and all the other inode items are ignored.
6076 */
6077 if (result == BTRFS_COMPARE_TREE_NEW ||
6078 result == BTRFS_COMPARE_TREE_CHANGED) {
6079 u32 nlinks;
6080
6081 nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
6082 if (nlinks == 0) {
6083 sctx->ignore_cur_inode = true;
6084 if (result == BTRFS_COMPARE_TREE_CHANGED)
6085 ret = btrfs_unlink_all_paths(sctx);
6086 goto out;
6087 }
6088 }
6089
5971 if (result == BTRFS_COMPARE_TREE_NEW) { 6090 if (result == BTRFS_COMPARE_TREE_NEW) {
5972 sctx->cur_inode_gen = left_gen; 6091 sctx->cur_inode_gen = left_gen;
5973 sctx->cur_inode_new = 1; 6092 sctx->cur_inode_new = 1;
@@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path,
6306 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 6425 key->objectid == BTRFS_FREE_SPACE_OBJECTID)
6307 goto out; 6426 goto out;
6308 6427
6309 if (key->type == BTRFS_INODE_ITEM_KEY) 6428 if (key->type == BTRFS_INODE_ITEM_KEY) {
6310 ret = changed_inode(sctx, result); 6429 ret = changed_inode(sctx, result);
6311 else if (key->type == BTRFS_INODE_REF_KEY || 6430 } else if (!sctx->ignore_cur_inode) {
6312 key->type == BTRFS_INODE_EXTREF_KEY) 6431 if (key->type == BTRFS_INODE_REF_KEY ||
6313 ret = changed_ref(sctx, result); 6432 key->type == BTRFS_INODE_EXTREF_KEY)
6314 else if (key->type == BTRFS_XATTR_ITEM_KEY) 6433 ret = changed_ref(sctx, result);
6315 ret = changed_xattr(sctx, result); 6434 else if (key->type == BTRFS_XATTR_ITEM_KEY)
6316 else if (key->type == BTRFS_EXTENT_DATA_KEY) 6435 ret = changed_xattr(sctx, result);
6317 ret = changed_extent(sctx, result); 6436 else if (key->type == BTRFS_EXTENT_DATA_KEY)
6437 ret = changed_extent(sctx, result);
6438 }
6318 6439
6319out: 6440out:
6320 return ret; 6441 return ret;