summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobbie Ko <robbieko@synology.com>2015-06-23 06:39:46 -0400
committerFilipe Manana <fdmanana@suse.com>2016-08-01 02:23:10 -0400
commit801bec365e0e19f2ba066cd3e25a67dee21b4aae (patch)
tree4100fee71bf0d989a4bfa3094ddd89a24b3dda6a
parent0596a9048bf2aca2a74b312493f39e4d5ac3b653 (diff)
Btrfs: send, fix failure to move directories with the same name around
When doing an incremental send we can end up not moving directories that have the same name. This happens when the same parent directory has different child directories with the same name in the parent and send snapshots. For example, consider the following scenario: Parent snapshot: . (ino 256) |---- d/ (ino 257) | |--- p1/ (ino 258) | |---- p1/ (ino 259) Send snapshot: . (ino 256) |--- d/ (ino 257) |--- p1/ (ino 259) |--- p1/ (ino 258) The directory named "d" (inode 257) has in both snapshots an entry with the name "p1" but it refers to different inodes in both snapshots (inode 258 in the parent snapshot and inode 259 in the send snapshot). When attempting to move inode 258, the operation is delayed because its new parent, inode 259, was not yet moved/renamed (as the stream is currently processing inode 258). Then when processing inode 259, we also end up delaying its move/rename operation so that it happens after inode 258 is moved/renamed. This decision to delay the move/rename operation of inode 259 is due to the fact that the new parent inode (257) still has inode 258 as its child, which has the same name as inode 259. So we end up with inode 258's move/rename operation waiting for inode 259's move/rename operation, which in turn is waiting for inode 258's move/rename operation. 
This results in ending the send stream without issuing move/rename operations for inodes 258 and 259 and generating the following warnings in syslog/dmesg: [148402.979747] ------------[ cut here ]------------ [148402.980588] WARNING: CPU: 14 PID: 4117 at fs/btrfs/send.c:6177 btrfs_ioctl_send+0xe03/0xe51 [btrfs] [148402.981928] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq tpm_tis ppdev tpm parport_pc psmouse parport sg pcspkr i2c_piix4 i2c_core evdev processor serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [148402.986999] CPU: 14 PID: 4117 Comm: btrfs Tainted: G W 4.6.0-rc7-btrfs-next-31+ #1 [148402.988136] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [148402.988136] 0000000000000000 ffff88022139fca8 ffffffff8126b42c 0000000000000000 [148402.988136] 0000000000000000 ffff88022139fce8 ffffffff81052b14 000018212139fac8 [148402.988136] ffff88022b0db400 0000000000000000 0000000000000001 0000000000000000 [148402.988136] Call Trace: [148402.988136] [<ffffffff8126b42c>] dump_stack+0x67/0x90 [148402.988136] [<ffffffff81052b14>] __warn+0xc2/0xdd [148402.988136] [<ffffffff81052beb>] warn_slowpath_null+0x1d/0x1f [148402.988136] [<ffffffffa04bc831>] btrfs_ioctl_send+0xe03/0xe51 [btrfs] [148402.988136] [<ffffffffa048b358>] btrfs_ioctl+0x14f/0x1f81 [btrfs] [148402.988136] [<ffffffff8108e456>] ? arch_local_irq_save+0x9/0xc [148402.988136] [<ffffffff8108eb51>] ? __lock_is_held+0x3c/0x57 [148402.988136] [<ffffffff8118da05>] vfs_ioctl+0x18/0x34 [148402.988136] [<ffffffff8118e00c>] do_vfs_ioctl+0x550/0x5be [148402.988136] [<ffffffff81196f0c>] ? __fget+0x6b/0x77 [148402.988136] [<ffffffff81196fa1>] ? 
__fget_light+0x62/0x71 [148402.988136] [<ffffffff8118e0d1>] SyS_ioctl+0x57/0x79 [148402.988136] [<ffffffff8149e025>] entry_SYSCALL_64_fastpath+0x18/0xa8 [148402.988136] [<ffffffff8108e89d>] ? trace_hardirqs_off_caller+0x3f/0xaa [148403.011373] ---[ end trace a4539270c8056f8b ]--- [148403.012296] ------------[ cut here ]------------ [148403.013071] WARNING: CPU: 14 PID: 4117 at fs/btrfs/send.c:6194 btrfs_ioctl_send+0xe19/0xe51 [btrfs] [148403.014447] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq tpm_tis ppdev tpm parport_pc psmouse parport sg pcspkr i2c_piix4 i2c_core evdev processor serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [148403.019708] CPU: 14 PID: 4117 Comm: btrfs Tainted: G W 4.6.0-rc7-btrfs-next-31+ #1 [148403.020104] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [148403.020104] 0000000000000000 ffff88022139fca8 ffffffff8126b42c 0000000000000000 [148403.020104] 0000000000000000 ffff88022139fce8 ffffffff81052b14 000018322139fac8 [148403.020104] ffff88022b0db400 0000000000000000 0000000000000001 0000000000000000 [148403.020104] Call Trace: [148403.020104] [<ffffffff8126b42c>] dump_stack+0x67/0x90 [148403.020104] [<ffffffff81052b14>] __warn+0xc2/0xdd [148403.020104] [<ffffffff81052beb>] warn_slowpath_null+0x1d/0x1f [148403.020104] [<ffffffffa04bc847>] btrfs_ioctl_send+0xe19/0xe51 [btrfs] [148403.020104] [<ffffffffa048b358>] btrfs_ioctl+0x14f/0x1f81 [btrfs] [148403.020104] [<ffffffff8108e456>] ? arch_local_irq_save+0x9/0xc [148403.020104] [<ffffffff8108eb51>] ? __lock_is_held+0x3c/0x57 [148403.020104] [<ffffffff8118da05>] vfs_ioctl+0x18/0x34 [148403.020104] [<ffffffff8118e00c>] do_vfs_ioctl+0x550/0x5be [148403.020104] [<ffffffff81196f0c>] ? __fget+0x6b/0x77 [148403.020104] [<ffffffff81196fa1>] ? 
__fget_light+0x62/0x71 [148403.020104] [<ffffffff8118e0d1>] SyS_ioctl+0x57/0x79 [148403.020104] [<ffffffff8149e025>] entry_SYSCALL_64_fastpath+0x18/0xa8 [148403.020104] [<ffffffff8108e89d>] ? trace_hardirqs_off_caller+0x3f/0xaa [148403.038981] ---[ end trace a4539270c8056f8c ]--- There's another issue caused by similar (but more complex) changes in the directory hierarchy that makes move/rename operations fail, described with the following example: Parent snapshot: . |---- a/ (ino 262) | |---- c/ (ino 268) | |---- d/ (ino 263) |---- ance/ (ino 267) |---- e/ (ino 264) |---- f/ (ino 265) |---- ance/ (ino 266) Send snapshot: . |---- a/ (ino 262) |---- c/ (ino 268) | |---- ance/ (ino 267) | |---- d/ (ino 263) | |---- ance/ (ino 266) | |---- f/ (ino 265) |---- e/ (ino 264) When the inode 265 is processed, the path for inode 267 is computed, which at that time corresponds to "d/ance", and it's stored in the names cache. Later on when processing inode 266, we end up orphanizing (renaming to a name matching the pattern o<ino>-<gen>-<seq>) inode 267 because it has the same name as inode 266 and it's currently a child of the new parent directory (inode 263) for inode 266. After the orphanization and while we are still processing inode 266, a rename operation for inode 266 is generated. However the source path for that rename operation is incorrect because it ends up using the old, pre-orphanization, name of inode 267. The no longer valid name for inode 267 was previously cached when processing inode 265 and it remains usable and considered valid until the inode currently being processed has a number greater than 267. This resulted in the receiving side failing with the following error: ERROR: rename d/ance/ance -> d/ance failed: No such file or directory So fix these issues by detecting such circular dependencies for rename operations and by clearing the cached name of an inode once the inode is orphanized. A test case for fstests will follow soon. 
Signed-off-by: Robbie Ko <robbieko@synology.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> [Rewrote change log to be more detailed and organized, and improved comments] Signed-off-by: Filipe Manana <fdmanana@suse.com>
-rw-r--r--fs/btrfs/send.c100
1 files changed, 95 insertions, 5 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b71dd298385c..993e1bab0a6b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@ struct pending_dir_move {
231 u64 parent_ino; 231 u64 parent_ino;
232 u64 ino; 232 u64 ino;
233 u64 gen; 233 u64 gen;
234 bool is_orphan;
235 struct list_head update_refs; 234 struct list_head update_refs;
236}; 235};
237 236
@@ -1861,7 +1860,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1861 * was already unlinked/moved, so we can safely assume that we will not 1860 * was already unlinked/moved, so we can safely assume that we will not
1862 * overwrite anything at this point in time. 1861 * overwrite anything at this point in time.
1863 */ 1862 */
1864 if (other_inode > sctx->send_progress) { 1863 if (other_inode > sctx->send_progress ||
1864 is_waiting_for_move(sctx, other_inode)) {
1865 ret = get_inode_info(sctx->parent_root, other_inode, NULL, 1865 ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1866 who_gen, NULL, NULL, NULL, NULL); 1866 who_gen, NULL, NULL, NULL, NULL);
1867 if (ret < 0) 1867 if (ret < 0)
@@ -3047,7 +3047,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
3047 pm->parent_ino = parent_ino; 3047 pm->parent_ino = parent_ino;
3048 pm->ino = ino; 3048 pm->ino = ino;
3049 pm->gen = ino_gen; 3049 pm->gen = ino_gen;
3050 pm->is_orphan = is_orphan;
3051 INIT_LIST_HEAD(&pm->list); 3050 INIT_LIST_HEAD(&pm->list);
3052 INIT_LIST_HEAD(&pm->update_refs); 3051 INIT_LIST_HEAD(&pm->update_refs);
3053 RB_CLEAR_NODE(&pm->node); 3052 RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3112,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3113 return NULL; 3112 return NULL;
3114} 3113}
3115 3114
3115static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3116 u64 ino, u64 gen, u64 *ancestor_ino)
3117{
3118 int ret = 0;
3119 u64 parent_inode = 0;
3120 u64 parent_gen = 0;
3121 u64 start_ino = ino;
3122
3123 *ancestor_ino = 0;
3124 while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3125 fs_path_reset(name);
3126
3127 if (is_waiting_for_rm(sctx, ino))
3128 break;
3129 if (is_waiting_for_move(sctx, ino)) {
3130 if (*ancestor_ino == 0)
3131 *ancestor_ino = ino;
3132 ret = get_first_ref(sctx->parent_root, ino,
3133 &parent_inode, &parent_gen, name);
3134 } else {
3135 ret = __get_cur_name_and_parent(sctx, ino, gen,
3136 &parent_inode,
3137 &parent_gen, name);
3138 if (ret > 0) {
3139 ret = 0;
3140 break;
3141 }
3142 }
3143 if (ret < 0)
3144 break;
3145 if (parent_inode == start_ino) {
3146 ret = 1;
3147 if (*ancestor_ino == 0)
3148 *ancestor_ino = ino;
3149 break;
3150 }
3151 ino = parent_inode;
3152 gen = parent_gen;
3153 }
3154 return ret;
3155}
3156
3116static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3157static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3117{ 3158{
3118 struct fs_path *from_path = NULL; 3159 struct fs_path *from_path = NULL;
@@ -3123,6 +3164,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3123 u64 parent_ino, parent_gen; 3164 u64 parent_ino, parent_gen;
3124 struct waiting_dir_move *dm = NULL; 3165 struct waiting_dir_move *dm = NULL;
3125 u64 rmdir_ino = 0; 3166 u64 rmdir_ino = 0;
3167 u64 ancestor;
3168 bool is_orphan;
3126 int ret; 3169 int ret;
3127 3170
3128 name = fs_path_alloc(); 3171 name = fs_path_alloc();
@@ -3135,9 +3178,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3135 dm = get_waiting_dir_move(sctx, pm->ino); 3178 dm = get_waiting_dir_move(sctx, pm->ino);
3136 ASSERT(dm); 3179 ASSERT(dm);
3137 rmdir_ino = dm->rmdir_ino; 3180 rmdir_ino = dm->rmdir_ino;
3181 is_orphan = dm->orphanized;
3138 free_waiting_dir_move(sctx, dm); 3182 free_waiting_dir_move(sctx, dm);
3139 3183
3140 if (pm->is_orphan) { 3184 if (is_orphan) {
3141 ret = gen_unique_name(sctx, pm->ino, 3185 ret = gen_unique_name(sctx, pm->ino,
3142 pm->gen, from_path); 3186 pm->gen, from_path);
3143 } else { 3187 } else {
@@ -3155,6 +3199,22 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3155 goto out; 3199 goto out;
3156 3200
3157 sctx->send_progress = sctx->cur_ino + 1; 3201 sctx->send_progress = sctx->cur_ino + 1;
3202 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
3203 if (ret) {
3204 LIST_HEAD(deleted_refs);
3205 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3206 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3207 &pm->update_refs, &deleted_refs,
3208 is_orphan);
3209 if (ret < 0)
3210 goto out;
3211 if (rmdir_ino) {
3212 dm = get_waiting_dir_move(sctx, pm->ino);
3213 ASSERT(dm);
3214 dm->rmdir_ino = rmdir_ino;
3215 }
3216 goto out;
3217 }
3158 fs_path_reset(name); 3218 fs_path_reset(name);
3159 to_path = name; 3219 to_path = name;
3160 name = NULL; 3220 name = NULL;
@@ -3325,6 +3385,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
3325 u64 left_gen; 3385 u64 left_gen;
3326 u64 right_gen; 3386 u64 right_gen;
3327 int ret = 0; 3387 int ret = 0;
3388 struct waiting_dir_move *wdm;
3328 3389
3329 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3390 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3330 return 0; 3391 return 0;
@@ -3383,7 +3444,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
3383 goto out; 3444 goto out;
3384 } 3445 }
3385 3446
3386 if (is_waiting_for_move(sctx, di_key.objectid)) { 3447 wdm = get_waiting_dir_move(sctx, di_key.objectid);
3448 if (wdm && !wdm->orphanized) {
3387 ret = add_pending_dir_move(sctx, 3449 ret = add_pending_dir_move(sctx,
3388 sctx->cur_ino, 3450 sctx->cur_ino,
3389 sctx->cur_inode_gen, 3451 sctx->cur_inode_gen,
@@ -3643,11 +3705,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3643 goto out; 3705 goto out;
3644 if (ret) { 3706 if (ret) {
3645 struct name_cache_entry *nce; 3707 struct name_cache_entry *nce;
3708 struct waiting_dir_move *wdm;
3646 3709
3647 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3710 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3648 cur->full_path); 3711 cur->full_path);
3649 if (ret < 0) 3712 if (ret < 0)
3650 goto out; 3713 goto out;
3714
3715 /*
3716 * If ow_inode has its rename operation delayed
3717 * make sure that its orphanized name is used in
3718 * the source path when performing its rename
3719 * operation.
3720 */
3721 if (is_waiting_for_move(sctx, ow_inode)) {
3722 wdm = get_waiting_dir_move(sctx,
3723 ow_inode);
3724 ASSERT(wdm);
3725 wdm->orphanized = true;
3726 }
3727
3651 /* 3728 /*
3652 * Make sure we clear our orphanized inode's 3729 * Make sure we clear our orphanized inode's
3653 * name from the name cache. This is because the 3730 * name from the name cache. This is because the
@@ -3663,6 +3740,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3663 name_cache_delete(sctx, nce); 3740 name_cache_delete(sctx, nce);
3664 kfree(nce); 3741 kfree(nce);
3665 } 3742 }
3743
3744 /*
3745 * ow_inode might currently be an ancestor of
3746 * cur_ino, therefore compute valid_path (the
3747 * current path of cur_ino) again because it
3748 * might contain the pre-orphanization name of
3749 * ow_inode, which is no longer valid.
3750 */
3751 fs_path_reset(valid_path);
3752 ret = get_cur_path(sctx, sctx->cur_ino,
3753 sctx->cur_inode_gen, valid_path);
3754 if (ret < 0)
3755 goto out;
3666 } else { 3756 } else {
3667 ret = send_unlink(sctx, cur->full_path); 3757 ret = send_unlink(sctx, cur->full_path);
3668 if (ret < 0) 3758 if (ret < 0)