aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFilipe David Borba Manana <fdmanana@gmail.com>2014-01-22 05:00:53 -0500
committerChris Mason <clm@fb.com>2014-01-29 10:06:22 -0500
commit9f03740a956d7ac6a1b8f8c455da6fa5cae11c22 (patch)
treee5d56d50c7e21994df479d9d2c557834233877c4 /fs
parent2365dd3ca02bbb6d3412404482e1d85752549953 (diff)
Btrfs: fix infinite path build loops in incremental send
The send operation processes inodes by their ascending number, and assumes that any rename/move operation can be successfully performed (sent to the caller) once all previous inodes (those with a smaller inode number than the one we're currently processing) were processed. This is not true when an incremental send had to process a hierarchical change between 2 snapshots where the parent-children relationship between directory inodes was reversed - that is, parents became children and children became parents. This situation made the path building code go into an infinite loop, which kept allocating more and more memory that eventually led to a krealloc warning being displayed in dmesg: WARNING: CPU: 1 PID: 5705 at mm/page_alloc.c:2477 __alloc_pages_nodemask+0x365/0xad0() Modules linked in: btrfs raid6_pq xor pci_stub vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) snd_hda_codec_hdmi snd_hda_codec_realtek joydev radeon snd_hda_intel snd_hda_codec snd_hwdep snd_seq_midi snd_pcm psmouse i915 snd_rawmidi serio_raw snd_seq_midi_event lpc_ich snd_seq snd_timer ttm snd_seq_device rfcomm drm_kms_helper parport_pc bnep bluetooth drm ppdev snd soundcore i2c_algo_bit snd_page_alloc binfmt_misc video lp parport r8169 mii hid_generic usbhid hid CPU: 1 PID: 5705 Comm: btrfs Tainted: G O 3.13.0-rc7-fdm-btrfs-next-18+ #3 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./Z77 Pro4, BIOS P1.50 09/04/2012 [ 5381.660441] 00000000000009ad ffff8806f6f2f4e8 ffffffff81777434 0000000000000007 [ 5381.660447] 0000000000000000 ffff8806f6f2f528 ffffffff8104a9ec ffff8807038f36f0 [ 5381.660452] 0000000000000000 0000000000000206 ffff8807038f2490 ffff8807038f36f0 [ 5381.660457] Call Trace: [ 5381.660464] [<ffffffff81777434>] dump_stack+0x4e/0x68 [ 5381.660471] [<ffffffff8104a9ec>] warn_slowpath_common+0x8c/0xc0 [ 5381.660476] [<ffffffff8104aa3a>] warn_slowpath_null+0x1a/0x20 [ 5381.660480] [<ffffffff81144995>] __alloc_pages_nodemask+0x365/0xad0 [ 5381.660487] [<ffffffff8108313f>] ? 
local_clock+0x4f/0x60 [ 5381.660491] [<ffffffff811430e8>] ? free_one_page+0x98/0x440 [ 5381.660495] [<ffffffff8108313f>] ? local_clock+0x4f/0x60 [ 5381.660502] [<ffffffff8113fae4>] ? __get_free_pages+0x14/0x50 [ 5381.660508] [<ffffffff81095fb8>] ? trace_hardirqs_off_caller+0x28/0xd0 [ 5381.660515] [<ffffffff81183caf>] alloc_pages_current+0x10f/0x1f0 [ 5381.660520] [<ffffffff8113fae4>] ? __get_free_pages+0x14/0x50 [ 5381.660524] [<ffffffff8113fae4>] __get_free_pages+0x14/0x50 [ 5381.660530] [<ffffffff8115dace>] kmalloc_order_trace+0x3e/0x100 [ 5381.660536] [<ffffffff81191ea0>] __kmalloc_track_caller+0x220/0x230 [ 5381.660560] [<ffffffffa0729fdb>] ? fs_path_ensure_buf.part.12+0x6b/0x200 [btrfs] [ 5381.660564] [<ffffffff8178085c>] ? retint_restore_args+0xe/0xe [ 5381.660569] [<ffffffff811580ef>] krealloc+0x6f/0xb0 [ 5381.660586] [<ffffffffa0729fdb>] fs_path_ensure_buf.part.12+0x6b/0x200 [btrfs] [ 5381.660601] [<ffffffffa072a208>] fs_path_prepare_for_add+0x98/0xb0 [btrfs] [ 5381.660615] [<ffffffffa072a2bc>] fs_path_add_path+0x2c/0x60 [btrfs] [ 5381.660628] [<ffffffffa072c55c>] get_cur_path+0x7c/0x1c0 [btrfs] Even without this loop, the incremental send couldn't succeed, because it would attempt to send a rename/move operation for the lower inode before the highest inode number was renamed/moved. This issue is easy to trigger with the following steps: $ mkfs.btrfs -f /dev/sdb3 $ mount /dev/sdb3 /mnt/btrfs $ mkdir -p /mnt/btrfs/a/b/c/d $ mkdir /mnt/btrfs/a/b/c2 $ btrfs subvol snapshot -r /mnt/btrfs /mnt/btrfs/snap1 $ mv /mnt/btrfs/a/b/c/d /mnt/btrfs/a/b/c2/d2 $ mv /mnt/btrfs/a/b/c /mnt/btrfs/a/b/c2/d2/cc $ btrfs subvol snapshot -r /mnt/btrfs /mnt/btrfs/snap2 $ btrfs send -p /mnt/btrfs/snap1 /mnt/btrfs/snap2 > /tmp/incremental.send The structure of the filesystem when the first snapshot is taken is: . (ino 256) |-- a (ino 257) |-- b (ino 258) |-- c (ino 259) | |-- d (ino 260) | |-- c2 (ino 261) And its structure when the second snapshot is taken is: . 
(ino 256) |-- a (ino 257) |-- b (ino 258) |-- c2 (ino 261) |-- d2 (ino 260) |-- cc (ino 259) Before the move/rename operation is performed for the inode 259, the move/rename for inode 260 must be performed, since 259 is now a child of 260. A test case for xfstests, with a more complex scenario, will follow soon. Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com> Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/send.c539
1 files changed, 518 insertions, 21 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fc1f0abb8fe4..c96e879bcb16 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -121,6 +121,74 @@ struct send_ctx {
121 int name_cache_size; 121 int name_cache_size;
122 122
123 char *read_buf; 123 char *read_buf;
124
125 /*
126 * We process inodes by their increasing order, so if before an
127 * incremental send we reverse the parent/child relationship of
128 * directories such that a directory with a lower inode number was
129 * the parent of a directory with a higher inode number, and the one
130 * becoming the new parent got renamed too, we can't rename/move the
131 * directory with lower inode number when we finish processing it - we
132 * must process the directory with higher inode number first, then
133 * rename/move it and then rename/move the directory with lower inode
134 * number. Example follows.
135 *
136 * Tree state when the first send was performed:
137 *
138 * .
139 * |-- a (ino 257)
140 * |-- b (ino 258)
141 * |
142 * |
143 * |-- c (ino 259)
144 * | |-- d (ino 260)
145 * |
146 * |-- c2 (ino 261)
147 *
148 * Tree state when the second (incremental) send is performed:
149 *
150 * .
151 * |-- a (ino 257)
152 * |-- b (ino 258)
153 * |-- c2 (ino 261)
154 * |-- d2 (ino 260)
155 * |-- cc (ino 259)
156 *
157 * The sequence of steps that lead to the second state was:
158 *
159 * mv /a/b/c/d /a/b/c2/d2
160 * mv /a/b/c /a/b/c2/d2/cc
161 *
162 * "c" has lower inode number, but we can't move it (2nd mv operation)
163 * before we move "d", which has higher inode number.
164 *
165 * So we just memorize which move/rename operations must be performed
166 * later when their respective parent is processed and moved/renamed.
167 */
168
169 /* Indexed by parent directory inode number. */
170 struct rb_root pending_dir_moves;
171
172 /*
173 * Reverse index, indexed by the inode number of a directory that
174 * is waiting for the move/rename of its immediate parent before its
175 * own move/rename can be performed.
176 */
177 struct rb_root waiting_dir_moves;
178};
179
180struct pending_dir_move {
181 struct rb_node node;
182 struct list_head list;
183 u64 parent_ino;
184 u64 ino;
185 u64 gen;
186 struct list_head update_refs;
187};
188
189struct waiting_dir_move {
190 struct rb_node node;
191 u64 ino;
124}; 192};
125 193
126struct name_cache_entry { 194struct name_cache_entry {
@@ -144,6 +212,8 @@ struct name_cache_entry {
144 char name[]; 212 char name[];
145}; 213};
146 214
215static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
216
147static int need_send_hole(struct send_ctx *sctx) 217static int need_send_hole(struct send_ctx *sctx)
148{ 218{
149 return (sctx->parent_root && !sctx->cur_inode_new && 219 return (sctx->parent_root && !sctx->cur_inode_new &&
@@ -1897,6 +1967,7 @@ static void name_cache_free(struct send_ctx *sctx)
1897 */ 1967 */
1898static int __get_cur_name_and_parent(struct send_ctx *sctx, 1968static int __get_cur_name_and_parent(struct send_ctx *sctx,
1899 u64 ino, u64 gen, 1969 u64 ino, u64 gen,
1970 int skip_name_cache,
1900 u64 *parent_ino, 1971 u64 *parent_ino,
1901 u64 *parent_gen, 1972 u64 *parent_gen,
1902 struct fs_path *dest) 1973 struct fs_path *dest)
@@ -1906,6 +1977,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
1906 struct btrfs_path *path = NULL; 1977 struct btrfs_path *path = NULL;
1907 struct name_cache_entry *nce = NULL; 1978 struct name_cache_entry *nce = NULL;
1908 1979
1980 if (skip_name_cache)
1981 goto get_ref;
1909 /* 1982 /*
1910 * First check if we already did a call to this function with the same 1983 * First check if we already did a call to this function with the same
1911 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes 1984 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
@@ -1950,11 +2023,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
1950 goto out_cache; 2023 goto out_cache;
1951 } 2024 }
1952 2025
2026get_ref:
1953 /* 2027 /*
1954 * Depending on whether the inode was already processed or not, use 2028 * Depending on whether the inode was already processed or not, use
1955 * send_root or parent_root for ref lookup. 2029 * send_root or parent_root for ref lookup.
1956 */ 2030 */
1957 if (ino < sctx->send_progress) 2031 if (ino < sctx->send_progress && !skip_name_cache)
1958 ret = get_first_ref(sctx->send_root, ino, 2032 ret = get_first_ref(sctx->send_root, ino,
1959 parent_ino, parent_gen, dest); 2033 parent_ino, parent_gen, dest);
1960 else 2034 else
@@ -1978,6 +2052,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
1978 goto out; 2052 goto out;
1979 ret = 1; 2053 ret = 1;
1980 } 2054 }
2055 if (skip_name_cache)
2056 goto out;
1981 2057
1982out_cache: 2058out_cache:
1983 /* 2059 /*
@@ -2045,6 +2121,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2045 u64 parent_inode = 0; 2121 u64 parent_inode = 0;
2046 u64 parent_gen = 0; 2122 u64 parent_gen = 0;
2047 int stop = 0; 2123 int stop = 0;
2124 u64 start_ino = ino;
2125 u64 start_gen = gen;
2126 int skip_name_cache = 0;
2048 2127
2049 name = fs_path_alloc(); 2128 name = fs_path_alloc();
2050 if (!name) { 2129 if (!name) {
@@ -2052,19 +2131,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2052 goto out; 2131 goto out;
2053 } 2132 }
2054 2133
2134 if (is_waiting_for_move(sctx, ino))
2135 skip_name_cache = 1;
2136
2137again:
2055 dest->reversed = 1; 2138 dest->reversed = 1;
2056 fs_path_reset(dest); 2139 fs_path_reset(dest);
2057 2140
2058 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2141 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2059 fs_path_reset(name); 2142 fs_path_reset(name);
2060 2143
2061 ret = __get_cur_name_and_parent(sctx, ino, gen, 2144 ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
2062 &parent_inode, &parent_gen, name); 2145 &parent_inode, &parent_gen, name);
2063 if (ret < 0) 2146 if (ret < 0)
2064 goto out; 2147 goto out;
2065 if (ret) 2148 if (ret)
2066 stop = 1; 2149 stop = 1;
2067 2150
2151 if (!skip_name_cache &&
2152 is_waiting_for_move(sctx, parent_inode)) {
2153 ino = start_ino;
2154 gen = start_gen;
2155 stop = 0;
2156 skip_name_cache = 1;
2157 goto again;
2158 }
2159
2068 ret = fs_path_add_path(dest, name); 2160 ret = fs_path_add_path(dest, name);
2069 if (ret < 0) 2161 if (ret < 0)
2070 goto out; 2162 goto out;
@@ -2636,10 +2728,349 @@ out:
2636 return ret; 2728 return ret;
2637} 2729}
2638 2730
2731static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
2732{
2733 struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2734 struct waiting_dir_move *entry;
2735
2736 while (n) {
2737 entry = rb_entry(n, struct waiting_dir_move, node);
2738 if (ino < entry->ino)
2739 n = n->rb_left;
2740 else if (ino > entry->ino)
2741 n = n->rb_right;
2742 else
2743 return 1;
2744 }
2745 return 0;
2746}
2747
2748static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2749{
2750 struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
2751 struct rb_node *parent = NULL;
2752 struct waiting_dir_move *entry, *dm;
2753
2754 dm = kmalloc(sizeof(*dm), GFP_NOFS);
2755 if (!dm)
2756 return -ENOMEM;
2757 dm->ino = ino;
2758
2759 while (*p) {
2760 parent = *p;
2761 entry = rb_entry(parent, struct waiting_dir_move, node);
2762 if (ino < entry->ino) {
2763 p = &(*p)->rb_left;
2764 } else if (ino > entry->ino) {
2765 p = &(*p)->rb_right;
2766 } else {
2767 kfree(dm);
2768 return -EEXIST;
2769 }
2770 }
2771
2772 rb_link_node(&dm->node, parent, p);
2773 rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
2774 return 0;
2775}
2776
2777#ifdef CONFIG_BTRFS_ASSERT
2778
2779static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2780{
2781 struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2782 struct waiting_dir_move *entry;
2783
2784 while (n) {
2785 entry = rb_entry(n, struct waiting_dir_move, node);
2786 if (ino < entry->ino) {
2787 n = n->rb_left;
2788 } else if (ino > entry->ino) {
2789 n = n->rb_right;
2790 } else {
2791 rb_erase(&entry->node, &sctx->waiting_dir_moves);
2792 kfree(entry);
2793 return 0;
2794 }
2795 }
2796 return -ENOENT;
2797}
2798
2799#endif
2800
2801static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
2802{
2803 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2804 struct rb_node *parent = NULL;
2805 struct pending_dir_move *entry, *pm;
2806 struct recorded_ref *cur;
2807 int exists = 0;
2808 int ret;
2809
2810 pm = kmalloc(sizeof(*pm), GFP_NOFS);
2811 if (!pm)
2812 return -ENOMEM;
2813 pm->parent_ino = parent_ino;
2814 pm->ino = sctx->cur_ino;
2815 pm->gen = sctx->cur_inode_gen;
2816 INIT_LIST_HEAD(&pm->list);
2817 INIT_LIST_HEAD(&pm->update_refs);
2818 RB_CLEAR_NODE(&pm->node);
2819
2820 while (*p) {
2821 parent = *p;
2822 entry = rb_entry(parent, struct pending_dir_move, node);
2823 if (parent_ino < entry->parent_ino) {
2824 p = &(*p)->rb_left;
2825 } else if (parent_ino > entry->parent_ino) {
2826 p = &(*p)->rb_right;
2827 } else {
2828 exists = 1;
2829 break;
2830 }
2831 }
2832
2833 list_for_each_entry(cur, &sctx->deleted_refs, list) {
2834 ret = dup_ref(cur, &pm->update_refs);
2835 if (ret < 0)
2836 goto out;
2837 }
2838 list_for_each_entry(cur, &sctx->new_refs, list) {
2839 ret = dup_ref(cur, &pm->update_refs);
2840 if (ret < 0)
2841 goto out;
2842 }
2843
2844 ret = add_waiting_dir_move(sctx, pm->ino);
2845 if (ret)
2846 goto out;
2847
2848 if (exists) {
2849 list_add_tail(&pm->list, &entry->list);
2850 } else {
2851 rb_link_node(&pm->node, parent, p);
2852 rb_insert_color(&pm->node, &sctx->pending_dir_moves);
2853 }
2854 ret = 0;
2855out:
2856 if (ret) {
2857 __free_recorded_refs(&pm->update_refs);
2858 kfree(pm);
2859 }
2860 return ret;
2861}
2862
2863static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
2864 u64 parent_ino)
2865{
2866 struct rb_node *n = sctx->pending_dir_moves.rb_node;
2867 struct pending_dir_move *entry;
2868
2869 while (n) {
2870 entry = rb_entry(n, struct pending_dir_move, node);
2871 if (parent_ino < entry->parent_ino)
2872 n = n->rb_left;
2873 else if (parent_ino > entry->parent_ino)
2874 n = n->rb_right;
2875 else
2876 return entry;
2877 }
2878 return NULL;
2879}
2880
2881static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
2882{
2883 struct fs_path *from_path = NULL;
2884 struct fs_path *to_path = NULL;
2885 u64 orig_progress = sctx->send_progress;
2886 struct recorded_ref *cur;
2887 int ret;
2888
2889 from_path = fs_path_alloc();
2890 if (!from_path)
2891 return -ENOMEM;
2892
2893 sctx->send_progress = pm->ino;
2894 ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
2895 if (ret < 0)
2896 goto out;
2897
2898 to_path = fs_path_alloc();
2899 if (!to_path) {
2900 ret = -ENOMEM;
2901 goto out;
2902 }
2903
2904 sctx->send_progress = sctx->cur_ino + 1;
2905 ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0);
2906 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
2907 if (ret < 0)
2908 goto out;
2909
2910 ret = send_rename(sctx, from_path, to_path);
2911 if (ret < 0)
2912 goto out;
2913
2914 ret = send_utimes(sctx, pm->ino, pm->gen);
2915 if (ret < 0)
2916 goto out;
2917
2918 /*
2919 * After rename/move, need to update the utimes of both new parent(s)
2920 * and old parent(s).
2921 */
2922 list_for_each_entry(cur, &pm->update_refs, list) {
2923 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
2924 if (ret < 0)
2925 goto out;
2926 }
2927
2928out:
2929 fs_path_free(from_path);
2930 fs_path_free(to_path);
2931 sctx->send_progress = orig_progress;
2932
2933 return ret;
2934}
2935
2936static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
2937{
2938 if (!list_empty(&m->list))
2939 list_del(&m->list);
2940 if (!RB_EMPTY_NODE(&m->node))
2941 rb_erase(&m->node, &sctx->pending_dir_moves);
2942 __free_recorded_refs(&m->update_refs);
2943 kfree(m);
2944}
2945
2946static void tail_append_pending_moves(struct pending_dir_move *moves,
2947 struct list_head *stack)
2948{
2949 if (list_empty(&moves->list)) {
2950 list_add_tail(&moves->list, stack);
2951 } else {
2952 LIST_HEAD(list);
2953 list_splice_init(&moves->list, &list);
2954 list_add_tail(&moves->list, stack);
2955 list_splice_tail(&list, stack);
2956 }
2957}
2958
2959static int apply_children_dir_moves(struct send_ctx *sctx)
2960{
2961 struct pending_dir_move *pm;
2962 struct list_head stack;
2963 u64 parent_ino = sctx->cur_ino;
2964 int ret = 0;
2965
2966 pm = get_pending_dir_moves(sctx, parent_ino);
2967 if (!pm)
2968 return 0;
2969
2970 INIT_LIST_HEAD(&stack);
2971 tail_append_pending_moves(pm, &stack);
2972
2973 while (!list_empty(&stack)) {
2974 pm = list_first_entry(&stack, struct pending_dir_move, list);
2975 parent_ino = pm->ino;
2976 ret = apply_dir_move(sctx, pm);
2977 free_pending_move(sctx, pm);
2978 if (ret)
2979 goto out;
2980 pm = get_pending_dir_moves(sctx, parent_ino);
2981 if (pm)
2982 tail_append_pending_moves(pm, &stack);
2983 }
2984 return 0;
2985
2986out:
2987 while (!list_empty(&stack)) {
2988 pm = list_first_entry(&stack, struct pending_dir_move, list);
2989 free_pending_move(sctx, pm);
2990 }
2991 return ret;
2992}
2993
2994static int wait_for_parent_move(struct send_ctx *sctx,
2995 struct recorded_ref *parent_ref)
2996{
2997 int ret;
2998 u64 ino = parent_ref->dir;
2999 u64 parent_ino_before, parent_ino_after;
3000 u64 new_gen, old_gen;
3001 struct fs_path *path_before = NULL;
3002 struct fs_path *path_after = NULL;
3003 int len1, len2;
3004
3005 if (parent_ref->dir <= sctx->cur_ino)
3006 return 0;
3007
3008 if (is_waiting_for_move(sctx, ino))
3009 return 1;
3010
3011 ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
3012 NULL, NULL, NULL, NULL);
3013 if (ret == -ENOENT)
3014 return 0;
3015 else if (ret < 0)
3016 return ret;
3017
3018 ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
3019 NULL, NULL, NULL, NULL);
3020 if (ret < 0)
3021 return ret;
3022
3023 if (new_gen != old_gen)
3024 return 0;
3025
3026 path_before = fs_path_alloc();
3027 if (!path_before)
3028 return -ENOMEM;
3029
3030 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3031 NULL, path_before);
3032 if (ret == -ENOENT) {
3033 ret = 0;
3034 goto out;
3035 } else if (ret < 0) {
3036 goto out;
3037 }
3038
3039 path_after = fs_path_alloc();
3040 if (!path_after) {
3041 ret = -ENOMEM;
3042 goto out;
3043 }
3044
3045 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3046 NULL, path_after);
3047 if (ret == -ENOENT) {
3048 ret = 0;
3049 goto out;
3050 } else if (ret < 0) {
3051 goto out;
3052 }
3053
3054 len1 = fs_path_len(path_before);
3055 len2 = fs_path_len(path_after);
3056 if ((parent_ino_before != parent_ino_after) && (len1 != len2 ||
3057 memcmp(path_before->start, path_after->start, len1))) {
3058 ret = 1;
3059 goto out;
3060 }
3061 ret = 0;
3062
3063out:
3064 fs_path_free(path_before);
3065 fs_path_free(path_after);
3066
3067 return ret;
3068}
3069
2639/* 3070/*
2640 * This does all the move/link/unlink/rmdir magic. 3071 * This does all the move/link/unlink/rmdir magic.
2641 */ 3072 */
2642static int process_recorded_refs(struct send_ctx *sctx) 3073static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
2643{ 3074{
2644 int ret = 0; 3075 int ret = 0;
2645 struct recorded_ref *cur; 3076 struct recorded_ref *cur;
@@ -2788,11 +3219,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2788 * dirs, we always have one new and one deleted 3219 * dirs, we always have one new and one deleted
2789 * ref. The deleted ref is ignored later. 3220 * ref. The deleted ref is ignored later.
2790 */ 3221 */
2791 ret = send_rename(sctx, valid_path, 3222 if (wait_for_parent_move(sctx, cur)) {
2792 cur->full_path); 3223 ret = add_pending_dir_move(sctx,
2793 if (ret < 0) 3224 cur->dir);
2794 goto out; 3225 *pending_move = 1;
2795 ret = fs_path_copy(valid_path, cur->full_path); 3226 } else {
3227 ret = send_rename(sctx, valid_path,
3228 cur->full_path);
3229 if (!ret)
3230 ret = fs_path_copy(valid_path,
3231 cur->full_path);
3232 }
2796 if (ret < 0) 3233 if (ret < 0)
2797 goto out; 3234 goto out;
2798 } else { 3235 } else {
@@ -3161,6 +3598,7 @@ static int process_all_refs(struct send_ctx *sctx,
3161 struct extent_buffer *eb; 3598 struct extent_buffer *eb;
3162 int slot; 3599 int slot;
3163 iterate_inode_ref_t cb; 3600 iterate_inode_ref_t cb;
3601 int pending_move = 0;
3164 3602
3165 path = alloc_path_for_send(); 3603 path = alloc_path_for_send();
3166 if (!path) 3604 if (!path)
@@ -3204,7 +3642,9 @@ static int process_all_refs(struct send_ctx *sctx,
3204 } 3642 }
3205 btrfs_release_path(path); 3643 btrfs_release_path(path);
3206 3644
3207 ret = process_recorded_refs(sctx); 3645 ret = process_recorded_refs(sctx, &pending_move);
3646 /* Only applicable to an incremental send. */
3647 ASSERT(pending_move == 0);
3208 3648
3209out: 3649out:
3210 btrfs_free_path(path); 3650 btrfs_free_path(path);
@@ -4165,7 +4605,9 @@ out:
4165 return ret; 4605 return ret;
4166} 4606}
4167 4607
4168static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) 4608static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
4609 int *pending_move,
4610 int *refs_processed)
4169{ 4611{
4170 int ret = 0; 4612 int ret = 0;
4171 4613
@@ -4177,17 +4619,11 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
4177 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 4619 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4178 goto out; 4620 goto out;
4179 4621
4180 ret = process_recorded_refs(sctx); 4622 ret = process_recorded_refs(sctx, pending_move);
4181 if (ret < 0) 4623 if (ret < 0)
4182 goto out; 4624 goto out;
4183 4625
4184 /* 4626 *refs_processed = 1;
4185 * We have processed the refs and thus need to advance send_progress.
4186 * Now, calls to get_cur_xxx will take the updated refs of the current
4187 * inode into account.
4188 */
4189 sctx->send_progress = sctx->cur_ino + 1;
4190
4191out: 4627out:
4192 return ret; 4628 return ret;
4193} 4629}
@@ -4203,11 +4639,29 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4203 u64 right_gid; 4639 u64 right_gid;
4204 int need_chmod = 0; 4640 int need_chmod = 0;
4205 int need_chown = 0; 4641 int need_chown = 0;
4642 int pending_move = 0;
4643 int refs_processed = 0;
4206 4644
4207 ret = process_recorded_refs_if_needed(sctx, at_end); 4645 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
4646 &refs_processed);
4208 if (ret < 0) 4647 if (ret < 0)
4209 goto out; 4648 goto out;
4210 4649
4650 /*
4651 * We have processed the refs and thus need to advance send_progress.
4652 * Now, calls to get_cur_xxx will take the updated refs of the current
4653 * inode into account.
4654 *
4655 * On the other hand, if our current inode is a directory and couldn't
4656 * be moved/renamed because its parent was renamed/moved too and it has
4657 * a higher inode number, we can only move/rename our current inode
4658 * after we moved/renamed its parent. Therefore in this case operate on
4659 * the old path (pre move/rename) of our current inode, and the
4660 * move/rename will be performed later.
4661 */
4662 if (refs_processed && !pending_move)
4663 sctx->send_progress = sctx->cur_ino + 1;
4664
4211 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 4665 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4212 goto out; 4666 goto out;
4213 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 4667 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
@@ -4269,9 +4723,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4269 } 4723 }
4270 4724
4271 /* 4725 /*
4272 * Need to send that every time, no matter if it actually changed 4726 * If other directory inodes depended on our current directory
4273 * between the two trees as we have done changes to the inode before. 4727 * inode's move/rename, now do their move/rename operations.
4274 */ 4728 */
4729 if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
4730 ret = apply_children_dir_moves(sctx);
4731 if (ret)
4732 goto out;
4733 }
4734
4735 /*
4736 * Need to send that every time, no matter if it actually
4737 * changed between the two trees as we have done changes to
4738 * the inode before.
4739 */
4740 sctx->send_progress = sctx->cur_ino + 1;
4275 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 4741 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4276 if (ret < 0) 4742 if (ret < 0)
4277 goto out; 4743 goto out;
@@ -4839,6 +5305,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4839 goto out; 5305 goto out;
4840 } 5306 }
4841 5307
5308 sctx->pending_dir_moves = RB_ROOT;
5309 sctx->waiting_dir_moves = RB_ROOT;
5310
4842 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 5311 sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4843 (arg->clone_sources_count + 1)); 5312 (arg->clone_sources_count + 1));
4844 if (!sctx->clone_roots) { 5313 if (!sctx->clone_roots) {
@@ -4947,6 +5416,34 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4947 } 5416 }
4948 5417
4949out: 5418out:
5419 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
5420 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
5421 struct rb_node *n;
5422 struct pending_dir_move *pm;
5423
5424 n = rb_first(&sctx->pending_dir_moves);
5425 pm = rb_entry(n, struct pending_dir_move, node);
5426 while (!list_empty(&pm->list)) {
5427 struct pending_dir_move *pm2;
5428
5429 pm2 = list_first_entry(&pm->list,
5430 struct pending_dir_move, list);
5431 free_pending_move(sctx, pm2);
5432 }
5433 free_pending_move(sctx, pm);
5434 }
5435
5436 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
5437 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
5438 struct rb_node *n;
5439 struct waiting_dir_move *dm;
5440
5441 n = rb_first(&sctx->waiting_dir_moves);
5442 dm = rb_entry(n, struct waiting_dir_move, node);
5443 rb_erase(&dm->node, &sctx->waiting_dir_moves);
5444 kfree(dm);
5445 }
5446
4950 if (sort_clone_roots) { 5447 if (sort_clone_roots) {
4951 for (i = 0; i < sctx->clone_roots_cnt; i++) 5448 for (i = 0; i < sctx->clone_roots_cnt; i++)
4952 btrfs_root_dec_send_in_progress( 5449 btrfs_root_dec_send_in_progress(