Diffstat (limited to 'fs/btrfs/send.c')

 fs/btrfs/send.c | 973
 1 file changed, 809 insertions(+), 164 deletions(-)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 945d1db98f26..9dde9717c1b9 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -24,12 +24,12 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/radix-tree.h>
-#include <linux/crc32c.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 
 #include "send.h"
 #include "backref.h"
+#include "hash.h"
 #include "locking.h"
 #include "disk-io.h"
 #include "btrfs_inode.h"
@@ -88,8 +88,6 @@ struct send_ctx {
 	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
 	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
 
-	struct vfsmount *mnt;
-
 	struct btrfs_root *send_root;
 	struct btrfs_root *parent_root;
 	struct clone_root *clone_roots;
@@ -111,6 +109,7 @@ struct send_ctx {
 	int cur_inode_deleted;
 	u64 cur_inode_size;
 	u64 cur_inode_mode;
+	u64 cur_inode_last_extent;
 
 	u64 send_progress;
 
@@ -122,6 +121,74 @@ struct send_ctx {
 	int name_cache_size;
 
 	char *read_buf;
+
+	/*
+	 * We process inodes by their increasing order, so if before an
+	 * incremental send we reverse the parent/child relationship of
+	 * directories such that a directory with a lower inode number was
+	 * the parent of a directory with a higher inode number, and the one
+	 * becoming the new parent got renamed too, we can't rename/move the
+	 * directory with lower inode number when we finish processing it - we
+	 * must process the directory with higher inode number first, then
+	 * rename/move it and then rename/move the directory with lower inode
+	 * number. Example follows.
+	 *
+	 * Tree state when the first send was performed:
+	 *
+	 * .
+	 * |-- a                   (ino 257)
+	 *     |-- b               (ino 258)
+	 *         |
+	 *         |
+	 *         |-- c           (ino 259)
+	 *         |   |-- d       (ino 260)
+	 *         |
+	 *         |-- c2          (ino 261)
+	 *
+	 * Tree state when the second (incremental) send is performed:
+	 *
+	 * .
+	 * |-- a                   (ino 257)
+	 *     |-- b               (ino 258)
+	 *         |-- c2          (ino 261)
+	 *             |-- d2      (ino 260)
+	 *                 |-- cc  (ino 259)
+	 *
+	 * The sequence of steps that lead to the second state was:
+	 *
+	 * mv /a/b/c/d /a/b/c2/d2
+	 * mv /a/b/c /a/b/c2/d2/cc
+	 *
+	 * "c" has lower inode number, but we can't move it (2nd mv operation)
+	 * before we move "d", which has higher inode number.
+	 *
+	 * So we just memorize which move/rename operations must be performed
+	 * later when their respective parent is processed and moved/renamed.
+	 */
+
+	/* Indexed by parent directory inode number. */
+	struct rb_root pending_dir_moves;
+
+	/*
+	 * Reverse index, indexed by the inode number of a directory that
+	 * is waiting for the move/rename of its immediate parent before its
+	 * own move/rename can be performed.
+	 */
+	struct rb_root waiting_dir_moves;
+};
+
+struct pending_dir_move {
+	struct rb_node node;
+	struct list_head list;
+	u64 parent_ino;
+	u64 ino;
+	u64 gen;
+	struct list_head update_refs;
+};
+
+struct waiting_dir_move {
+	struct rb_node node;
+	u64 ino;
 };
 
 struct name_cache_entry {
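The ordering constraint described in the comment above can be reproduced from userspace. A minimal sketch (the paths and inode roles come from the example; the program itself is illustrative and not part of the patch):

    #include <stdio.h>
    #include <sys/stat.h>
    #include <sys/types.h>

    int main(void)
    {
        /* Layout at the time of the first (full) send: /a/b/c/d and /a/b/c2. */
        if (mkdir("a", 0755) || mkdir("a/b", 0755) ||
            mkdir("a/b/c", 0755) || mkdir("a/b/c/d", 0755) ||
            mkdir("a/b/c2", 0755)) {
            perror("mkdir");
            return 1;
        }
        /* The two moves that produce the second tree state. The first
         * rename must happen before the second, which is why send has to
         * process ino 260 ("d") before it can rename ino 259 ("c"). */
        if (rename("a/b/c/d", "a/b/c2/d2")) {
            perror("rename d");
            return 1;
        }
        if (rename("a/b/c", "a/b/c2/d2/cc")) {
            perror("rename c");
            return 1;
        }
        return 0;
    }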
@@ -145,6 +212,15 @@ struct name_cache_entry {
 	char name[];
 };
 
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
+
+static int need_send_hole(struct send_ctx *sctx)
+{
+	return (sctx->parent_root && !sctx->cur_inode_new &&
+		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
+		S_ISREG(sctx->cur_inode_mode));
+}
+
 static void fs_path_reset(struct fs_path *p)
 {
 	if (p->reversed) {
@@ -336,16 +412,6 @@ out:
 	return ret;
 }
 
-#if 0
-static void fs_path_remove(struct fs_path *p)
-{
-	BUG_ON(p->reversed);
-	while (p->start != p->end && *p->end != '/')
-		p->end--;
-	*p->end = 0;
-}
-#endif
-
 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 {
 	int ret;
@@ -436,30 +502,15 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 	return 0;
 }
 
-#if 0
-static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
-{
-	return tlv_put(sctx, attr, &value, sizeof(value));
-}
-
-static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
-{
-	__le16 tmp = cpu_to_le16(value);
-	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-
-static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
-{
-	__le32 tmp = cpu_to_le32(value);
-	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-#endif
+#define TLV_PUT_DEFINE_INT(bits) \
+	static int tlv_put_u##bits(struct send_ctx *sctx, \
+			u##bits attr, u##bits value) \
+	{ \
+		__le##bits __tmp = cpu_to_le##bits(value); \
+		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
+	}
 
-static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
-{
-	__le64 tmp = cpu_to_le64(value);
-	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
+TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
 		const char *str, int len)
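For reference, TLV_PUT_DEFINE_INT(64) expands to the equivalent of the tlv_put_u64() helper removed above; note that the attribute parameter becomes u64 as well, since the macro uses u##bits for both arguments:

    static int tlv_put_u64(struct send_ctx *sctx, u64 attr, u64 value)
    {
        __le64 __tmp = cpu_to_le64(value);
        return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));
    }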
@@ -475,17 +526,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
 	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
 }
 
-#if 0
-static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
-		struct timespec *ts)
-{
-	struct btrfs_timespec bts;
-	bts.sec = cpu_to_le64(ts->tv_sec);
-	bts.nsec = cpu_to_le32(ts->tv_nsec);
-	return tlv_put(sctx, attr, &bts, sizeof(bts));
-}
-#endif
-
 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 		struct extent_buffer *eb,
 		struct btrfs_timespec *ts)
@@ -533,12 +573,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 		if (ret < 0) \
 			goto tlv_put_failure; \
 	} while (0)
-#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
-	do { \
-		ret = tlv_put_timespec(sctx, attrtype, ts); \
-		if (ret < 0) \
-			goto tlv_put_failure; \
-	} while (0)
 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
 	do { \
 		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
@@ -586,7 +620,7 @@ static int send_cmd(struct send_ctx *sctx)
 	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
 	hdr->crc = 0;
 
-	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
+	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
 	hdr->crc = cpu_to_le32(crc);
 
 	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
@@ -1270,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx,
 	if (!backref_ctx->found_itself) {
 		/* found a bug in backref code? */
 		ret = -EIO;
-		printk(KERN_ERR "btrfs: ERROR did not find backref in "
+		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
 				"send_root. inode=%llu, offset=%llu, "
 				"disk_byte=%llu found extent=%llu\n",
 				ino, data_offset, disk_byte, found_key.objectid);
@@ -1298,6 +1332,16 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
 	}
 
 	if (cur_clone_root) {
+		if (compressed != BTRFS_COMPRESS_NONE) {
+			/*
+			 * Offsets given by iterate_extent_inodes() are relative
+			 * to the start of the extent, we need to add logical
+			 * offset from the file extent item.
+			 * (See why at backref.c:check_extent_in_eb())
+			 */
+			cur_clone_root->offset += btrfs_file_extent_offset(eb,
+									   fi);
+		}
 		*found = cur_clone_root;
 		ret = 0;
 	} else {
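In short: for compressed extents the backref walker reports extent-relative offsets, so the logical offset from the file extent item is added before the value is used as a clone source offset. A worked example with hypothetical numbers:

    /* illustrative arithmetic only, mirroring the addition in the hunk above */
    unsigned long long backref_offset = 8192;      /* relative to extent start */
    unsigned long long file_extent_offset = 65536; /* btrfs_file_extent_offset() */
    unsigned long long clone_offset = backref_offset + file_extent_offset;
    /* clone_offset == 73728, the file-relative offset used for the clone */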
@@ -1343,7 +1387,7 @@ static int read_symlink(struct btrfs_root *root,
 	BUG_ON(compression);
 
 	off = btrfs_file_extent_inline_start(ei);
-	len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
 
 	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
 
@@ -1372,7 +1416,7 @@ static int gen_unique_name(struct send_ctx *sctx,
 		return -ENOMEM;
 
 	while (1) {
-		len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
+		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
 				ino, gen, idx);
 		if (len >= sizeof(tmp)) {
 			/* should really not happen */
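The fix above relies on snprintf() returning the length the formatted string would have had, so truncation is detected by len >= sizeof(tmp). With the old sizeof(tmp) - 1 limit, one usable byte was given away and the check compared against the wrong bound. A standalone demonstration (buffer deliberately small to force truncation):

    #include <stdio.h>

    int main(void)
    {
        char tmp[8];
        int len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
                           257ULL, 1ULL, 0ULL);

        if (len >= (int)sizeof(tmp))
            puts("truncated: \"o257-1-0\" needs 9 bytes");
        else
            printf("unique name: %s\n", tmp);
        return 0;
    }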
@@ -1933,6 +1977,7 @@ static void name_cache_free(struct send_ctx *sctx)
  */
 static int __get_cur_name_and_parent(struct send_ctx *sctx,
 				     u64 ino, u64 gen,
+				     int skip_name_cache,
 				     u64 *parent_ino,
 				     u64 *parent_gen,
 				     struct fs_path *dest)
@@ -1942,6 +1987,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 	struct btrfs_path *path = NULL;
 	struct name_cache_entry *nce = NULL;
 
+	if (skip_name_cache)
+		goto get_ref;
 	/*
 	 * First check if we already did a call to this function with the same
 	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
@@ -1986,11 +2033,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 		goto out_cache;
 	}
 
+get_ref:
 	/*
 	 * Depending on whether the inode was already processed or not, use
 	 * send_root or parent_root for ref lookup.
 	 */
-	if (ino < sctx->send_progress)
+	if (ino < sctx->send_progress && !skip_name_cache)
 		ret = get_first_ref(sctx->send_root, ino,
 				    parent_ino, parent_gen, dest);
 	else
@@ -2014,6 +2062,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 			goto out;
 		ret = 1;
 	}
+	if (skip_name_cache)
+		goto out;
 
out_cache:
 	/*
@@ -2081,6 +2131,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
 	u64 parent_inode = 0;
 	u64 parent_gen = 0;
 	int stop = 0;
+	u64 start_ino = ino;
+	u64 start_gen = gen;
+	int skip_name_cache = 0;
 
 	name = fs_path_alloc();
 	if (!name) {
@@ -2088,19 +2141,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
 		goto out;
 	}
 
+	if (is_waiting_for_move(sctx, ino))
+		skip_name_cache = 1;
+
+again:
 	dest->reversed = 1;
 	fs_path_reset(dest);
 
 	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
 		fs_path_reset(name);
 
-		ret = __get_cur_name_and_parent(sctx, ino, gen,
+		ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
 				&parent_inode, &parent_gen, name);
 		if (ret < 0)
 			goto out;
 		if (ret)
 			stop = 1;
 
+		if (!skip_name_cache &&
+		    is_waiting_for_move(sctx, parent_inode)) {
+			ino = start_ino;
+			gen = start_gen;
+			stop = 0;
+			skip_name_cache = 1;
+			goto again;
+		}
+
 		ret = fs_path_add_path(dest, name);
 		if (ret < 0)
 			goto out;
@@ -2131,7 +2197,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
 	char *name = NULL;
 	int namelen;
 
-	path = alloc_path_for_send();
+	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
@@ -2180,12 +2246,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
 		     sctx->send_root->root_item.uuid);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
-		    sctx->send_root->root_item.ctransid);
+		    le64_to_cpu(sctx->send_root->root_item.ctransid));
 	if (parent_root) {
 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
 			     sctx->parent_root->root_item.uuid);
 		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-			    sctx->parent_root->root_item.ctransid);
+			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
 	}
 
 	ret = send_cmd(sctx);
@@ -2672,10 +2738,347 @@ out:
 	return ret;
 }
 
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
+{
+	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
+	struct waiting_dir_move *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct waiting_dir_move, node);
+		if (ino < entry->ino)
+			n = n->rb_left;
+		else if (ino > entry->ino)
+			n = n->rb_right;
+		else
+			return 1;
+	}
+	return 0;
+}
+
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+{
+	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
+	struct rb_node *parent = NULL;
+	struct waiting_dir_move *entry, *dm;
+
+	dm = kmalloc(sizeof(*dm), GFP_NOFS);
+	if (!dm)
+		return -ENOMEM;
+	dm->ino = ino;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct waiting_dir_move, node);
+		if (ino < entry->ino) {
+			p = &(*p)->rb_left;
+		} else if (ino > entry->ino) {
+			p = &(*p)->rb_right;
+		} else {
+			kfree(dm);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&dm->node, parent, p);
+	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
+	return 0;
+}
+
+static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+{
+	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
+	struct waiting_dir_move *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct waiting_dir_move, node);
+		if (ino < entry->ino) {
+			n = n->rb_left;
+		} else if (ino > entry->ino) {
+			n = n->rb_right;
+		} else {
+			rb_erase(&entry->node, &sctx->waiting_dir_moves);
+			kfree(entry);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
+{
+	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
+	struct rb_node *parent = NULL;
+	struct pending_dir_move *entry, *pm;
+	struct recorded_ref *cur;
+	int exists = 0;
+	int ret;
+
+	pm = kmalloc(sizeof(*pm), GFP_NOFS);
+	if (!pm)
+		return -ENOMEM;
+	pm->parent_ino = parent_ino;
+	pm->ino = sctx->cur_ino;
+	pm->gen = sctx->cur_inode_gen;
+	INIT_LIST_HEAD(&pm->list);
+	INIT_LIST_HEAD(&pm->update_refs);
+	RB_CLEAR_NODE(&pm->node);
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct pending_dir_move, node);
+		if (parent_ino < entry->parent_ino) {
+			p = &(*p)->rb_left;
+		} else if (parent_ino > entry->parent_ino) {
+			p = &(*p)->rb_right;
+		} else {
+			exists = 1;
+			break;
+		}
+	}
+
+	list_for_each_entry(cur, &sctx->deleted_refs, list) {
+		ret = dup_ref(cur, &pm->update_refs);
+		if (ret < 0)
+			goto out;
+	}
+	list_for_each_entry(cur, &sctx->new_refs, list) {
+		ret = dup_ref(cur, &pm->update_refs);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = add_waiting_dir_move(sctx, pm->ino);
+	if (ret)
+		goto out;
+
+	if (exists) {
+		list_add_tail(&pm->list, &entry->list);
+	} else {
+		rb_link_node(&pm->node, parent, p);
+		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
+	}
+	ret = 0;
+out:
+	if (ret) {
+		__free_recorded_refs(&pm->update_refs);
+		kfree(pm);
+	}
+	return ret;
+}
+
+static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
+						      u64 parent_ino)
+{
+	struct rb_node *n = sctx->pending_dir_moves.rb_node;
+	struct pending_dir_move *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct pending_dir_move, node);
+		if (parent_ino < entry->parent_ino)
+			n = n->rb_left;
+		else if (parent_ino > entry->parent_ino)
+			n = n->rb_right;
+		else
+			return entry;
+	}
+	return NULL;
+}
+
+static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
+{
+	struct fs_path *from_path = NULL;
+	struct fs_path *to_path = NULL;
+	u64 orig_progress = sctx->send_progress;
+	struct recorded_ref *cur;
+	int ret;
+
+	from_path = fs_path_alloc();
+	if (!from_path)
+		return -ENOMEM;
+
+	sctx->send_progress = pm->ino;
+	ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
+	if (ret < 0)
+		goto out;
+
+	to_path = fs_path_alloc();
+	if (!to_path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	sctx->send_progress = sctx->cur_ino + 1;
+	ret = del_waiting_dir_move(sctx, pm->ino);
+	ASSERT(ret == 0);
+
+	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
+	if (ret < 0)
+		goto out;
+
+	ret = send_rename(sctx, from_path, to_path);
+	if (ret < 0)
+		goto out;
+
+	ret = send_utimes(sctx, pm->ino, pm->gen);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * After rename/move, need to update the utimes of both new parent(s)
+	 * and old parent(s).
+	 */
+	list_for_each_entry(cur, &pm->update_refs, list) {
+		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	fs_path_free(from_path);
+	fs_path_free(to_path);
+	sctx->send_progress = orig_progress;
+
+	return ret;
+}
+
+static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
+{
+	if (!list_empty(&m->list))
+		list_del(&m->list);
+	if (!RB_EMPTY_NODE(&m->node))
+		rb_erase(&m->node, &sctx->pending_dir_moves);
+	__free_recorded_refs(&m->update_refs);
+	kfree(m);
+}
+
+static void tail_append_pending_moves(struct pending_dir_move *moves,
+				      struct list_head *stack)
+{
+	if (list_empty(&moves->list)) {
+		list_add_tail(&moves->list, stack);
+	} else {
+		LIST_HEAD(list);
+		list_splice_init(&moves->list, &list);
+		list_add_tail(&moves->list, stack);
+		list_splice_tail(&list, stack);
+	}
+}
+
+static int apply_children_dir_moves(struct send_ctx *sctx)
+{
+	struct pending_dir_move *pm;
+	struct list_head stack;
+	u64 parent_ino = sctx->cur_ino;
+	int ret = 0;
+
+	pm = get_pending_dir_moves(sctx, parent_ino);
+	if (!pm)
+		return 0;
+
+	INIT_LIST_HEAD(&stack);
+	tail_append_pending_moves(pm, &stack);
+
+	while (!list_empty(&stack)) {
+		pm = list_first_entry(&stack, struct pending_dir_move, list);
+		parent_ino = pm->ino;
+		ret = apply_dir_move(sctx, pm);
+		free_pending_move(sctx, pm);
+		if (ret)
+			goto out;
+		pm = get_pending_dir_moves(sctx, parent_ino);
+		if (pm)
+			tail_append_pending_moves(pm, &stack);
+	}
+	return 0;
+
+out:
+	while (!list_empty(&stack)) {
+		pm = list_first_entry(&stack, struct pending_dir_move, list);
+		free_pending_move(sctx, pm);
+	}
+	return ret;
+}
+
+static int wait_for_parent_move(struct send_ctx *sctx,
+				struct recorded_ref *parent_ref)
+{
+	int ret;
+	u64 ino = parent_ref->dir;
+	u64 parent_ino_before, parent_ino_after;
+	u64 new_gen, old_gen;
+	struct fs_path *path_before = NULL;
+	struct fs_path *path_after = NULL;
+	int len1, len2;
+
+	if (parent_ref->dir <= sctx->cur_ino)
+		return 0;
+
+	if (is_waiting_for_move(sctx, ino))
+		return 1;
+
+	ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
+			     NULL, NULL, NULL, NULL);
+	if (ret == -ENOENT)
+		return 0;
+	else if (ret < 0)
+		return ret;
+
+	ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
+			     NULL, NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (new_gen != old_gen)
+		return 0;
+
+	path_before = fs_path_alloc();
+	if (!path_before)
+		return -ENOMEM;
+
+	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
+			    NULL, path_before);
+	if (ret == -ENOENT) {
+		ret = 0;
+		goto out;
+	} else if (ret < 0) {
+		goto out;
+	}
+
+	path_after = fs_path_alloc();
+	if (!path_after) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
+			    NULL, path_after);
+	if (ret == -ENOENT) {
+		ret = 0;
+		goto out;
+	} else if (ret < 0) {
+		goto out;
+	}
+
+	len1 = fs_path_len(path_before);
+	len2 = fs_path_len(path_after);
+	if ((parent_ino_before != parent_ino_after) && (len1 != len2 ||
+	    memcmp(path_before->start, path_after->start, len1))) {
+		ret = 1;
+		goto out;
+	}
+	ret = 0;
+
+out:
+	fs_path_free(path_before);
+	fs_path_free(path_after);
+
+	return ret;
+}
+
 /*
  * This does all the move/link/unlink/rmdir magic.
  */
-static int process_recorded_refs(struct send_ctx *sctx)
+static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 {
 	int ret = 0;
 	struct recorded_ref *cur;
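apply_children_dir_moves() drains the stack in FIFO order, so a delayed directory is always renamed after the parent it waited for, and its own delayed children are queued behind it. A self-contained model of that traversal (inode numbers reuse the example from the send_ctx comment; the code is a hypothetical sketch, not part of the patch):

    #include <stdio.h>

    struct move { unsigned long long parent_ino, ino; };

    int main(void)
    {
        /* From the example: "c" (ino 259) had to wait for "d" (ino 260);
         * entries are keyed by the parent that must move first. */
        static const struct move pending[] = { { 260, 259 } };
        unsigned long long queue[8] = { 260 };	/* inode that just finished */
        unsigned int head = 0, tail = 1;

        while (head < tail) {
            unsigned long long parent = queue[head++];
            unsigned int i;

            for (i = 0; i < sizeof(pending) / sizeof(pending[0]); i++)
                if (pending[i].parent_ino == parent) {
                    printf("apply delayed move of ino %llu\n",
                           pending[i].ino);
                    /* its own delayed children go behind it */
                    queue[tail++] = pending[i].ino;
                }
        }
        return 0;
    }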
@@ -2824,11 +3227,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
 				 * dirs, we always have one new and one deleted
 				 * ref. The deleted ref is ignored later.
 				 */
-				ret = send_rename(sctx, valid_path,
-						cur->full_path);
-				if (ret < 0)
-					goto out;
-				ret = fs_path_copy(valid_path, cur->full_path);
+				if (wait_for_parent_move(sctx, cur)) {
+					ret = add_pending_dir_move(sctx,
+								   cur->dir);
+					*pending_move = 1;
+				} else {
+					ret = send_rename(sctx, valid_path,
+							  cur->full_path);
+					if (!ret)
+						ret = fs_path_copy(valid_path,
+							       cur->full_path);
+				}
 				if (ret < 0)
 					goto out;
 			} else {
@@ -3197,6 +3606,7 @@ static int process_all_refs(struct send_ctx *sctx,
 	struct extent_buffer *eb;
 	int slot;
 	iterate_inode_ref_t cb;
+	int pending_move = 0;
 
 	path = alloc_path_for_send();
 	if (!path)
@@ -3240,7 +3650,9 @@ static int process_all_refs(struct send_ctx *sctx,
 	}
 	btrfs_release_path(path);
 
-	ret = process_recorded_refs(sctx);
+	ret = process_recorded_refs(sctx, &pending_move);
+	/* Only applicable to an incremental send. */
+	ASSERT(pending_move == 0);
 
out:
 	btrfs_free_path(path);
@@ -3706,7 +4118,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
 		     clone_root->root->root_item.uuid);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-		    clone_root->root->root_item.ctransid);
+		    le64_to_cpu(clone_root->root->root_item.ctransid));
 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
 		    clone_root->offset);
@@ -3752,6 +4164,39 @@ out:
 	return ret;
 }
 
+static int send_hole(struct send_ctx *sctx, u64 end)
+{
+	struct fs_path *p = NULL;
+	u64 offset = sctx->cur_inode_last_extent;
+	u64 len;
+	int ret = 0;
+
+	p = fs_path_alloc();
+	if (!p)
+		return -ENOMEM;
+	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
+	while (offset < end) {
+		len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
+
+		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
+		if (ret < 0)
+			break;
+		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+		if (ret < 0)
+			break;
+		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+		TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
+		ret = send_cmd(sctx);
+		if (ret < 0)
+			break;
+		offset += len;
+	}
+tlv_put_failure:
+	fs_path_free(p);
+	return ret;
+}
+
 static int send_write_or_clone(struct send_ctx *sctx,
 			       struct btrfs_path *path,
 			       struct btrfs_key *key,
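send_hole() covers the gap [cur_inode_last_extent, end) with ordinary BTRFS_SEND_C_WRITE commands full of zeroes, chunked by BTRFS_SEND_READ_SIZE. The number of commands for a given gap follows directly; the 48K chunk size below is an assumption for illustration (see BTRFS_SEND_READ_SIZE in send.h):

    #include <stdio.h>

    #define BTRFS_SEND_READ_SIZE (1024 * 48)	/* assumed value */

    int main(void)
    {
        unsigned long long offset = 12288;	/* cur_inode_last_extent */
        unsigned long long end = 131072;	/* next extent's file offset */
        unsigned int cmds = 0;

        while (offset < end) {
            unsigned long long len = end - offset;

            if (len > BTRFS_SEND_READ_SIZE)
                len = BTRFS_SEND_READ_SIZE;
            offset += len;
            cmds++;
        }
        printf("gap of %llu bytes -> %u write commands\n",
               131072ULL - 12288ULL, cmds);	/* 118784 bytes -> 3 */
        return 0;
    }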
@@ -3764,12 +4209,14 @@ static int send_write_or_clone(struct send_ctx *sctx,
 	u64 len;
 	u32 l;
 	u8 type;
+	u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
 
 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
 			struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], ei);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+		len = btrfs_file_extent_inline_len(path->nodes[0],
+						   path->slots[0], ei);
 		/*
 		 * it is possible the inline item won't cover the whole page,
 		 * but there may be items after this page. Make
@@ -3787,7 +4234,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
 		goto out;
 	}
 
-	if (clone_root) {
+	if (clone_root && IS_ALIGNED(offset + len, bs)) {
 		ret = send_clone(sctx, offset, len, clone_root);
 	} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
 		ret = send_update_extent(sctx, offset, len);
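The new gate only clones ranges whose end is block aligned; anything else falls back to plain writes. IS_ALIGNED() behaves like the kernel macro of the same name, simplified here for a standalone demo:

    #include <stdio.h>

    #define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

    int main(void)
    {
        unsigned long long bs = 4096;	/* sb->s_blocksize */
        unsigned long long offset = 8192, len = 2048;

        /* 8192 + 2048 = 10240 is not a multiple of 4096 -> prints "no" */
        printf("clone allowed: %s\n",
               IS_ALIGNED(offset + len, bs) ? "yes" : "no");
        return 0;
    }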
@@ -3979,6 +4426,101 @@ out:
 	return ret;
 }
 
+static int get_last_extent(struct send_ctx *sctx, u64 offset)
+{
+	struct btrfs_path *path;
+	struct btrfs_root *root = sctx->send_root;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	u64 extent_end;
+	u8 type;
+	int ret;
+
+	path = alloc_path_for_send();
+	if (!path)
+		return -ENOMEM;
+
+	sctx->cur_inode_last_extent = 0;
+
+	key.objectid = sctx->cur_ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = offset;
+	ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	ret = 0;
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
+		goto out;
+
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	type = btrfs_file_extent_type(path->nodes[0], fi);
+	if (type == BTRFS_FILE_EXTENT_INLINE) {
+		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
+							path->slots[0], fi);
+		extent_end = ALIGN(key.offset + size,
+				   sctx->send_root->sectorsize);
+	} else {
+		extent_end = key.offset +
+			btrfs_file_extent_num_bytes(path->nodes[0], fi);
+	}
+	sctx->cur_inode_last_extent = extent_end;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
+			   struct btrfs_key *key)
+{
+	struct btrfs_file_extent_item *fi;
+	u64 extent_end;
+	u8 type;
+	int ret = 0;
+
+	if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
+		return 0;
+
+	if (sctx->cur_inode_last_extent == (u64)-1) {
+		ret = get_last_extent(sctx, key->offset - 1);
+		if (ret)
+			return ret;
+	}
+
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	type = btrfs_file_extent_type(path->nodes[0], fi);
+	if (type == BTRFS_FILE_EXTENT_INLINE) {
+		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
+							path->slots[0], fi);
+		extent_end = ALIGN(key->offset + size,
+				   sctx->send_root->sectorsize);
+	} else {
+		extent_end = key->offset +
+			btrfs_file_extent_num_bytes(path->nodes[0], fi);
+	}
+
+	if (path->slots[0] == 0 &&
+	    sctx->cur_inode_last_extent < key->offset) {
+		/*
+		 * We might have skipped entire leafs that contained only
+		 * file extent items for our current inode. These leafs have
+		 * a generation number smaller (older) than the one in the
+		 * current leaf and the leaf our last extent came from, and
+		 * are located between these 2 leafs.
+		 */
+		ret = get_last_extent(sctx, key->offset - 1);
+		if (ret)
+			return ret;
+	}
+
+	if (sctx->cur_inode_last_extent < key->offset)
+		ret = send_hole(sctx, key->offset);
+	sctx->cur_inode_last_extent = extent_end;
+	return ret;
+}
+
 static int process_extent(struct send_ctx *sctx,
 		struct btrfs_path *path,
 		struct btrfs_key *key)
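Both helpers above round an inline extent up to the sector boundary before comparing against the next key offset, so inlined file data is not mistaken for a hole. Worked example with ALIGN() behaving as the kernel macro does for power-of-two alignments:

    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long long)(a) - 1))

    int main(void)
    {
        unsigned long long key_offset = 0;
        unsigned long long size = 3000;	/* inline data bytes */
        unsigned long long sectorsize = 4096;

        /* extent_end = 4096: a following extent at offset 4096
         * therefore leaves no gap for send_hole() to fill. */
        printf("extent_end = %llu\n",
               ALIGN(key_offset + size, sectorsize));
        return 0;
    }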
@@ -3995,7 +4537,7 @@ static int process_extent(struct send_ctx *sctx,
 			goto out;
 		if (ret) {
 			ret = 0;
-			goto out;
+			goto out_hole;
 		}
 	} else {
 		struct btrfs_file_extent_item *ei;
@@ -4031,7 +4573,10 @@
 		goto out;
 
 	ret = send_write_or_clone(sctx, path, key, found_clone);
-
+	if (ret)
+		goto out;
+out_hole:
+	ret = maybe_send_hole(sctx, path, key);
out:
 	return ret;
 }
@@ -4054,17 +4599,25 @@ static int process_all_extents(struct send_ctx *sctx)
 	key.objectid = sctx->cmp_key->objectid;
 	key.type = BTRFS_EXTENT_DATA_KEY;
 	key.offset = 0;
-	while (1) {
-		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
-		if (ret < 0)
-			goto out;
-		if (ret) {
-			ret = 0;
-			goto out;
-		}
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
 
+	while (1) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
+
+		if (slot >= btrfs_header_nritems(eb)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				goto out;
+			} else if (ret > 0) {
+				ret = 0;
+				break;
+			}
+			continue;
+		}
+
 		btrfs_item_key_to_cpu(eb, &found_key, slot);
 
 		if (found_key.objectid != key.objectid ||
@@ -4077,8 +4630,7 @@ static int process_all_extents(struct send_ctx *sctx)
 		if (ret < 0)
 			goto out;
 
-		btrfs_release_path(path);
-		key.offset = found_key.offset + 1;
+		path->slots[0]++;
 	}
 
out:
@@ -4086,7 +4638,9 @@ out:
 	return ret;
 }
 
-static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
+static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
+					   int *pending_move,
+					   int *refs_processed)
 {
 	int ret = 0;
 
@@ -4098,17 +4652,11 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
 	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
 		goto out;
 
-	ret = process_recorded_refs(sctx);
+	ret = process_recorded_refs(sctx, pending_move);
 	if (ret < 0)
 		goto out;
 
-	/*
-	 * We have processed the refs and thus need to advance send_progress.
-	 * Now, calls to get_cur_xxx will take the updated refs of the current
-	 * inode into account.
-	 */
-	sctx->send_progress = sctx->cur_ino + 1;
-
+	*refs_processed = 1;
out:
 	return ret;
 }
@@ -4124,11 +4672,29 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 	u64 right_gid;
 	int need_chmod = 0;
 	int need_chown = 0;
+	int pending_move = 0;
+	int refs_processed = 0;
 
-	ret = process_recorded_refs_if_needed(sctx, at_end);
+	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
+					      &refs_processed);
 	if (ret < 0)
 		goto out;
 
+	/*
+	 * We have processed the refs and thus need to advance send_progress.
+	 * Now, calls to get_cur_xxx will take the updated refs of the current
+	 * inode into account.
+	 *
+	 * On the other hand, if our current inode is a directory and couldn't
+	 * be moved/renamed because its parent was renamed/moved too and it has
+	 * a higher inode number, we can only move/rename our current inode
+	 * after we moved/renamed its parent. Therefore in this case operate on
+	 * the old path (pre move/rename) of our current inode, and the
+	 * move/rename will be performed later.
+	 */
+	if (refs_processed && !pending_move)
+		sctx->send_progress = sctx->cur_ino + 1;
+
 	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
 		goto out;
 	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
@@ -4157,6 +4723,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 	}
 
 	if (S_ISREG(sctx->cur_inode_mode)) {
+		if (need_send_hole(sctx)) {
+			if (sctx->cur_inode_last_extent == (u64)-1) {
+				ret = get_last_extent(sctx, (u64)-1);
+				if (ret)
+					goto out;
+			}
+			if (sctx->cur_inode_last_extent <
+			    sctx->cur_inode_size) {
+				ret = send_hole(sctx, sctx->cur_inode_size);
+				if (ret)
+					goto out;
+			}
+		}
 		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
 				sctx->cur_inode_size);
 		if (ret < 0)
@@ -4177,9 +4756,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 	}
 
 	/*
-	 * Need to send that every time, no matter if it actually changed
-	 * between the two trees as we have done changes to the inode before.
+	 * If other directory inodes depended on our current directory
+	 * inode's move/rename, now do their move/rename operations.
+	 */
+	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
+		ret = apply_children_dir_moves(sctx);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * Need to send that every time, no matter if it actually
+	 * changed between the two trees as we have done changes to
+	 * the inode before.
 	 */
+	sctx->send_progress = sctx->cur_ino + 1;
 	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
 	if (ret < 0)
 		goto out;
@@ -4200,6 +4791,7 @@ static int changed_inode(struct send_ctx *sctx,
 
 	sctx->cur_ino = key->objectid;
 	sctx->cur_inode_new_gen = 0;
+	sctx->cur_inode_last_extent = (u64)-1;
 
 	/*
 	 * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -4480,14 +5072,18 @@ static int changed_cb(struct btrfs_root *left_root,
 	struct send_ctx *sctx = ctx;
 
 	if (result == BTRFS_COMPARE_TREE_SAME) {
-		if (key->type != BTRFS_INODE_REF_KEY &&
-		    key->type != BTRFS_INODE_EXTREF_KEY)
-			return 0;
-		ret = compare_refs(sctx, left_path, key);
-		if (!ret)
+		if (key->type == BTRFS_INODE_REF_KEY ||
+		    key->type == BTRFS_INODE_EXTREF_KEY) {
+			ret = compare_refs(sctx, left_path, key);
+			if (!ret)
+				return 0;
+			if (ret < 0)
+				return ret;
+		} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
+			return maybe_send_hole(sctx, left_path, key);
+		} else {
 			return 0;
-		if (ret < 0)
-			return ret;
+		}
 		result = BTRFS_COMPARE_TREE_CHANGED;
 		ret = 0;
 	}
@@ -4522,7 +5118,6 @@ out:
 static int full_send_tree(struct send_ctx *sctx)
 {
 	int ret;
-	struct btrfs_trans_handle *trans = NULL;
 	struct btrfs_root *send_root = sctx->send_root;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
@@ -4544,19 +5139,6 @@ static int full_send_tree(struct send_ctx *sctx)
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.offset = 0;
 
-join_trans:
-	/*
-	 * We need to make sure the transaction does not get committed
-	 * while we do anything on commit roots. Join a transaction to prevent
-	 * this.
-	 */
-	trans = btrfs_join_transaction(send_root);
-	if (IS_ERR(trans)) {
-		ret = PTR_ERR(trans);
-		trans = NULL;
-		goto out;
-	}
-
 	/*
 	 * Make sure the tree has not changed after re-joining. We detect this
 	 * by comparing start_ctransid and ctransid. They should always match.
@@ -4566,7 +5148,7 @@ join_trans:
 	spin_unlock(&send_root->root_item_lock);
 
 	if (ctransid != start_ctransid) {
-		WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
+		WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
 			"send was modified in between. This is "
 			"probably a bug.\n");
 		ret = -EIO;
@@ -4580,19 +5162,6 @@ join_trans:
 		goto out_finish;
 
 	while (1) {
-		/*
-		 * When someone want to commit while we iterate, end the
-		 * joined transaction and rejoin.
-		 */
-		if (btrfs_should_end_transaction(trans, send_root)) {
-			ret = btrfs_end_transaction(trans, send_root);
-			trans = NULL;
-			if (ret < 0)
-				goto out;
-			btrfs_release_path(path);
-			goto join_trans;
-		}
-
 		eb = path->nodes[0];
 		slot = path->slots[0];
 		btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -4620,12 +5189,6 @@ out_finish:
 
out:
 	btrfs_free_path(path);
-	if (trans) {
-		if (!ret)
-			ret = btrfs_end_transaction(trans, send_root);
-		else
-			btrfs_end_transaction(trans, send_root);
-	}
 	return ret;
 }
 
@@ -4662,6 +5225,21 @@ out:
 	return ret;
 }
 
+static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
+{
+	spin_lock(&root->root_item_lock);
+	root->send_in_progress--;
+	/*
+	 * Not much left to do, we don't know why it's unbalanced and
+	 * can't blindly reset it to 0.
+	 */
+	if (root->send_in_progress < 0)
+		btrfs_err(root->fs_info,
+			"send_in_progres unbalanced %d root %llu\n",
+			root->send_in_progress, root->root_key.objectid);
+	spin_unlock(&root->root_item_lock);
+}
+
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 {
 	int ret = 0;
@@ -4673,6 +5251,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	struct send_ctx *sctx = NULL;
 	u32 i;
 	u64 *clone_sources_tmp = NULL;
+	int clone_sources_to_rollback = 0;
+	int sort_clone_roots = 0;
+	int index;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -4681,38 +5262,26 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	fs_info = send_root->fs_info;
 
 	/*
+	 * The subvolume must remain read-only during send, protect against
+	 * making it RW.
+	 */
+	spin_lock(&send_root->root_item_lock);
+	send_root->send_in_progress++;
+	spin_unlock(&send_root->root_item_lock);
+
+	/*
 	 * This is done when we lookup the root, it should already be complete
 	 * by the time we get here.
 	 */
 	WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
 
 	/*
-	 * If we just created this root we need to make sure that the orphan
-	 * cleanup has been done and committed since we search the commit root,
-	 * so check its commit root transid with our otransid and if they match
-	 * commit the transaction to make sure everything is updated.
+	 * Userspace tools do the checks and warn the user if it's
+	 * not RO.
 	 */
-	down_read(&send_root->fs_info->extent_commit_sem);
-	if (btrfs_header_generation(send_root->commit_root) ==
-	    btrfs_root_otransid(&send_root->root_item)) {
-		struct btrfs_trans_handle *trans;
-
-		up_read(&send_root->fs_info->extent_commit_sem);
-
-		trans = btrfs_attach_transaction_barrier(send_root);
-		if (IS_ERR(trans)) {
-			if (PTR_ERR(trans) != -ENOENT) {
-				ret = PTR_ERR(trans);
-				goto out;
-			}
-			/* ENOENT means theres no transaction */
-		} else {
-			ret = btrfs_commit_transaction(trans, send_root);
-			if (ret)
-				goto out;
-		}
-	} else {
-		up_read(&send_root->fs_info->extent_commit_sem);
+	if (!btrfs_root_readonly(send_root)) {
+		ret = -EPERM;
+		goto out;
 	}
 
 	arg = memdup_user(arg_, sizeof(*arg));
@@ -4753,8 +5322,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		goto out;
 	}
 
-	sctx->mnt = mnt_file->f_path.mnt;
-
 	sctx->send_root = send_root;
 	sctx->clone_roots_cnt = arg->clone_sources_count;
 
@@ -4771,6 +5338,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		goto out;
 	}
 
+	sctx->pending_dir_moves = RB_ROOT;
+	sctx->waiting_dir_moves = RB_ROOT;
+
 	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
 			(arg->clone_sources_count + 1));
 	if (!sctx->clone_roots) {
@@ -4798,11 +5368,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		key.objectid = clone_sources_tmp[i];
 		key.type = BTRFS_ROOT_ITEM_KEY;
 		key.offset = (u64)-1;
+
+		index = srcu_read_lock(&fs_info->subvol_srcu);
+
 		clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
 		if (IS_ERR(clone_root)) {
+			srcu_read_unlock(&fs_info->subvol_srcu, index);
 			ret = PTR_ERR(clone_root);
 			goto out;
 		}
+		clone_sources_to_rollback = i + 1;
+		spin_lock(&clone_root->root_item_lock);
+		clone_root->send_in_progress++;
+		if (!btrfs_root_readonly(clone_root)) {
+			spin_unlock(&clone_root->root_item_lock);
+			srcu_read_unlock(&fs_info->subvol_srcu, index);
+			ret = -EPERM;
+			goto out;
+		}
+		spin_unlock(&clone_root->root_item_lock);
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
+
 		sctx->clone_roots[i].root = clone_root;
 	}
 	vfree(clone_sources_tmp);
@@ -4813,11 +5399,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		key.objectid = arg->parent_root;
 		key.type = BTRFS_ROOT_ITEM_KEY;
 		key.offset = (u64)-1;
+
+		index = srcu_read_lock(&fs_info->subvol_srcu);
+
 		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
 		if (IS_ERR(sctx->parent_root)) {
+			srcu_read_unlock(&fs_info->subvol_srcu, index);
 			ret = PTR_ERR(sctx->parent_root);
 			goto out;
 		}
+
+		spin_lock(&sctx->parent_root->root_item_lock);
+		sctx->parent_root->send_in_progress++;
+		if (!btrfs_root_readonly(sctx->parent_root)) {
+			spin_unlock(&sctx->parent_root->root_item_lock);
+			srcu_read_unlock(&fs_info->subvol_srcu, index);
+			ret = -EPERM;
+			goto out;
+		}
+		spin_unlock(&sctx->parent_root->root_item_lock);
+
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
 	}
 
 	/*
@@ -4831,6 +5433,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	sort(sctx->clone_roots, sctx->clone_roots_cnt,
 	     sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
 	     NULL);
+	sort_clone_roots = 1;
 
 	ret = send_subvol(sctx);
 	if (ret < 0)
@@ -4846,6 +5449,48 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	}
 
out:
+	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
+	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
+		struct rb_node *n;
+		struct pending_dir_move *pm;
+
+		n = rb_first(&sctx->pending_dir_moves);
+		pm = rb_entry(n, struct pending_dir_move, node);
+		while (!list_empty(&pm->list)) {
+			struct pending_dir_move *pm2;
+
+			pm2 = list_first_entry(&pm->list,
+					       struct pending_dir_move, list);
+			free_pending_move(sctx, pm2);
+		}
+		free_pending_move(sctx, pm);
+	}
+
+	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
+	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
+		struct rb_node *n;
+		struct waiting_dir_move *dm;
+
+		n = rb_first(&sctx->waiting_dir_moves);
+		dm = rb_entry(n, struct waiting_dir_move, node);
+		rb_erase(&dm->node, &sctx->waiting_dir_moves);
+		kfree(dm);
+	}
+
+	if (sort_clone_roots) {
+		for (i = 0; i < sctx->clone_roots_cnt; i++)
+			btrfs_root_dec_send_in_progress(
+					sctx->clone_roots[i].root);
+	} else {
+		for (i = 0; sctx && i < clone_sources_to_rollback; i++)
+			btrfs_root_dec_send_in_progress(
+					sctx->clone_roots[i].root);
+
+		btrfs_root_dec_send_in_progress(send_root);
+	}
+	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
+		btrfs_root_dec_send_in_progress(sctx->parent_root);
+
 	kfree(arg);
 	vfree(clone_sources_tmp);
 