diff options
Diffstat (limited to 'fs')
35 files changed, 8689 insertions, 631 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0c4fa2befae..d7fcdba141a 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
11 | reada.o backref.o ulist.o | 11 | reada.o backref.o ulist.o qgroup.o send.o |
12 | 12 | ||
13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o |
14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o | 14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 42704149b72..58b7d14b08e 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers, | |||
206 | 206 | ||
207 | work->ordered_func(work); | 207 | work->ordered_func(work); |
208 | 208 | ||
209 | /* now take the lock again and call the freeing code */ | 209 | /* now take the lock again and drop our item from the list */ |
210 | spin_lock(&workers->order_lock); | 210 | spin_lock(&workers->order_lock); |
211 | list_del(&work->order_list); | 211 | list_del(&work->order_list); |
212 | spin_unlock(&workers->order_lock); | ||
213 | |||
214 | /* | ||
215 | * we don't want to call the ordered free functions | ||
216 | * with the lock held though | ||
217 | */ | ||
212 | work->ordered_free(work); | 218 | work->ordered_free(work); |
219 | spin_lock(&workers->order_lock); | ||
213 | } | 220 | } |
214 | 221 | ||
215 | spin_unlock(&workers->order_lock); | 222 | spin_unlock(&workers->order_lock); |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a383c18e74e..a256f3b2a84 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
773 | */ | 773 | */ |
774 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 774 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
775 | struct btrfs_fs_info *fs_info, u64 bytenr, | 775 | struct btrfs_fs_info *fs_info, u64 bytenr, |
776 | u64 delayed_ref_seq, u64 time_seq, | 776 | u64 time_seq, struct ulist *refs, |
777 | struct ulist *refs, struct ulist *roots, | 777 | struct ulist *roots, const u64 *extent_item_pos) |
778 | const u64 *extent_item_pos) | ||
779 | { | 778 | { |
780 | struct btrfs_key key; | 779 | struct btrfs_key key; |
781 | struct btrfs_path *path; | 780 | struct btrfs_path *path; |
@@ -837,7 +836,7 @@ again: | |||
837 | btrfs_put_delayed_ref(&head->node); | 836 | btrfs_put_delayed_ref(&head->node); |
838 | goto again; | 837 | goto again; |
839 | } | 838 | } |
840 | ret = __add_delayed_refs(head, delayed_ref_seq, | 839 | ret = __add_delayed_refs(head, time_seq, |
841 | &prefs_delayed); | 840 | &prefs_delayed); |
842 | mutex_unlock(&head->mutex); | 841 | mutex_unlock(&head->mutex); |
843 | if (ret) { | 842 | if (ret) { |
@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks) | |||
981 | */ | 980 | */ |
982 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | 981 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
983 | struct btrfs_fs_info *fs_info, u64 bytenr, | 982 | struct btrfs_fs_info *fs_info, u64 bytenr, |
984 | u64 delayed_ref_seq, u64 time_seq, | 983 | u64 time_seq, struct ulist **leafs, |
985 | struct ulist **leafs, | ||
986 | const u64 *extent_item_pos) | 984 | const u64 *extent_item_pos) |
987 | { | 985 | { |
988 | struct ulist *tmp; | 986 | struct ulist *tmp; |
@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
997 | return -ENOMEM; | 995 | return -ENOMEM; |
998 | } | 996 | } |
999 | 997 | ||
1000 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, | 998 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1001 | time_seq, *leafs, tmp, extent_item_pos); | 999 | time_seq, *leafs, tmp, extent_item_pos); |
1002 | ulist_free(tmp); | 1000 | ulist_free(tmp); |
1003 | 1001 | ||
@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
1024 | */ | 1022 | */ |
1025 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 1023 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
1026 | struct btrfs_fs_info *fs_info, u64 bytenr, | 1024 | struct btrfs_fs_info *fs_info, u64 bytenr, |
1027 | u64 delayed_ref_seq, u64 time_seq, | 1025 | u64 time_seq, struct ulist **roots) |
1028 | struct ulist **roots) | ||
1029 | { | 1026 | { |
1030 | struct ulist *tmp; | 1027 | struct ulist *tmp; |
1031 | struct ulist_node *node = NULL; | 1028 | struct ulist_node *node = NULL; |
@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
1043 | 1040 | ||
1044 | ULIST_ITER_INIT(&uiter); | 1041 | ULIST_ITER_INIT(&uiter); |
1045 | while (1) { | 1042 | while (1) { |
1046 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, | 1043 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1047 | time_seq, tmp, *roots, NULL); | 1044 | time_seq, tmp, *roots, NULL); |
1048 | if (ret < 0 && ret != -ENOENT) { | 1045 | if (ret < 0 && ret != -ENOENT) { |
1049 | ulist_free(tmp); | 1046 | ulist_free(tmp); |
@@ -1125,10 +1122,10 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | |||
1125 | * required for the path to fit into the buffer. in that case, the returned | 1122 | * required for the path to fit into the buffer. in that case, the returned |
1126 | * value will be smaller than dest. callers must check this! | 1123 | * value will be smaller than dest. callers must check this! |
1127 | */ | 1124 | */ |
1128 | static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | 1125 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, |
1129 | struct btrfs_inode_ref *iref, | 1126 | struct btrfs_inode_ref *iref, |
1130 | struct extent_buffer *eb_in, u64 parent, | 1127 | struct extent_buffer *eb_in, u64 parent, |
1131 | char *dest, u32 size) | 1128 | char *dest, u32 size) |
1132 | { | 1129 | { |
1133 | u32 len; | 1130 | u32 len; |
1134 | int slot; | 1131 | int slot; |
@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1376 | struct ulist *roots = NULL; | 1373 | struct ulist *roots = NULL; |
1377 | struct ulist_node *ref_node = NULL; | 1374 | struct ulist_node *ref_node = NULL; |
1378 | struct ulist_node *root_node = NULL; | 1375 | struct ulist_node *root_node = NULL; |
1379 | struct seq_list seq_elem = {}; | ||
1380 | struct seq_list tree_mod_seq_elem = {}; | 1376 | struct seq_list tree_mod_seq_elem = {}; |
1381 | struct ulist_iterator ref_uiter; | 1377 | struct ulist_iterator ref_uiter; |
1382 | struct ulist_iterator root_uiter; | 1378 | struct ulist_iterator root_uiter; |
1383 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | ||
1384 | 1379 | ||
1385 | pr_debug("resolving all inodes for extent %llu\n", | 1380 | pr_debug("resolving all inodes for extent %llu\n", |
1386 | extent_item_objectid); | 1381 | extent_item_objectid); |
@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1391 | trans = btrfs_join_transaction(fs_info->extent_root); | 1386 | trans = btrfs_join_transaction(fs_info->extent_root); |
1392 | if (IS_ERR(trans)) | 1387 | if (IS_ERR(trans)) |
1393 | return PTR_ERR(trans); | 1388 | return PTR_ERR(trans); |
1394 | |||
1395 | delayed_refs = &trans->transaction->delayed_refs; | ||
1396 | spin_lock(&delayed_refs->lock); | ||
1397 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | ||
1398 | spin_unlock(&delayed_refs->lock); | ||
1399 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | 1389 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); |
1400 | } | 1390 | } |
1401 | 1391 | ||
1402 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | 1392 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, |
1403 | seq_elem.seq, tree_mod_seq_elem.seq, &refs, | 1393 | tree_mod_seq_elem.seq, &refs, |
1404 | &extent_item_pos); | 1394 | &extent_item_pos); |
1405 | if (ret) | 1395 | if (ret) |
1406 | goto out; | 1396 | goto out; |
@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1408 | ULIST_ITER_INIT(&ref_uiter); | 1398 | ULIST_ITER_INIT(&ref_uiter); |
1409 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { | 1399 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { |
1410 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, | 1400 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, |
1411 | seq_elem.seq, | 1401 | tree_mod_seq_elem.seq, &roots); |
1412 | tree_mod_seq_elem.seq, &roots); | ||
1413 | if (ret) | 1402 | if (ret) |
1414 | break; | 1403 | break; |
1415 | ULIST_ITER_INIT(&root_uiter); | 1404 | ULIST_ITER_INIT(&root_uiter); |
@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1431 | out: | 1420 | out: |
1432 | if (!search_commit_root) { | 1421 | if (!search_commit_root) { |
1433 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | 1422 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); |
1434 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); | ||
1435 | btrfs_end_transaction(trans, fs_info->extent_root); | 1423 | btrfs_end_transaction(trans, fs_info->extent_root); |
1436 | } | 1424 | } |
1437 | 1425 | ||
@@ -1543,7 +1531,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
1543 | ipath->fspath->bytes_left - s_ptr : 0; | 1531 | ipath->fspath->bytes_left - s_ptr : 0; |
1544 | 1532 | ||
1545 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; | 1533 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; |
1546 | fspath = iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, | 1534 | fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, |
1547 | inum, fspath_min, bytes_left); | 1535 | inum, fspath_min, bytes_left); |
1548 | if (IS_ERR(fspath)) | 1536 | if (IS_ERR(fspath)) |
1549 | return PTR_ERR(fspath); | 1537 | return PTR_ERR(fspath); |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index c18d8ac7b79..032f4dc7eab 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
23 | #include "ulist.h" | 23 | #include "ulist.h" |
24 | #include "extent_io.h" | ||
24 | 25 | ||
25 | #define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0) | 26 | #define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0) |
26 | 27 | ||
@@ -58,8 +59,10 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |||
58 | 59 | ||
59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 60 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
60 | struct btrfs_fs_info *fs_info, u64 bytenr, | 61 | struct btrfs_fs_info *fs_info, u64 bytenr, |
61 | u64 delayed_ref_seq, u64 time_seq, | 62 | u64 time_seq, struct ulist **roots); |
62 | struct ulist **roots); | 63 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, |
64 | struct btrfs_inode_ref *iref, struct extent_buffer *eb, | ||
65 | u64 parent, char *dest, u32 size); | ||
63 | 66 | ||
64 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 67 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
65 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 68 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 12394a90d60..5b2ad6bc4fe 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -87,9 +87,6 @@ struct btrfs_inode { | |||
87 | /* node for the red-black tree that links inodes in subvolume root */ | 87 | /* node for the red-black tree that links inodes in subvolume root */ |
88 | struct rb_node rb_node; | 88 | struct rb_node rb_node; |
89 | 89 | ||
90 | /* the space_info for where this inode's data allocations are done */ | ||
91 | struct btrfs_space_info *space_info; | ||
92 | |||
93 | unsigned long runtime_flags; | 90 | unsigned long runtime_flags; |
94 | 91 | ||
95 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | 92 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
@@ -191,11 +188,14 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) | |||
191 | BTRFS_I(inode)->disk_i_size = size; | 188 | BTRFS_I(inode)->disk_i_size = size; |
192 | } | 189 | } |
193 | 190 | ||
194 | static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, | 191 | static inline bool btrfs_is_free_space_inode(struct inode *inode) |
195 | struct inode *inode) | ||
196 | { | 192 | { |
197 | if (root == root->fs_info->tree_root || | 193 | struct btrfs_root *root = BTRFS_I(inode)->root; |
198 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | 194 | |
195 | if (root == root->fs_info->tree_root && | ||
196 | btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID) | ||
197 | return true; | ||
198 | if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
199 | return true; | 199 | return true; |
200 | return false; | 200 | return false; |
201 | } | 201 | } |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index da6e9364a5e..9197e2e3340 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -1032,6 +1032,7 @@ continue_with_current_leaf_stack_frame: | |||
1032 | struct btrfs_disk_key *disk_key; | 1032 | struct btrfs_disk_key *disk_key; |
1033 | u8 type; | 1033 | u8 type; |
1034 | u32 item_offset; | 1034 | u32 item_offset; |
1035 | u32 item_size; | ||
1035 | 1036 | ||
1036 | if (disk_item_offset + sizeof(struct btrfs_item) > | 1037 | if (disk_item_offset + sizeof(struct btrfs_item) > |
1037 | sf->block_ctx->len) { | 1038 | sf->block_ctx->len) { |
@@ -1047,6 +1048,7 @@ leaf_item_out_of_bounce_error: | |||
1047 | disk_item_offset, | 1048 | disk_item_offset, |
1048 | sizeof(struct btrfs_item)); | 1049 | sizeof(struct btrfs_item)); |
1049 | item_offset = le32_to_cpu(disk_item.offset); | 1050 | item_offset = le32_to_cpu(disk_item.offset); |
1051 | item_size = le32_to_cpu(disk_item.size); | ||
1050 | disk_key = &disk_item.key; | 1052 | disk_key = &disk_item.key; |
1051 | type = disk_key->type; | 1053 | type = disk_key->type; |
1052 | 1054 | ||
@@ -1057,14 +1059,13 @@ leaf_item_out_of_bounce_error: | |||
1057 | 1059 | ||
1058 | root_item_offset = item_offset + | 1060 | root_item_offset = item_offset + |
1059 | offsetof(struct btrfs_leaf, items); | 1061 | offsetof(struct btrfs_leaf, items); |
1060 | if (root_item_offset + | 1062 | if (root_item_offset + item_size > |
1061 | sizeof(struct btrfs_root_item) > | ||
1062 | sf->block_ctx->len) | 1063 | sf->block_ctx->len) |
1063 | goto leaf_item_out_of_bounce_error; | 1064 | goto leaf_item_out_of_bounce_error; |
1064 | btrfsic_read_from_block_data( | 1065 | btrfsic_read_from_block_data( |
1065 | sf->block_ctx, &root_item, | 1066 | sf->block_ctx, &root_item, |
1066 | root_item_offset, | 1067 | root_item_offset, |
1067 | sizeof(struct btrfs_root_item)); | 1068 | item_size); |
1068 | next_bytenr = le64_to_cpu(root_item.bytenr); | 1069 | next_bytenr = le64_to_cpu(root_item.bytenr); |
1069 | 1070 | ||
1070 | sf->error = | 1071 | sf->error = |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8206b390058..9d7621f271f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -321,7 +321,7 @@ struct tree_mod_root { | |||
321 | struct tree_mod_elem { | 321 | struct tree_mod_elem { |
322 | struct rb_node node; | 322 | struct rb_node node; |
323 | u64 index; /* shifted logical */ | 323 | u64 index; /* shifted logical */ |
324 | struct seq_list elem; | 324 | u64 seq; |
325 | enum mod_log_op op; | 325 | enum mod_log_op op; |
326 | 326 | ||
327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | 327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ |
@@ -341,20 +341,50 @@ struct tree_mod_elem { | |||
341 | struct tree_mod_root old_root; | 341 | struct tree_mod_root old_root; |
342 | }; | 342 | }; |
343 | 343 | ||
344 | static inline void | 344 | static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info) |
345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
346 | { | 345 | { |
347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | 346 | read_lock(&fs_info->tree_mod_log_lock); |
348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
349 | } | 347 | } |
350 | 348 | ||
351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | 349 | static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info) |
352 | struct seq_list *elem) | 350 | { |
351 | read_unlock(&fs_info->tree_mod_log_lock); | ||
352 | } | ||
353 | |||
354 | static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info) | ||
355 | { | ||
356 | write_lock(&fs_info->tree_mod_log_lock); | ||
357 | } | ||
358 | |||
359 | static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) | ||
360 | { | ||
361 | write_unlock(&fs_info->tree_mod_log_lock); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * This adds a new blocker to the tree mod log's blocker list if the @elem | ||
366 | * passed does not already have a sequence number set. So when a caller expects | ||
367 | * to record tree modifications, it should ensure to set elem->seq to zero | ||
368 | * before calling btrfs_get_tree_mod_seq. | ||
369 | * Returns a fresh, unused tree log modification sequence number, even if no new | ||
370 | * blocker was added. | ||
371 | */ | ||
372 | u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
373 | struct seq_list *elem) | ||
353 | { | 374 | { |
354 | elem->flags = 1; | 375 | u64 seq; |
376 | |||
377 | tree_mod_log_write_lock(fs_info); | ||
355 | spin_lock(&fs_info->tree_mod_seq_lock); | 378 | spin_lock(&fs_info->tree_mod_seq_lock); |
356 | __get_tree_mod_seq(fs_info, elem); | 379 | if (!elem->seq) { |
380 | elem->seq = btrfs_inc_tree_mod_seq(fs_info); | ||
381 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
382 | } | ||
383 | seq = btrfs_inc_tree_mod_seq(fs_info); | ||
357 | spin_unlock(&fs_info->tree_mod_seq_lock); | 384 | spin_unlock(&fs_info->tree_mod_seq_lock); |
385 | tree_mod_log_write_unlock(fs_info); | ||
386 | |||
387 | return seq; | ||
358 | } | 388 | } |
359 | 389 | ||
360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | 390 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, |
@@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | |||
371 | if (!seq_putting) | 401 | if (!seq_putting) |
372 | return; | 402 | return; |
373 | 403 | ||
374 | BUG_ON(!(elem->flags & 1)); | ||
375 | spin_lock(&fs_info->tree_mod_seq_lock); | 404 | spin_lock(&fs_info->tree_mod_seq_lock); |
376 | list_del(&elem->list); | 405 | list_del(&elem->list); |
406 | elem->seq = 0; | ||
377 | 407 | ||
378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | 408 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { |
379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | 409 | if (cur_elem->seq < min_seq) { |
380 | if (seq_putting > cur_elem->seq) { | 410 | if (seq_putting > cur_elem->seq) { |
381 | /* | 411 | /* |
382 | * blocker with lower sequence number exists, we | 412 | * blocker with lower sequence number exists, we |
383 | * cannot remove anything from the log | 413 | * cannot remove anything from the log |
384 | */ | 414 | */ |
385 | goto out; | 415 | spin_unlock(&fs_info->tree_mod_seq_lock); |
416 | return; | ||
386 | } | 417 | } |
387 | min_seq = cur_elem->seq; | 418 | min_seq = cur_elem->seq; |
388 | } | 419 | } |
389 | } | 420 | } |
421 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
422 | |||
423 | /* | ||
424 | * we removed the lowest blocker from the blocker list, so there may be | ||
425 | * more processible delayed refs. | ||
426 | */ | ||
427 | wake_up(&fs_info->tree_mod_seq_wait); | ||
390 | 428 | ||
391 | /* | 429 | /* |
392 | * anything that's lower than the lowest existing (read: blocked) | 430 | * anything that's lower than the lowest existing (read: blocked) |
393 | * sequence number can be removed from the tree. | 431 | * sequence number can be removed from the tree. |
394 | */ | 432 | */ |
395 | write_lock(&fs_info->tree_mod_log_lock); | 433 | tree_mod_log_write_lock(fs_info); |
396 | tm_root = &fs_info->tree_mod_log; | 434 | tm_root = &fs_info->tree_mod_log; |
397 | for (node = rb_first(tm_root); node; node = next) { | 435 | for (node = rb_first(tm_root); node; node = next) { |
398 | next = rb_next(node); | 436 | next = rb_next(node); |
399 | tm = container_of(node, struct tree_mod_elem, node); | 437 | tm = container_of(node, struct tree_mod_elem, node); |
400 | if (tm->elem.seq > min_seq) | 438 | if (tm->seq > min_seq) |
401 | continue; | 439 | continue; |
402 | rb_erase(node, tm_root); | 440 | rb_erase(node, tm_root); |
403 | list_del(&tm->elem.list); | ||
404 | kfree(tm); | 441 | kfree(tm); |
405 | } | 442 | } |
406 | write_unlock(&fs_info->tree_mod_log_lock); | 443 | tree_mod_log_write_unlock(fs_info); |
407 | out: | ||
408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
409 | } | 444 | } |
410 | 445 | ||
411 | /* | 446 | /* |
@@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | |||
423 | struct rb_node **new; | 458 | struct rb_node **new; |
424 | struct rb_node *parent = NULL; | 459 | struct rb_node *parent = NULL; |
425 | struct tree_mod_elem *cur; | 460 | struct tree_mod_elem *cur; |
426 | int ret = 0; | ||
427 | 461 | ||
428 | BUG_ON(!tm || !tm->elem.seq); | 462 | BUG_ON(!tm || !tm->seq); |
429 | 463 | ||
430 | write_lock(&fs_info->tree_mod_log_lock); | ||
431 | tm_root = &fs_info->tree_mod_log; | 464 | tm_root = &fs_info->tree_mod_log; |
432 | new = &tm_root->rb_node; | 465 | new = &tm_root->rb_node; |
433 | while (*new) { | 466 | while (*new) { |
@@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | |||
437 | new = &((*new)->rb_left); | 470 | new = &((*new)->rb_left); |
438 | else if (cur->index > tm->index) | 471 | else if (cur->index > tm->index) |
439 | new = &((*new)->rb_right); | 472 | new = &((*new)->rb_right); |
440 | else if (cur->elem.seq < tm->elem.seq) | 473 | else if (cur->seq < tm->seq) |
441 | new = &((*new)->rb_left); | 474 | new = &((*new)->rb_left); |
442 | else if (cur->elem.seq > tm->elem.seq) | 475 | else if (cur->seq > tm->seq) |
443 | new = &((*new)->rb_right); | 476 | new = &((*new)->rb_right); |
444 | else { | 477 | else { |
445 | kfree(tm); | 478 | kfree(tm); |
446 | ret = -EEXIST; | 479 | return -EEXIST; |
447 | goto unlock; | ||
448 | } | 480 | } |
449 | } | 481 | } |
450 | 482 | ||
451 | rb_link_node(&tm->node, parent, new); | 483 | rb_link_node(&tm->node, parent, new); |
452 | rb_insert_color(&tm->node, tm_root); | 484 | rb_insert_color(&tm->node, tm_root); |
453 | unlock: | 485 | return 0; |
454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
455 | return ret; | ||
456 | } | 486 | } |
457 | 487 | ||
488 | /* | ||
489 | * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it | ||
490 | * returns zero with the tree_mod_log_lock acquired. The caller must hold | ||
491 | * this until all tree mod log insertions are recorded in the rb tree and then | ||
492 | * call tree_mod_log_write_unlock() to release. | ||
493 | */ | ||
458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | 494 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, |
459 | struct extent_buffer *eb) { | 495 | struct extent_buffer *eb) { |
460 | smp_mb(); | 496 | smp_mb(); |
461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | 497 | if (list_empty(&(fs_info)->tree_mod_seq_list)) |
462 | return 1; | 498 | return 1; |
463 | if (!eb) | 499 | if (eb && btrfs_header_level(eb) == 0) |
464 | return 0; | 500 | return 1; |
465 | if (btrfs_header_level(eb) == 0) | 501 | |
502 | tree_mod_log_write_lock(fs_info); | ||
503 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
504 | /* | ||
505 | * someone emptied the list while we were waiting for the lock. | ||
506 | * we must not add to the list when no blocker exists. | ||
507 | */ | ||
508 | tree_mod_log_write_unlock(fs_info); | ||
466 | return 1; | 509 | return 1; |
510 | } | ||
511 | |||
467 | return 0; | 512 | return 0; |
468 | } | 513 | } |
469 | 514 | ||
470 | /* | 515 | /* |
471 | * This allocates memory and gets a tree modification sequence number when | 516 | * This allocates memory and gets a tree modification sequence number. |
472 | * needed. | ||
473 | * | 517 | * |
474 | * Returns 0 when no sequence number is needed, < 0 on error. | 518 | * Returns <0 on error. |
475 | * Returns 1 when a sequence number was added. In this case, | 519 | * Returns >0 (the added sequence number) on success. |
476 | * fs_info->tree_mod_seq_lock was acquired and must be released by the caller | ||
477 | * after inserting into the rb tree. | ||
478 | */ | 520 | */ |
479 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | 521 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, |
480 | struct tree_mod_elem **tm_ret) | 522 | struct tree_mod_elem **tm_ret) |
481 | { | 523 | { |
482 | struct tree_mod_elem *tm; | 524 | struct tree_mod_elem *tm; |
483 | int seq; | ||
484 | 525 | ||
485 | if (tree_mod_dont_log(fs_info, NULL)) | 526 | /* |
486 | return 0; | 527 | * once we switch from spin locks to something different, we should |
487 | 528 | * honor the flags parameter here. | |
488 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | 529 | */ |
530 | tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); | ||
489 | if (!tm) | 531 | if (!tm) |
490 | return -ENOMEM; | 532 | return -ENOMEM; |
491 | 533 | ||
492 | tm->elem.flags = 0; | 534 | tm->seq = btrfs_inc_tree_mod_seq(fs_info); |
493 | spin_lock(&fs_info->tree_mod_seq_lock); | 535 | return tm->seq; |
494 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
495 | /* | ||
496 | * someone emptied the list while we were waiting for the lock. | ||
497 | * we must not add to the list, because no blocker exists. items | ||
498 | * are removed from the list only when the existing blocker is | ||
499 | * removed from the list. | ||
500 | */ | ||
501 | kfree(tm); | ||
502 | seq = 0; | ||
503 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
504 | } else { | ||
505 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
506 | seq = tm->elem.seq; | ||
507 | } | ||
508 | |||
509 | return seq; | ||
510 | } | 536 | } |
511 | 537 | ||
512 | static noinline int | 538 | static inline int |
513 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | 539 | __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, |
514 | struct extent_buffer *eb, int slot, | 540 | struct extent_buffer *eb, int slot, |
515 | enum mod_log_op op, gfp_t flags) | 541 | enum mod_log_op op, gfp_t flags) |
516 | { | 542 | { |
517 | struct tree_mod_elem *tm; | ||
518 | int ret; | 543 | int ret; |
544 | struct tree_mod_elem *tm; | ||
519 | 545 | ||
520 | ret = tree_mod_alloc(fs_info, flags, &tm); | 546 | ret = tree_mod_alloc(fs_info, flags, &tm); |
521 | if (ret <= 0) | 547 | if (ret < 0) |
522 | return ret; | 548 | return ret; |
523 | 549 | ||
524 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | 550 | tm->index = eb->start >> PAGE_CACHE_SHIFT; |
@@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | |||
530 | tm->slot = slot; | 556 | tm->slot = slot; |
531 | tm->generation = btrfs_node_ptr_generation(eb, slot); | 557 | tm->generation = btrfs_node_ptr_generation(eb, slot); |
532 | 558 | ||
533 | ret = __tree_mod_log_insert(fs_info, tm); | 559 | return __tree_mod_log_insert(fs_info, tm); |
534 | spin_unlock(&fs_info->tree_mod_seq_lock); | 560 | } |
561 | |||
562 | static noinline int | ||
563 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
564 | struct extent_buffer *eb, int slot, | ||
565 | enum mod_log_op op, gfp_t flags) | ||
566 | { | ||
567 | int ret; | ||
568 | |||
569 | if (tree_mod_dont_log(fs_info, eb)) | ||
570 | return 0; | ||
571 | |||
572 | ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); | ||
573 | |||
574 | tree_mod_log_write_unlock(fs_info); | ||
535 | return ret; | 575 | return ret; |
536 | } | 576 | } |
537 | 577 | ||
@@ -543,6 +583,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
543 | } | 583 | } |
544 | 584 | ||
545 | static noinline int | 585 | static noinline int |
586 | tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, | ||
587 | struct extent_buffer *eb, int slot, | ||
588 | enum mod_log_op op) | ||
589 | { | ||
590 | return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); | ||
591 | } | ||
592 | |||
593 | static noinline int | ||
546 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | 594 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, |
547 | struct extent_buffer *eb, int dst_slot, int src_slot, | 595 | struct extent_buffer *eb, int dst_slot, int src_slot, |
548 | int nr_items, gfp_t flags) | 596 | int nr_items, gfp_t flags) |
@@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
555 | return 0; | 603 | return 0; |
556 | 604 | ||
557 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | 605 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { |
558 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | 606 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, |
559 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | 607 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); |
560 | BUG_ON(ret < 0); | 608 | BUG_ON(ret < 0); |
561 | } | 609 | } |
562 | 610 | ||
563 | ret = tree_mod_alloc(fs_info, flags, &tm); | 611 | ret = tree_mod_alloc(fs_info, flags, &tm); |
564 | if (ret <= 0) | 612 | if (ret < 0) |
565 | return ret; | 613 | goto out; |
566 | 614 | ||
567 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | 615 | tm->index = eb->start >> PAGE_CACHE_SHIFT; |
568 | tm->slot = src_slot; | 616 | tm->slot = src_slot; |
@@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
571 | tm->op = MOD_LOG_MOVE_KEYS; | 619 | tm->op = MOD_LOG_MOVE_KEYS; |
572 | 620 | ||
573 | ret = __tree_mod_log_insert(fs_info, tm); | 621 | ret = __tree_mod_log_insert(fs_info, tm); |
574 | spin_unlock(&fs_info->tree_mod_seq_lock); | 622 | out: |
623 | tree_mod_log_write_unlock(fs_info); | ||
575 | return ret; | 624 | return ret; |
576 | } | 625 | } |
577 | 626 | ||
627 | static inline void | ||
628 | __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) | ||
629 | { | ||
630 | int i; | ||
631 | u32 nritems; | ||
632 | int ret; | ||
633 | |||
634 | nritems = btrfs_header_nritems(eb); | ||
635 | for (i = nritems - 1; i >= 0; i--) { | ||
636 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i, | ||
637 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
638 | BUG_ON(ret < 0); | ||
639 | } | ||
640 | } | ||
641 | |||
578 | static noinline int | 642 | static noinline int |
579 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | 643 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, |
580 | struct extent_buffer *old_root, | 644 | struct extent_buffer *old_root, |
@@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
583 | struct tree_mod_elem *tm; | 647 | struct tree_mod_elem *tm; |
584 | int ret; | 648 | int ret; |
585 | 649 | ||
650 | if (tree_mod_dont_log(fs_info, NULL)) | ||
651 | return 0; | ||
652 | |||
653 | __tree_mod_log_free_eb(fs_info, old_root); | ||
654 | |||
586 | ret = tree_mod_alloc(fs_info, flags, &tm); | 655 | ret = tree_mod_alloc(fs_info, flags, &tm); |
587 | if (ret <= 0) | 656 | if (ret < 0) |
588 | return ret; | 657 | goto out; |
589 | 658 | ||
590 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | 659 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; |
591 | tm->old_root.logical = old_root->start; | 660 | tm->old_root.logical = old_root->start; |
@@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
594 | tm->op = MOD_LOG_ROOT_REPLACE; | 663 | tm->op = MOD_LOG_ROOT_REPLACE; |
595 | 664 | ||
596 | ret = __tree_mod_log_insert(fs_info, tm); | 665 | ret = __tree_mod_log_insert(fs_info, tm); |
597 | spin_unlock(&fs_info->tree_mod_seq_lock); | 666 | out: |
667 | tree_mod_log_write_unlock(fs_info); | ||
598 | return ret; | 668 | return ret; |
599 | } | 669 | } |
600 | 670 | ||
@@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
608 | struct tree_mod_elem *found = NULL; | 678 | struct tree_mod_elem *found = NULL; |
609 | u64 index = start >> PAGE_CACHE_SHIFT; | 679 | u64 index = start >> PAGE_CACHE_SHIFT; |
610 | 680 | ||
611 | read_lock(&fs_info->tree_mod_log_lock); | 681 | tree_mod_log_read_lock(fs_info); |
612 | tm_root = &fs_info->tree_mod_log; | 682 | tm_root = &fs_info->tree_mod_log; |
613 | node = tm_root->rb_node; | 683 | node = tm_root->rb_node; |
614 | while (node) { | 684 | while (node) { |
@@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
617 | node = node->rb_left; | 687 | node = node->rb_left; |
618 | } else if (cur->index > index) { | 688 | } else if (cur->index > index) { |
619 | node = node->rb_right; | 689 | node = node->rb_right; |
620 | } else if (cur->elem.seq < min_seq) { | 690 | } else if (cur->seq < min_seq) { |
621 | node = node->rb_left; | 691 | node = node->rb_left; |
622 | } else if (!smallest) { | 692 | } else if (!smallest) { |
623 | /* we want the node with the highest seq */ | 693 | /* we want the node with the highest seq */ |
624 | if (found) | 694 | if (found) |
625 | BUG_ON(found->elem.seq > cur->elem.seq); | 695 | BUG_ON(found->seq > cur->seq); |
626 | found = cur; | 696 | found = cur; |
627 | node = node->rb_left; | 697 | node = node->rb_left; |
628 | } else if (cur->elem.seq > min_seq) { | 698 | } else if (cur->seq > min_seq) { |
629 | /* we want the node with the smallest seq */ | 699 | /* we want the node with the smallest seq */ |
630 | if (found) | 700 | if (found) |
631 | BUG_ON(found->elem.seq < cur->elem.seq); | 701 | BUG_ON(found->seq < cur->seq); |
632 | found = cur; | 702 | found = cur; |
633 | node = node->rb_right; | 703 | node = node->rb_right; |
634 | } else { | 704 | } else { |
@@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
636 | break; | 706 | break; |
637 | } | 707 | } |
638 | } | 708 | } |
639 | read_unlock(&fs_info->tree_mod_log_lock); | 709 | tree_mod_log_read_unlock(fs_info); |
640 | 710 | ||
641 | return found; | 711 | return found; |
642 | } | 712 | } |
@@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | |||
664 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | 734 | return __tree_mod_log_search(fs_info, start, min_seq, 0); |
665 | } | 735 | } |
666 | 736 | ||
667 | static inline void | 737 | static noinline void |
668 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | 738 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, |
669 | struct extent_buffer *src, unsigned long dst_offset, | 739 | struct extent_buffer *src, unsigned long dst_offset, |
670 | unsigned long src_offset, int nr_items) | 740 | unsigned long src_offset, int nr_items) |
@@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
675 | if (tree_mod_dont_log(fs_info, NULL)) | 745 | if (tree_mod_dont_log(fs_info, NULL)) |
676 | return; | 746 | return; |
677 | 747 | ||
678 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | 748 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { |
749 | tree_mod_log_write_unlock(fs_info); | ||
679 | return; | 750 | return; |
751 | } | ||
680 | 752 | ||
681 | /* speed this up by single seq for all operations? */ | ||
682 | for (i = 0; i < nr_items; i++) { | 753 | for (i = 0; i < nr_items; i++) { |
683 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | 754 | ret = tree_mod_log_insert_key_locked(fs_info, src, |
684 | MOD_LOG_KEY_REMOVE); | 755 | i + src_offset, |
756 | MOD_LOG_KEY_REMOVE); | ||
685 | BUG_ON(ret < 0); | 757 | BUG_ON(ret < 0); |
686 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | 758 | ret = tree_mod_log_insert_key_locked(fs_info, dst, |
687 | MOD_LOG_KEY_ADD); | 759 | i + dst_offset, |
760 | MOD_LOG_KEY_ADD); | ||
688 | BUG_ON(ret < 0); | 761 | BUG_ON(ret < 0); |
689 | } | 762 | } |
763 | |||
764 | tree_mod_log_write_unlock(fs_info); | ||
690 | } | 765 | } |
691 | 766 | ||
692 | static inline void | 767 | static inline void |
@@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
699 | BUG_ON(ret < 0); | 774 | BUG_ON(ret < 0); |
700 | } | 775 | } |
701 | 776 | ||
702 | static inline void | 777 | static noinline void |
703 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | 778 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, |
704 | struct extent_buffer *eb, | 779 | struct extent_buffer *eb, |
705 | struct btrfs_disk_key *disk_key, int slot, int atomic) | 780 | struct btrfs_disk_key *disk_key, int slot, int atomic) |
@@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | |||
712 | BUG_ON(ret < 0); | 787 | BUG_ON(ret < 0); |
713 | } | 788 | } |
714 | 789 | ||
715 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | 790 | static noinline void |
716 | struct extent_buffer *eb) | 791 | tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) |
717 | { | 792 | { |
718 | int i; | ||
719 | int ret; | ||
720 | u32 nritems; | ||
721 | |||
722 | if (tree_mod_dont_log(fs_info, eb)) | 793 | if (tree_mod_dont_log(fs_info, eb)) |
723 | return; | 794 | return; |
724 | 795 | ||
725 | nritems = btrfs_header_nritems(eb); | 796 | __tree_mod_log_free_eb(fs_info, eb); |
726 | for (i = nritems - 1; i >= 0; i--) { | 797 | |
727 | ret = tree_mod_log_insert_key(fs_info, eb, i, | 798 | tree_mod_log_write_unlock(fs_info); |
728 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
729 | BUG_ON(ret < 0); | ||
730 | } | ||
731 | } | 799 | } |
732 | 800 | ||
733 | static inline void | 801 | static noinline void |
734 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | 802 | tree_mod_log_set_root_pointer(struct btrfs_root *root, |
735 | struct extent_buffer *new_root_node) | 803 | struct extent_buffer *new_root_node) |
736 | { | 804 | { |
737 | int ret; | 805 | int ret; |
738 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
739 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | 806 | ret = tree_mod_log_insert_root(root->fs_info, root->node, |
740 | new_root_node, GFP_NOFS); | 807 | new_root_node, GFP_NOFS); |
741 | BUG_ON(ret < 0); | 808 | BUG_ON(ret < 0); |
@@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
1069 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | 1136 | unsigned long p_size = sizeof(struct btrfs_key_ptr); |
1070 | 1137 | ||
1071 | n = btrfs_header_nritems(eb); | 1138 | n = btrfs_header_nritems(eb); |
1072 | while (tm && tm->elem.seq >= time_seq) { | 1139 | while (tm && tm->seq >= time_seq) { |
1073 | /* | 1140 | /* |
1074 | * all the operations are recorded with the operator used for | 1141 | * all the operations are recorded with the operator used for |
1075 | * the modification. as we're going backwards, we do the | 1142 | * the modification. as we're going backwards, we do the |
@@ -2722,6 +2789,80 @@ done: | |||
2722 | } | 2789 | } |
2723 | 2790 | ||
2724 | /* | 2791 | /* |
2792 | * helper to use instead of search slot if no exact match is needed but | ||
2793 | * instead the next or previous item should be returned. | ||
2794 | * When find_higher is true, the next higher item is returned, the next lower | ||
2795 | * otherwise. | ||
2796 | * When return_any and find_higher are both true, and no higher item is found, | ||
2797 | * return the next lower instead. | ||
2798 | * When return_any is true and find_higher is false, and no lower item is found, | ||
2799 | * return the next higher instead. | ||
2800 | * It returns 0 if any item is found, 1 if none is found (tree empty), and | ||
2801 | * < 0 on error | ||
2802 | */ | ||
2803 | int btrfs_search_slot_for_read(struct btrfs_root *root, | ||
2804 | struct btrfs_key *key, struct btrfs_path *p, | ||
2805 | int find_higher, int return_any) | ||
2806 | { | ||
2807 | int ret; | ||
2808 | struct extent_buffer *leaf; | ||
2809 | |||
2810 | again: | ||
2811 | ret = btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
2812 | if (ret <= 0) | ||
2813 | return ret; | ||
2814 | /* | ||
2815 | * a return value of 1 means the path is at the position where the | ||
2816 | * item should be inserted. Normally this is the next bigger item, | ||
2817 | * but in case the previous item is the last in a leaf, path points | ||
2818 | * to the first free slot in the previous leaf, i.e. at an invalid | ||
2819 | * item. | ||
2820 | */ | ||
2821 | leaf = p->nodes[0]; | ||
2822 | |||
2823 | if (find_higher) { | ||
2824 | if (p->slots[0] >= btrfs_header_nritems(leaf)) { | ||
2825 | ret = btrfs_next_leaf(root, p); | ||
2826 | if (ret <= 0) | ||
2827 | return ret; | ||
2828 | if (!return_any) | ||
2829 | return 1; | ||
2830 | /* | ||
2831 | * no higher item found, return the next | ||
2832 | * lower instead | ||
2833 | */ | ||
2834 | return_any = 0; | ||
2835 | find_higher = 0; | ||
2836 | btrfs_release_path(p); | ||
2837 | goto again; | ||
2838 | } | ||
2839 | } else { | ||
2840 | if (p->slots[0] == 0) { | ||
2841 | ret = btrfs_prev_leaf(root, p); | ||
2842 | if (ret < 0) | ||
2843 | return ret; | ||
2844 | if (!ret) { | ||
2845 | p->slots[0] = btrfs_header_nritems(leaf) - 1; | ||
2846 | return 0; | ||
2847 | } | ||
2848 | if (!return_any) | ||
2849 | return 1; | ||
2850 | /* | ||
2851 | * no lower item found, return the next | ||
2852 | * higher instead | ||
2853 | */ | ||
2854 | return_any = 0; | ||
2855 | find_higher = 1; | ||
2856 | btrfs_release_path(p); | ||
2857 | goto again; | ||
2858 | } else { | ||
2859 | --p->slots[0]; | ||
2860 | } | ||
2861 | } | ||
2862 | return 0; | ||
2863 | } | ||
2864 | |||
2865 | /* | ||
2725 | * adjust the pointers going up the tree, starting at level | 2866 | * adjust the pointers going up the tree, starting at level |
2726 | * making sure the right key of each node points to 'key'. | 2867 | * making sure the right key of each node points to 'key'. |
2727 | * This is used after shifting pointers to the left, so it stops | 2868 | * This is used after shifting pointers to the left, so it stops |
@@ -4931,6 +5072,431 @@ out: | |||
4931 | return ret; | 5072 | return ret; |
4932 | } | 5073 | } |
4933 | 5074 | ||
5075 | static void tree_move_down(struct btrfs_root *root, | ||
5076 | struct btrfs_path *path, | ||
5077 | int *level, int root_level) | ||
5078 | { | ||
5079 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], | ||
5080 | path->slots[*level]); | ||
5081 | path->slots[*level - 1] = 0; | ||
5082 | (*level)--; | ||
5083 | } | ||
5084 | |||
5085 | static int tree_move_next_or_upnext(struct btrfs_root *root, | ||
5086 | struct btrfs_path *path, | ||
5087 | int *level, int root_level) | ||
5088 | { | ||
5089 | int ret = 0; | ||
5090 | int nritems; | ||
5091 | nritems = btrfs_header_nritems(path->nodes[*level]); | ||
5092 | |||
5093 | path->slots[*level]++; | ||
5094 | |||
5095 | while (path->slots[*level] == nritems) { | ||
5096 | if (*level == root_level) | ||
5097 | return -1; | ||
5098 | |||
5099 | /* move upnext */ | ||
5100 | path->slots[*level] = 0; | ||
5101 | free_extent_buffer(path->nodes[*level]); | ||
5102 | path->nodes[*level] = NULL; | ||
5103 | (*level)++; | ||
5104 | path->slots[*level]++; | ||
5105 | |||
5106 | nritems = btrfs_header_nritems(path->nodes[*level]); | ||
5107 | ret = 1; | ||
5108 | } | ||
5109 | return ret; | ||
5110 | } | ||
5111 | |||
5112 | /* | ||
5113 | * Returns 1 if it had to move up and next. 0 is returned if it moved only next | ||
5114 | * or down. | ||
5115 | */ | ||
5116 | static int tree_advance(struct btrfs_root *root, | ||
5117 | struct btrfs_path *path, | ||
5118 | int *level, int root_level, | ||
5119 | int allow_down, | ||
5120 | struct btrfs_key *key) | ||
5121 | { | ||
5122 | int ret; | ||
5123 | |||
5124 | if (*level == 0 || !allow_down) { | ||
5125 | ret = tree_move_next_or_upnext(root, path, level, root_level); | ||
5126 | } else { | ||
5127 | tree_move_down(root, path, level, root_level); | ||
5128 | ret = 0; | ||
5129 | } | ||
5130 | if (ret >= 0) { | ||
5131 | if (*level == 0) | ||
5132 | btrfs_item_key_to_cpu(path->nodes[*level], key, | ||
5133 | path->slots[*level]); | ||
5134 | else | ||
5135 | btrfs_node_key_to_cpu(path->nodes[*level], key, | ||
5136 | path->slots[*level]); | ||
5137 | } | ||
5138 | return ret; | ||
5139 | } | ||
5140 | |||
5141 | static int tree_compare_item(struct btrfs_root *left_root, | ||
5142 | struct btrfs_path *left_path, | ||
5143 | struct btrfs_path *right_path, | ||
5144 | char *tmp_buf) | ||
5145 | { | ||
5146 | int cmp; | ||
5147 | int len1, len2; | ||
5148 | unsigned long off1, off2; | ||
5149 | |||
5150 | len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]); | ||
5151 | len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]); | ||
5152 | if (len1 != len2) | ||
5153 | return 1; | ||
5154 | |||
5155 | off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]); | ||
5156 | off2 = btrfs_item_ptr_offset(right_path->nodes[0], | ||
5157 | right_path->slots[0]); | ||
5158 | |||
5159 | read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1); | ||
5160 | |||
5161 | cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1); | ||
5162 | if (cmp) | ||
5163 | return 1; | ||
5164 | return 0; | ||
5165 | } | ||
5166 | |||
5167 | #define ADVANCE 1 | ||
5168 | #define ADVANCE_ONLY_NEXT -1 | ||
5169 | |||
5170 | /* | ||
5171 | * This function compares two trees and calls the provided callback for | ||
5172 | * every changed/new/deleted item it finds. | ||
5173 | * If shared tree blocks are encountered, whole subtrees are skipped, making | ||
5174 | * the compare pretty fast on snapshotted subvolumes. | ||
5175 | * | ||
5176 | * This currently works on commit roots only. As commit roots are read only, | ||
5177 | * we don't do any locking. The commit roots are protected with transactions. | ||
5178 | * Transactions are ended and rejoined when a commit is tried in between. | ||
5179 | * | ||
5180 | * This function checks for modifications done to the trees while comparing. | ||
5181 | * If it detects a change, it aborts immediately. | ||
5182 | */ | ||
5183 | int btrfs_compare_trees(struct btrfs_root *left_root, | ||
5184 | struct btrfs_root *right_root, | ||
5185 | btrfs_changed_cb_t changed_cb, void *ctx) | ||
5186 | { | ||
5187 | int ret; | ||
5188 | int cmp; | ||
5189 | struct btrfs_trans_handle *trans = NULL; | ||
5190 | struct btrfs_path *left_path = NULL; | ||
5191 | struct btrfs_path *right_path = NULL; | ||
5192 | struct btrfs_key left_key; | ||
5193 | struct btrfs_key right_key; | ||
5194 | char *tmp_buf = NULL; | ||
5195 | int left_root_level; | ||
5196 | int right_root_level; | ||
5197 | int left_level; | ||
5198 | int right_level; | ||
5199 | int left_end_reached; | ||
5200 | int right_end_reached; | ||
5201 | int advance_left; | ||
5202 | int advance_right; | ||
5203 | u64 left_blockptr; | ||
5204 | u64 right_blockptr; | ||
5205 | u64 left_start_ctransid; | ||
5206 | u64 right_start_ctransid; | ||
5207 | u64 ctransid; | ||
5208 | |||
5209 | left_path = btrfs_alloc_path(); | ||
5210 | if (!left_path) { | ||
5211 | ret = -ENOMEM; | ||
5212 | goto out; | ||
5213 | } | ||
5214 | right_path = btrfs_alloc_path(); | ||
5215 | if (!right_path) { | ||
5216 | ret = -ENOMEM; | ||
5217 | goto out; | ||
5218 | } | ||
5219 | |||
5220 | tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS); | ||
5221 | if (!tmp_buf) { | ||
5222 | ret = -ENOMEM; | ||
5223 | goto out; | ||
5224 | } | ||
5225 | |||
5226 | left_path->search_commit_root = 1; | ||
5227 | left_path->skip_locking = 1; | ||
5228 | right_path->search_commit_root = 1; | ||
5229 | right_path->skip_locking = 1; | ||
5230 | |||
5231 | spin_lock(&left_root->root_times_lock); | ||
5232 | left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); | ||
5233 | spin_unlock(&left_root->root_times_lock); | ||
5234 | |||
5235 | spin_lock(&right_root->root_times_lock); | ||
5236 | right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); | ||
5237 | spin_unlock(&right_root->root_times_lock); | ||
5238 | |||
5239 | trans = btrfs_join_transaction(left_root); | ||
5240 | if (IS_ERR(trans)) { | ||
5241 | ret = PTR_ERR(trans); | ||
5242 | trans = NULL; | ||
5243 | goto out; | ||
5244 | } | ||
5245 | |||
5246 | /* | ||
5247 | * Strategy: Go to the first items of both trees. Then do | ||
5248 | * | ||
5249 | * If both trees are at level 0 | ||
5250 | * Compare keys of current items | ||
5251 | * If left < right treat left item as new, advance left tree | ||
5252 | * and repeat | ||
5253 | * If left > right treat right item as deleted, advance right tree | ||
5254 | * and repeat | ||
5255 | * If left == right do deep compare of items, treat as changed if | ||
5256 | * needed, advance both trees and repeat | ||
5257 | * If both trees are at the same level but not at level 0 | ||
5258 | * Compare keys of current nodes/leafs | ||
5259 | * If left < right advance left tree and repeat | ||
5260 | * If left > right advance right tree and repeat | ||
5261 | * If left == right compare blockptrs of the next nodes/leafs | ||
5262 | * If they match advance both trees but stay at the same level | ||
5263 | * and repeat | ||
5264 | * If they don't match advance both trees while allowing to go | ||
5265 | * deeper and repeat | ||
5266 | * If tree levels are different | ||
5267 | * Advance the tree that needs it and repeat | ||
5268 | * | ||
5269 | * Advancing a tree means: | ||
5270 | * If we are at level 0, try to go to the next slot. If that's not | ||
5271 | * possible, go one level up and repeat. Stop when we found a level | ||
5272 | * where we could go to the next slot. We may at this point be on a | ||
5273 | * node or a leaf. | ||
5274 | * | ||
5275 | * If we are not at level 0 and not on shared tree blocks, go one | ||
5276 | * level deeper. | ||
5277 | * | ||
5278 | * If we are not at level 0 and on shared tree blocks, go one slot to | ||
5279 | * the right if possible or go up and right. | ||
5280 | */ | ||
5281 | |||
5282 | left_level = btrfs_header_level(left_root->commit_root); | ||
5283 | left_root_level = left_level; | ||
5284 | left_path->nodes[left_level] = left_root->commit_root; | ||
5285 | extent_buffer_get(left_path->nodes[left_level]); | ||
5286 | |||
5287 | right_level = btrfs_header_level(right_root->commit_root); | ||
5288 | right_root_level = right_level; | ||
5289 | right_path->nodes[right_level] = right_root->commit_root; | ||
5290 | extent_buffer_get(right_path->nodes[right_level]); | ||
5291 | |||
5292 | if (left_level == 0) | ||
5293 | btrfs_item_key_to_cpu(left_path->nodes[left_level], | ||
5294 | &left_key, left_path->slots[left_level]); | ||
5295 | else | ||
5296 | btrfs_node_key_to_cpu(left_path->nodes[left_level], | ||
5297 | &left_key, left_path->slots[left_level]); | ||
5298 | if (right_level == 0) | ||
5299 | btrfs_item_key_to_cpu(right_path->nodes[right_level], | ||
5300 | &right_key, right_path->slots[right_level]); | ||
5301 | else | ||
5302 | btrfs_node_key_to_cpu(right_path->nodes[right_level], | ||
5303 | &right_key, right_path->slots[right_level]); | ||
5304 | |||
5305 | left_end_reached = right_end_reached = 0; | ||
5306 | advance_left = advance_right = 0; | ||
5307 | |||
5308 | while (1) { | ||
5309 | /* | ||
5310 | * We need to make sure the transaction does not get committed | ||
5311 | * while we do anything on commit roots. This means, we need to | ||
5312 | * join and leave transactions for every item that we process. | ||
5313 | */ | ||
5314 | if (trans && btrfs_should_end_transaction(trans, left_root)) { | ||
5315 | btrfs_release_path(left_path); | ||
5316 | btrfs_release_path(right_path); | ||
5317 | |||
5318 | ret = btrfs_end_transaction(trans, left_root); | ||
5319 | trans = NULL; | ||
5320 | if (ret < 0) | ||
5321 | goto out; | ||
5322 | } | ||
5323 | /* now rejoin the transaction */ | ||
5324 | if (!trans) { | ||
5325 | trans = btrfs_join_transaction(left_root); | ||
5326 | if (IS_ERR(trans)) { | ||
5327 | ret = PTR_ERR(trans); | ||
5328 | trans = NULL; | ||
5329 | goto out; | ||
5330 | } | ||
5331 | |||
5332 | spin_lock(&left_root->root_times_lock); | ||
5333 | ctransid = btrfs_root_ctransid(&left_root->root_item); | ||
5334 | spin_unlock(&left_root->root_times_lock); | ||
5335 | if (ctransid != left_start_ctransid) | ||
5336 | left_start_ctransid = 0; | ||
5337 | |||
5338 | spin_lock(&right_root->root_times_lock); | ||
5339 | ctransid = btrfs_root_ctransid(&right_root->root_item); | ||
5340 | spin_unlock(&right_root->root_times_lock); | ||
5341 | if (ctransid != right_start_ctransid) | ||
5342 | right_start_ctransid = 0; | ||
5343 | |||
5344 | if (!left_start_ctransid || !right_start_ctransid) { | ||
5345 | WARN(1, KERN_WARNING | ||
5346 | "btrfs: btrfs_compare_tree detected " | ||
5347 | "a change in one of the trees while " | ||
5348 | "iterating. This is probably a " | ||
5349 | "bug.\n"); | ||
5350 | ret = -EIO; | ||
5351 | goto out; | ||
5352 | } | ||
5353 | |||
5354 | /* | ||
5355 | * the commit root may have changed, so start again | ||
5356 | * where we stopped | ||
5357 | */ | ||
5358 | left_path->lowest_level = left_level; | ||
5359 | right_path->lowest_level = right_level; | ||
5360 | ret = btrfs_search_slot(NULL, left_root, | ||
5361 | &left_key, left_path, 0, 0); | ||
5362 | if (ret < 0) | ||
5363 | goto out; | ||
5364 | ret = btrfs_search_slot(NULL, right_root, | ||
5365 | &right_key, right_path, 0, 0); | ||
5366 | if (ret < 0) | ||
5367 | goto out; | ||
5368 | } | ||
5369 | |||
5370 | if (advance_left && !left_end_reached) { | ||
5371 | ret = tree_advance(left_root, left_path, &left_level, | ||
5372 | left_root_level, | ||
5373 | advance_left != ADVANCE_ONLY_NEXT, | ||
5374 | &left_key); | ||
5375 | if (ret < 0) | ||
5376 | left_end_reached = ADVANCE; | ||
5377 | advance_left = 0; | ||
5378 | } | ||
5379 | if (advance_right && !right_end_reached) { | ||
5380 | ret = tree_advance(right_root, right_path, &right_level, | ||
5381 | right_root_level, | ||
5382 | advance_right != ADVANCE_ONLY_NEXT, | ||
5383 | &right_key); | ||
5384 | if (ret < 0) | ||
5385 | right_end_reached = ADVANCE; | ||
5386 | advance_right = 0; | ||
5387 | } | ||
5388 | |||
5389 | if (left_end_reached && right_end_reached) { | ||
5390 | ret = 0; | ||
5391 | goto out; | ||
5392 | } else if (left_end_reached) { | ||
5393 | if (right_level == 0) { | ||
5394 | ret = changed_cb(left_root, right_root, | ||
5395 | left_path, right_path, | ||
5396 | &right_key, | ||
5397 | BTRFS_COMPARE_TREE_DELETED, | ||
5398 | ctx); | ||
5399 | if (ret < 0) | ||
5400 | goto out; | ||
5401 | } | ||
5402 | advance_right = ADVANCE; | ||
5403 | continue; | ||
5404 | } else if (right_end_reached) { | ||
5405 | if (left_level == 0) { | ||
5406 | ret = changed_cb(left_root, right_root, | ||
5407 | left_path, right_path, | ||
5408 | &left_key, | ||
5409 | BTRFS_COMPARE_TREE_NEW, | ||
5410 | ctx); | ||
5411 | if (ret < 0) | ||
5412 | goto out; | ||
5413 | } | ||
5414 | advance_left = ADVANCE; | ||
5415 | continue; | ||
5416 | } | ||
5417 | |||
5418 | if (left_level == 0 && right_level == 0) { | ||
5419 | cmp = btrfs_comp_cpu_keys(&left_key, &right_key); | ||
5420 | if (cmp < 0) { | ||
5421 | ret = changed_cb(left_root, right_root, | ||
5422 | left_path, right_path, | ||
5423 | &left_key, | ||
5424 | BTRFS_COMPARE_TREE_NEW, | ||
5425 | ctx); | ||
5426 | if (ret < 0) | ||
5427 | goto out; | ||
5428 | advance_left = ADVANCE; | ||
5429 | } else if (cmp > 0) { | ||
5430 | ret = changed_cb(left_root, right_root, | ||
5431 | left_path, right_path, | ||
5432 | &right_key, | ||
5433 | BTRFS_COMPARE_TREE_DELETED, | ||
5434 | ctx); | ||
5435 | if (ret < 0) | ||
5436 | goto out; | ||
5437 | advance_right = ADVANCE; | ||
5438 | } else { | ||
5439 | ret = tree_compare_item(left_root, left_path, | ||
5440 | right_path, tmp_buf); | ||
5441 | if (ret) { | ||
5442 | ret = changed_cb(left_root, right_root, | ||
5443 | left_path, right_path, | ||
5444 | &left_key, | ||
5445 | BTRFS_COMPARE_TREE_CHANGED, | ||
5446 | ctx); | ||
5447 | if (ret < 0) | ||
5448 | goto out; | ||
5449 | } | ||
5450 | advance_left = ADVANCE; | ||
5451 | advance_right = ADVANCE; | ||
5452 | } | ||
5453 | } else if (left_level == right_level) { | ||
5454 | cmp = btrfs_comp_cpu_keys(&left_key, &right_key); | ||
5455 | if (cmp < 0) { | ||
5456 | advance_left = ADVANCE; | ||
5457 | } else if (cmp > 0) { | ||
5458 | advance_right = ADVANCE; | ||
5459 | } else { | ||
5460 | left_blockptr = btrfs_node_blockptr( | ||
5461 | left_path->nodes[left_level], | ||
5462 | left_path->slots[left_level]); | ||
5463 | right_blockptr = btrfs_node_blockptr( | ||
5464 | right_path->nodes[right_level], | ||
5465 | right_path->slots[right_level]); | ||
5466 | if (left_blockptr == right_blockptr) { | ||
5467 | /* | ||
5468 | * As we're on a shared block, don't | ||
5469 | * allow to go deeper. | ||
5470 | */ | ||
5471 | advance_left = ADVANCE_ONLY_NEXT; | ||
5472 | advance_right = ADVANCE_ONLY_NEXT; | ||
5473 | } else { | ||
5474 | advance_left = ADVANCE; | ||
5475 | advance_right = ADVANCE; | ||
5476 | } | ||
5477 | } | ||
5478 | } else if (left_level < right_level) { | ||
5479 | advance_right = ADVANCE; | ||
5480 | } else { | ||
5481 | advance_left = ADVANCE; | ||
5482 | } | ||
5483 | } | ||
5484 | |||
5485 | out: | ||
5486 | btrfs_free_path(left_path); | ||
5487 | btrfs_free_path(right_path); | ||
5488 | kfree(tmp_buf); | ||
5489 | |||
5490 | if (trans) { | ||
5491 | if (!ret) | ||
5492 | ret = btrfs_end_transaction(trans, left_root); | ||
5493 | else | ||
5494 | btrfs_end_transaction(trans, left_root); | ||
5495 | } | ||
5496 | |||
5497 | return ret; | ||
5498 | } | ||
5499 | |||
4934 | /* | 5500 | /* |
4935 | * this is similar to btrfs_next_leaf, but does not try to preserve | 5501 | * this is similar to btrfs_next_leaf, but does not try to preserve |
4936 | * and fixup the path. It looks for and returns the next key in the | 5502 | * and fixup the path. It looks for and returns the next key in the |
@@ -5127,6 +5693,7 @@ again: | |||
5127 | * locked. To solve this situation, we give up | 5693 | * locked. To solve this situation, we give up |
5128 | * on our lock and cycle. | 5694 | * on our lock and cycle. |
5129 | */ | 5695 | */ |
5696 | free_extent_buffer(next); | ||
5130 | btrfs_release_path(path); | 5697 | btrfs_release_path(path); |
5131 | cond_resched(); | 5698 | cond_resched(); |
5132 | goto again; | 5699 | goto again; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa5c45b3907..adb1cd7ceb9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -91,6 +91,9 @@ struct btrfs_ordered_sum; | |||
91 | /* for storing balance parameters in the root tree */ | 91 | /* for storing balance parameters in the root tree */ |
92 | #define BTRFS_BALANCE_OBJECTID -4ULL | 92 | #define BTRFS_BALANCE_OBJECTID -4ULL |
93 | 93 | ||
94 | /* holds quota configuration and tracking */ | ||
95 | #define BTRFS_QUOTA_TREE_OBJECTID 8ULL | ||
96 | |||
94 | /* orphan objectid for tracking unlinked/truncated files */ | 97 | /* orphan objectid for tracking unlinked/truncated files */ |
95 | #define BTRFS_ORPHAN_OBJECTID -5ULL | 98 | #define BTRFS_ORPHAN_OBJECTID -5ULL |
96 | 99 | ||
@@ -709,6 +712,36 @@ struct btrfs_root_item { | |||
709 | struct btrfs_disk_key drop_progress; | 712 | struct btrfs_disk_key drop_progress; |
710 | u8 drop_level; | 713 | u8 drop_level; |
711 | u8 level; | 714 | u8 level; |
715 | |||
716 | /* | ||
717 | * The following fields appear after subvol_uuids+subvol_times | ||
718 | * were introduced. | ||
719 | */ | ||
720 | |||
721 | /* | ||
722 | * This generation number is used to test if the new fields are valid | ||
723 | * and up to date while reading the root item. Every time the root item | ||
724 | * is written out, the "generation" field is copied into this field. If | ||
725 | * anyone ever mounted the fs with an older kernel, we will have | ||
726 | * mismatching generation values here and thus must invalidate the | ||
727 | * new fields. See btrfs_update_root and btrfs_find_last_root for | ||
728 | * details. | ||
729 | * the offset of generation_v2 is also used as the start for the memset | ||
730 | * when invalidating the fields. | ||
731 | */ | ||
732 | __le64 generation_v2; | ||
733 | u8 uuid[BTRFS_UUID_SIZE]; | ||
734 | u8 parent_uuid[BTRFS_UUID_SIZE]; | ||
735 | u8 received_uuid[BTRFS_UUID_SIZE]; | ||
736 | __le64 ctransid; /* updated when an inode changes */ | ||
737 | __le64 otransid; /* trans when created */ | ||
738 | __le64 stransid; /* trans when sent. non-zero for received subvol */ | ||
739 | __le64 rtransid; /* trans when received. non-zero for received subvol */ | ||
740 | struct btrfs_timespec ctime; | ||
741 | struct btrfs_timespec otime; | ||
742 | struct btrfs_timespec stime; | ||
743 | struct btrfs_timespec rtime; | ||
744 | __le64 reserved[8]; /* for future */ | ||
712 | } __attribute__ ((__packed__)); | 745 | } __attribute__ ((__packed__)); |
713 | 746 | ||
714 | /* | 747 | /* |
@@ -883,6 +916,72 @@ struct btrfs_block_group_item { | |||
883 | __le64 flags; | 916 | __le64 flags; |
884 | } __attribute__ ((__packed__)); | 917 | } __attribute__ ((__packed__)); |
885 | 918 | ||
919 | /* | ||
920 | * is subvolume quota turned on? | ||
921 | */ | ||
922 | #define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) | ||
923 | /* | ||
924 | * SCANNING is set during the initialization phase | ||
925 | */ | ||
926 | #define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) | ||
927 | /* | ||
928 | * Some qgroup entries are known to be out of date, | ||
929 | * either because the configuration has changed in a way that | ||
930 | * makes a rescan necessary, or because the fs has been mounted | ||
931 | * with a non-qgroup-aware version. | ||
932 | * Turning quota off and on again makes it inconsistent, too. | ||
933 | */ | ||
934 | #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) | ||
935 | |||
936 | #define BTRFS_QGROUP_STATUS_VERSION 1 | ||
937 | |||
938 | struct btrfs_qgroup_status_item { | ||
939 | __le64 version; | ||
940 | /* | ||
941 | * the generation is updated during every commit. As older | ||
942 | * versions of btrfs are not aware of qgroups, it will be | ||
943 | * possible to detect inconsistencies by checking the | ||
944 | * generation on mount time | ||
945 | */ | ||
946 | __le64 generation; | ||
947 | |||
948 | /* flag definitions see above */ | ||
949 | __le64 flags; | ||
950 | |||
951 | /* | ||
952 | * only used during scanning to record the progress | ||
953 | * of the scan. It contains a logical address | ||
954 | */ | ||
955 | __le64 scan; | ||
956 | } __attribute__ ((__packed__)); | ||
957 | |||
958 | struct btrfs_qgroup_info_item { | ||
959 | __le64 generation; | ||
960 | __le64 rfer; | ||
961 | __le64 rfer_cmpr; | ||
962 | __le64 excl; | ||
963 | __le64 excl_cmpr; | ||
964 | } __attribute__ ((__packed__)); | ||
965 | |||
966 | /* flags definition for qgroup limits */ | ||
967 | #define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) | ||
968 | #define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) | ||
969 | #define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) | ||
970 | #define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) | ||
971 | #define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) | ||
972 | #define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) | ||
973 | |||
974 | struct btrfs_qgroup_limit_item { | ||
975 | /* | ||
976 | * only updated when any of the other values change | ||
977 | */ | ||
978 | __le64 flags; | ||
979 | __le64 max_rfer; | ||
980 | __le64 max_excl; | ||
981 | __le64 rsv_rfer; | ||
982 | __le64 rsv_excl; | ||
983 | } __attribute__ ((__packed__)); | ||
984 | |||
886 | struct btrfs_space_info { | 985 | struct btrfs_space_info { |
887 | u64 flags; | 986 | u64 flags; |
888 | 987 | ||
@@ -1030,6 +1129,13 @@ struct btrfs_block_group_cache { | |||
1030 | struct list_head cluster_list; | 1129 | struct list_head cluster_list; |
1031 | }; | 1130 | }; |
1032 | 1131 | ||
1132 | /* delayed seq elem */ | ||
1133 | struct seq_list { | ||
1134 | struct list_head list; | ||
1135 | u64 seq; | ||
1136 | }; | ||
1137 | |||
1138 | /* fs_info */ | ||
1033 | struct reloc_control; | 1139 | struct reloc_control; |
1034 | struct btrfs_device; | 1140 | struct btrfs_device; |
1035 | struct btrfs_fs_devices; | 1141 | struct btrfs_fs_devices; |
@@ -1044,6 +1150,7 @@ struct btrfs_fs_info { | |||
1044 | struct btrfs_root *dev_root; | 1150 | struct btrfs_root *dev_root; |
1045 | struct btrfs_root *fs_root; | 1151 | struct btrfs_root *fs_root; |
1046 | struct btrfs_root *csum_root; | 1152 | struct btrfs_root *csum_root; |
1153 | struct btrfs_root *quota_root; | ||
1047 | 1154 | ||
1048 | /* the log root tree is a directory of all the other log roots */ | 1155 | /* the log root tree is a directory of all the other log roots */ |
1049 | struct btrfs_root *log_root_tree; | 1156 | struct btrfs_root *log_root_tree; |
@@ -1144,6 +1251,8 @@ struct btrfs_fs_info { | |||
1144 | spinlock_t tree_mod_seq_lock; | 1251 | spinlock_t tree_mod_seq_lock; |
1145 | atomic_t tree_mod_seq; | 1252 | atomic_t tree_mod_seq; |
1146 | struct list_head tree_mod_seq_list; | 1253 | struct list_head tree_mod_seq_list; |
1254 | struct seq_list tree_mod_seq_elem; | ||
1255 | wait_queue_head_t tree_mod_seq_wait; | ||
1147 | 1256 | ||
1148 | /* this protects tree_mod_log */ | 1257 | /* this protects tree_mod_log */ |
1149 | rwlock_t tree_mod_log_lock; | 1258 | rwlock_t tree_mod_log_lock; |
@@ -1240,6 +1349,8 @@ struct btrfs_fs_info { | |||
1240 | */ | 1349 | */ |
1241 | struct list_head space_info; | 1350 | struct list_head space_info; |
1242 | 1351 | ||
1352 | struct btrfs_space_info *data_sinfo; | ||
1353 | |||
1243 | struct reloc_control *reloc_ctl; | 1354 | struct reloc_control *reloc_ctl; |
1244 | 1355 | ||
1245 | spinlock_t delalloc_lock; | 1356 | spinlock_t delalloc_lock; |
@@ -1296,6 +1407,29 @@ struct btrfs_fs_info { | |||
1296 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1407 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
1297 | u32 check_integrity_print_mask; | 1408 | u32 check_integrity_print_mask; |
1298 | #endif | 1409 | #endif |
1410 | /* | ||
1411 | * quota information | ||
1412 | */ | ||
1413 | unsigned int quota_enabled:1; | ||
1414 | |||
1415 | /* | ||
1416 | * quota_enabled only changes state after a commit. This holds the | ||
1417 | * next state. | ||
1418 | */ | ||
1419 | unsigned int pending_quota_state:1; | ||
1420 | |||
1421 | /* is qgroup tracking in a consistent state? */ | ||
1422 | u64 qgroup_flags; | ||
1423 | |||
1424 | /* holds configuration and tracking. Protected by qgroup_lock */ | ||
1425 | struct rb_root qgroup_tree; | ||
1426 | spinlock_t qgroup_lock; | ||
1427 | |||
1428 | /* list of dirty qgroups to be written at next commit */ | ||
1429 | struct list_head dirty_qgroups; | ||
1430 | |||
1431 | /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ | ||
1432 | u64 qgroup_seq; | ||
1299 | 1433 | ||
1300 | /* filesystem state */ | 1434 | /* filesystem state */ |
1301 | u64 fs_state; | 1435 | u64 fs_state; |
@@ -1416,6 +1550,8 @@ struct btrfs_root { | |||
1416 | dev_t anon_dev; | 1550 | dev_t anon_dev; |
1417 | 1551 | ||
1418 | int force_cow; | 1552 | int force_cow; |
1553 | |||
1554 | spinlock_t root_times_lock; | ||
1419 | }; | 1555 | }; |
1420 | 1556 | ||
1421 | struct btrfs_ioctl_defrag_range_args { | 1557 | struct btrfs_ioctl_defrag_range_args { |
@@ -1525,6 +1661,30 @@ struct btrfs_ioctl_defrag_range_args { | |||
1525 | #define BTRFS_DEV_ITEM_KEY 216 | 1661 | #define BTRFS_DEV_ITEM_KEY 216 |
1526 | #define BTRFS_CHUNK_ITEM_KEY 228 | 1662 | #define BTRFS_CHUNK_ITEM_KEY 228 |
1527 | 1663 | ||
1664 | /* | ||
1665 | * Records the overall state of the qgroups. | ||
1666 | * There's only one instance of this key present, | ||
1667 | * (0, BTRFS_QGROUP_STATUS_KEY, 0) | ||
1668 | */ | ||
1669 | #define BTRFS_QGROUP_STATUS_KEY 240 | ||
1670 | /* | ||
1671 | * Records the currently used space of the qgroup. | ||
1672 | * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). | ||
1673 | */ | ||
1674 | #define BTRFS_QGROUP_INFO_KEY 242 | ||
1675 | /* | ||
1676 | * Contains the user configured limits for the qgroup. | ||
1677 | * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). | ||
1678 | */ | ||
1679 | #define BTRFS_QGROUP_LIMIT_KEY 244 | ||
1680 | /* | ||
1681 | * Records the child-parent relationship of qgroups. For | ||
1682 | * each relation, 2 keys are present: | ||
1683 | * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) | ||
1684 | * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) | ||
1685 | */ | ||
1686 | #define BTRFS_QGROUP_RELATION_KEY 246 | ||
1687 | |||
1528 | #define BTRFS_BALANCE_ITEM_KEY 248 | 1688 | #define BTRFS_BALANCE_ITEM_KEY 248 |
1529 | 1689 | ||
1530 | /* | 1690 | /* |
@@ -1621,13 +1781,54 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token) | |||
1621 | offsetof(type, member), \ | 1781 | offsetof(type, member), \ |
1622 | sizeof(((type *)0)->member))) | 1782 | sizeof(((type *)0)->member))) |
1623 | 1783 | ||
1624 | #ifndef BTRFS_SETGET_FUNCS | 1784 | #define DECLARE_BTRFS_SETGET_BITS(bits) \ |
1785 | u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ | ||
1786 | unsigned long off, \ | ||
1787 | struct btrfs_map_token *token); \ | ||
1788 | void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \ | ||
1789 | unsigned long off, u##bits val, \ | ||
1790 | struct btrfs_map_token *token); \ | ||
1791 | static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \ | ||
1792 | unsigned long off) \ | ||
1793 | { \ | ||
1794 | return btrfs_get_token_##bits(eb, ptr, off, NULL); \ | ||
1795 | } \ | ||
1796 | static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \ | ||
1797 | unsigned long off, u##bits val) \ | ||
1798 | { \ | ||
1799 | btrfs_set_token_##bits(eb, ptr, off, val, NULL); \ | ||
1800 | } | ||
1801 | |||
1802 | DECLARE_BTRFS_SETGET_BITS(8) | ||
1803 | DECLARE_BTRFS_SETGET_BITS(16) | ||
1804 | DECLARE_BTRFS_SETGET_BITS(32) | ||
1805 | DECLARE_BTRFS_SETGET_BITS(64) | ||
1806 | |||
1625 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ | 1807 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ |
1626 | u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ | 1808 | static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \ |
1627 | u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \ | 1809 | { \ |
1628 | void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\ | 1810 | BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ |
1629 | void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); | 1811 | return btrfs_get_##bits(eb, s, offsetof(type, member)); \ |
1630 | #endif | 1812 | } \ |
1813 | static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \ | ||
1814 | u##bits val) \ | ||
1815 | { \ | ||
1816 | BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ | ||
1817 | btrfs_set_##bits(eb, s, offsetof(type, member), val); \ | ||
1818 | } \ | ||
1819 | static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \ | ||
1820 | struct btrfs_map_token *token) \ | ||
1821 | { \ | ||
1822 | BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ | ||
1823 | return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \ | ||
1824 | } \ | ||
1825 | static inline void btrfs_set_token_##name(struct extent_buffer *eb, \ | ||
1826 | type *s, u##bits val, \ | ||
1827 | struct btrfs_map_token *token) \ | ||
1828 | { \ | ||
1829 | BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ | ||
1830 | btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \ | ||
1831 | } | ||
1631 | 1832 | ||
1632 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ | 1833 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ |
1633 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ | 1834 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ |
@@ -2189,6 +2390,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); | |||
2189 | BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | 2390 | BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); |
2190 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 2391 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
2191 | last_snapshot, 64); | 2392 | last_snapshot, 64); |
2393 | BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, | ||
2394 | generation_v2, 64); | ||
2395 | BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, | ||
2396 | ctransid, 64); | ||
2397 | BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, | ||
2398 | otransid, 64); | ||
2399 | BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, | ||
2400 | stransid, 64); | ||
2401 | BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, | ||
2402 | rtransid, 64); | ||
2192 | 2403 | ||
2193 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | 2404 | static inline bool btrfs_root_readonly(struct btrfs_root *root) |
2194 | { | 2405 | { |
@@ -2465,6 +2676,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, | |||
2465 | sizeof(val)); | 2676 | sizeof(val)); |
2466 | } | 2677 | } |
2467 | 2678 | ||
2679 | /* btrfs_qgroup_status_item */ | ||
2680 | BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, | ||
2681 | generation, 64); | ||
2682 | BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, | ||
2683 | version, 64); | ||
2684 | BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, | ||
2685 | flags, 64); | ||
2686 | BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, | ||
2687 | scan, 64); | ||
2688 | |||
2689 | /* btrfs_qgroup_info_item */ | ||
2690 | BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, | ||
2691 | generation, 64); | ||
2692 | BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64); | ||
2693 | BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, | ||
2694 | rfer_cmpr, 64); | ||
2695 | BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64); | ||
2696 | BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, | ||
2697 | excl_cmpr, 64); | ||
2698 | |||
2699 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, | ||
2700 | struct btrfs_qgroup_info_item, generation, 64); | ||
2701 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item, | ||
2702 | rfer, 64); | ||
2703 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr, | ||
2704 | struct btrfs_qgroup_info_item, rfer_cmpr, 64); | ||
2705 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item, | ||
2706 | excl, 64); | ||
2707 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr, | ||
2708 | struct btrfs_qgroup_info_item, excl_cmpr, 64); | ||
2709 | |||
2710 | /* btrfs_qgroup_limit_item */ | ||
2711 | BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, | ||
2712 | flags, 64); | ||
2713 | BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, | ||
2714 | max_rfer, 64); | ||
2715 | BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, | ||
2716 | max_excl, 64); | ||
2717 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, | ||
2718 | rsv_rfer, 64); | ||
2719 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, | ||
2720 | rsv_excl, 64); | ||
2721 | |||
2468 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2722 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
2469 | { | 2723 | { |
2470 | return sb->s_fs_info; | 2724 | return sb->s_fs_info; |
@@ -2607,7 +2861,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
2607 | struct btrfs_root *root, u64 group_start); | 2861 | struct btrfs_root *root, u64 group_start); |
2608 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2862 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2609 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 2863 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
2610 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | ||
2611 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2864 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2612 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2865 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
2613 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 2866 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
@@ -2661,6 +2914,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2661 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | 2914 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); |
2662 | 2915 | ||
2663 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | 2916 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); |
2917 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
2918 | struct btrfs_fs_info *fs_info); | ||
2664 | /* ctree.c */ | 2919 | /* ctree.c */ |
2665 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2920 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2666 | int level, int *slot); | 2921 | int level, int *slot); |
@@ -2680,6 +2935,21 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
2680 | struct btrfs_key *max_key, | 2935 | struct btrfs_key *max_key, |
2681 | struct btrfs_path *path, int cache_only, | 2936 | struct btrfs_path *path, int cache_only, |
2682 | u64 min_trans); | 2937 | u64 min_trans); |
2938 | enum btrfs_compare_tree_result { | ||
2939 | BTRFS_COMPARE_TREE_NEW, | ||
2940 | BTRFS_COMPARE_TREE_DELETED, | ||
2941 | BTRFS_COMPARE_TREE_CHANGED, | ||
2942 | }; | ||
2943 | typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, | ||
2944 | struct btrfs_root *right_root, | ||
2945 | struct btrfs_path *left_path, | ||
2946 | struct btrfs_path *right_path, | ||
2947 | struct btrfs_key *key, | ||
2948 | enum btrfs_compare_tree_result result, | ||
2949 | void *ctx); | ||
2950 | int btrfs_compare_trees(struct btrfs_root *left_root, | ||
2951 | struct btrfs_root *right_root, | ||
2952 | btrfs_changed_cb_t cb, void *ctx); | ||
2683 | int btrfs_cow_block(struct btrfs_trans_handle *trans, | 2953 | int btrfs_cow_block(struct btrfs_trans_handle *trans, |
2684 | struct btrfs_root *root, struct extent_buffer *buf, | 2954 | struct btrfs_root *root, struct extent_buffer *buf, |
2685 | struct extent_buffer *parent, int parent_slot, | 2955 | struct extent_buffer *parent, int parent_slot, |
@@ -2711,6 +2981,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2711 | ins_len, int cow); | 2981 | ins_len, int cow); |
2712 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | 2982 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, |
2713 | struct btrfs_path *p, u64 time_seq); | 2983 | struct btrfs_path *p, u64 time_seq); |
2984 | int btrfs_search_slot_for_read(struct btrfs_root *root, | ||
2985 | struct btrfs_key *key, struct btrfs_path *p, | ||
2986 | int find_higher, int return_any); | ||
2714 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 2987 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
2715 | struct btrfs_root *root, struct extent_buffer *parent, | 2988 | struct btrfs_root *root, struct extent_buffer *parent, |
2716 | int start_slot, int cache_only, u64 *last_ret, | 2989 | int start_slot, int cache_only, u64 *last_ret, |
@@ -2793,11 +3066,22 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) | |||
2793 | kfree(fs_info->chunk_root); | 3066 | kfree(fs_info->chunk_root); |
2794 | kfree(fs_info->dev_root); | 3067 | kfree(fs_info->dev_root); |
2795 | kfree(fs_info->csum_root); | 3068 | kfree(fs_info->csum_root); |
3069 | kfree(fs_info->quota_root); | ||
2796 | kfree(fs_info->super_copy); | 3070 | kfree(fs_info->super_copy); |
2797 | kfree(fs_info->super_for_commit); | 3071 | kfree(fs_info->super_for_commit); |
2798 | kfree(fs_info); | 3072 | kfree(fs_info); |
2799 | } | 3073 | } |
2800 | 3074 | ||
3075 | /* tree mod log functions from ctree.c */ | ||
3076 | u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3077 | struct seq_list *elem); | ||
3078 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3079 | struct seq_list *elem); | ||
3080 | static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) | ||
3081 | { | ||
3082 | return atomic_inc_return(&fs_info->tree_mod_seq); | ||
3083 | } | ||
3084 | |||
2801 | /* root-item.c */ | 3085 | /* root-item.c */ |
2802 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 3086 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2803 | struct btrfs_path *path, | 3087 | struct btrfs_path *path, |
@@ -2819,6 +3103,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, | |||
2819 | struct btrfs_root *root, | 3103 | struct btrfs_root *root, |
2820 | struct btrfs_key *key, | 3104 | struct btrfs_key *key, |
2821 | struct btrfs_root_item *item); | 3105 | struct btrfs_root_item *item); |
3106 | void btrfs_read_root_item(struct btrfs_root *root, | ||
3107 | struct extent_buffer *eb, int slot, | ||
3108 | struct btrfs_root_item *item); | ||
2822 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | 3109 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct |
2823 | btrfs_root_item *item, struct btrfs_key *key); | 3110 | btrfs_root_item *item, struct btrfs_key *key); |
2824 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 3111 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
@@ -2826,6 +3113,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | |||
2826 | void btrfs_set_root_node(struct btrfs_root_item *item, | 3113 | void btrfs_set_root_node(struct btrfs_root_item *item, |
2827 | struct extent_buffer *node); | 3114 | struct extent_buffer *node); |
2828 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | 3115 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); |
3116 | void btrfs_update_root_times(struct btrfs_trans_handle *trans, | ||
3117 | struct btrfs_root *root); | ||
2829 | 3118 | ||
2830 | /* dir-item.c */ | 3119 | /* dir-item.c */ |
2831 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | 3120 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, |
@@ -3061,6 +3350,23 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
3061 | struct btrfs_root *root, const char *function, | 3350 | struct btrfs_root *root, const char *function, |
3062 | unsigned int line, int errno); | 3351 | unsigned int line, int errno); |
3063 | 3352 | ||
3353 | #define btrfs_set_fs_incompat(__fs_info, opt) \ | ||
3354 | __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) | ||
3355 | |||
3356 | static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, | ||
3357 | u64 flag) | ||
3358 | { | ||
3359 | struct btrfs_super_block *disk_super; | ||
3360 | u64 features; | ||
3361 | |||
3362 | disk_super = fs_info->super_copy; | ||
3363 | features = btrfs_super_incompat_flags(disk_super); | ||
3364 | if (!(features & flag)) { | ||
3365 | features |= flag; | ||
3366 | btrfs_set_super_incompat_flags(disk_super, features); | ||
3367 | } | ||
3368 | } | ||
3369 | |||
3064 | #define btrfs_abort_transaction(trans, root, errno) \ | 3370 | #define btrfs_abort_transaction(trans, root, errno) \ |
3065 | do { \ | 3371 | do { \ |
3066 | __btrfs_abort_transaction(trans, root, __func__, \ | 3372 | __btrfs_abort_transaction(trans, root, __func__, \ |
@@ -3156,17 +3462,49 @@ void btrfs_reada_detach(void *handle); | |||
3156 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 3462 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
3157 | u64 start, int err); | 3463 | u64 start, int err); |
3158 | 3464 | ||
3159 | /* delayed seq elem */ | 3465 | /* qgroup.c */ |
3160 | struct seq_list { | 3466 | struct qgroup_update { |
3161 | struct list_head list; | 3467 | struct list_head list; |
3162 | u64 seq; | 3468 | struct btrfs_delayed_ref_node *node; |
3163 | u32 flags; | 3469 | struct btrfs_delayed_extent_op *extent_op; |
3164 | }; | 3470 | }; |
3165 | 3471 | ||
3166 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | 3472 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, |
3167 | struct seq_list *elem); | 3473 | struct btrfs_fs_info *fs_info); |
3168 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | 3474 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
3169 | struct seq_list *elem); | 3475 | struct btrfs_fs_info *fs_info); |
3476 | int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); | ||
3477 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
3478 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
3479 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
3480 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
3481 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
3482 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
3483 | char *name); | ||
3484 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
3485 | struct btrfs_fs_info *fs_info, u64 qgroupid); | ||
3486 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
3487 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
3488 | struct btrfs_qgroup_limit *limit); | ||
3489 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); | ||
3490 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | ||
3491 | struct btrfs_delayed_extent_op; | ||
3492 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
3493 | struct btrfs_delayed_ref_node *node, | ||
3494 | struct btrfs_delayed_extent_op *extent_op); | ||
3495 | int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | ||
3496 | struct btrfs_fs_info *fs_info, | ||
3497 | struct btrfs_delayed_ref_node *node, | ||
3498 | struct btrfs_delayed_extent_op *extent_op); | ||
3499 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | ||
3500 | struct btrfs_fs_info *fs_info); | ||
3501 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
3502 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
3503 | struct btrfs_qgroup_inherit *inherit); | ||
3504 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); | ||
3505 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); | ||
3506 | |||
3507 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); | ||
3170 | 3508 | ||
3171 | static inline int is_fstree(u64 rootid) | 3509 | static inline int is_fstree(u64 rootid) |
3172 | { | 3510 | { |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 2399f408691..335605c8cea 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -62,6 +62,7 @@ static inline void btrfs_init_delayed_node( | |||
62 | INIT_LIST_HEAD(&delayed_node->n_list); | 62 | INIT_LIST_HEAD(&delayed_node->n_list); |
63 | INIT_LIST_HEAD(&delayed_node->p_list); | 63 | INIT_LIST_HEAD(&delayed_node->p_list); |
64 | delayed_node->bytes_reserved = 0; | 64 | delayed_node->bytes_reserved = 0; |
65 | memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item)); | ||
65 | } | 66 | } |
66 | 67 | ||
67 | static inline int btrfs_is_continuous_delayed_item( | 68 | static inline int btrfs_is_continuous_delayed_item( |
@@ -1113,8 +1114,8 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | |||
1113 | * Returns < 0 on error and returns with an aborted transaction with any | 1114 | * Returns < 0 on error and returns with an aborted transaction with any |
1114 | * outstanding delayed items cleaned up. | 1115 | * outstanding delayed items cleaned up. |
1115 | */ | 1116 | */ |
1116 | int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | 1117 | static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, |
1117 | struct btrfs_root *root) | 1118 | struct btrfs_root *root, int nr) |
1118 | { | 1119 | { |
1119 | struct btrfs_root *curr_root = root; | 1120 | struct btrfs_root *curr_root = root; |
1120 | struct btrfs_delayed_root *delayed_root; | 1121 | struct btrfs_delayed_root *delayed_root; |
@@ -1122,6 +1123,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1122 | struct btrfs_path *path; | 1123 | struct btrfs_path *path; |
1123 | struct btrfs_block_rsv *block_rsv; | 1124 | struct btrfs_block_rsv *block_rsv; |
1124 | int ret = 0; | 1125 | int ret = 0; |
1126 | bool count = (nr > 0); | ||
1125 | 1127 | ||
1126 | if (trans->aborted) | 1128 | if (trans->aborted) |
1127 | return -EIO; | 1129 | return -EIO; |
@@ -1137,7 +1139,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1137 | delayed_root = btrfs_get_delayed_root(root); | 1139 | delayed_root = btrfs_get_delayed_root(root); |
1138 | 1140 | ||
1139 | curr_node = btrfs_first_delayed_node(delayed_root); | 1141 | curr_node = btrfs_first_delayed_node(delayed_root); |
1140 | while (curr_node) { | 1142 | while (curr_node && (!count || (count && nr--))) { |
1141 | curr_root = curr_node->root; | 1143 | curr_root = curr_node->root; |
1142 | ret = btrfs_insert_delayed_items(trans, path, curr_root, | 1144 | ret = btrfs_insert_delayed_items(trans, path, curr_root, |
1143 | curr_node); | 1145 | curr_node); |
@@ -1149,6 +1151,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1149 | path, curr_node); | 1151 | path, curr_node); |
1150 | if (ret) { | 1152 | if (ret) { |
1151 | btrfs_release_delayed_node(curr_node); | 1153 | btrfs_release_delayed_node(curr_node); |
1154 | curr_node = NULL; | ||
1152 | btrfs_abort_transaction(trans, root, ret); | 1155 | btrfs_abort_transaction(trans, root, ret); |
1153 | break; | 1156 | break; |
1154 | } | 1157 | } |
@@ -1158,12 +1161,26 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1158 | btrfs_release_delayed_node(prev_node); | 1161 | btrfs_release_delayed_node(prev_node); |
1159 | } | 1162 | } |
1160 | 1163 | ||
1164 | if (curr_node) | ||
1165 | btrfs_release_delayed_node(curr_node); | ||
1161 | btrfs_free_path(path); | 1166 | btrfs_free_path(path); |
1162 | trans->block_rsv = block_rsv; | 1167 | trans->block_rsv = block_rsv; |
1163 | 1168 | ||
1164 | return ret; | 1169 | return ret; |
1165 | } | 1170 | } |
1166 | 1171 | ||
1172 | int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | ||
1173 | struct btrfs_root *root) | ||
1174 | { | ||
1175 | return __btrfs_run_delayed_items(trans, root, -1); | ||
1176 | } | ||
1177 | |||
1178 | int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, | ||
1179 | struct btrfs_root *root, int nr) | ||
1180 | { | ||
1181 | return __btrfs_run_delayed_items(trans, root, nr); | ||
1182 | } | ||
1183 | |||
1167 | static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | 1184 | static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, |
1168 | struct btrfs_delayed_node *node) | 1185 | struct btrfs_delayed_node *node) |
1169 | { | 1186 | { |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f5aa4023d3e..4f808e1baee 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -107,6 +107,8 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode); | |||
107 | 107 | ||
108 | int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | 108 | int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, |
109 | struct btrfs_root *root); | 109 | struct btrfs_root *root); |
110 | int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, | ||
111 | struct btrfs_root *root, int nr); | ||
110 | 112 | ||
111 | void btrfs_balance_delayed_items(struct btrfs_root *root); | 113 | void btrfs_balance_delayed_items(struct btrfs_root *root); |
112 | 114 | ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 13ae7b04790..da7419ed01b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
233 | return 0; | 233 | return 0; |
234 | } | 234 | } |
235 | 235 | ||
236 | int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 236 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, |
237 | struct btrfs_delayed_ref_root *delayed_refs, | ||
237 | u64 seq) | 238 | u64 seq) |
238 | { | 239 | { |
239 | struct seq_list *elem; | 240 | struct seq_list *elem; |
240 | 241 | int ret = 0; | |
241 | assert_spin_locked(&delayed_refs->lock); | 242 | |
242 | if (list_empty(&delayed_refs->seq_head)) | 243 | spin_lock(&fs_info->tree_mod_seq_lock); |
243 | return 0; | 244 | if (!list_empty(&fs_info->tree_mod_seq_list)) { |
244 | 245 | elem = list_first_entry(&fs_info->tree_mod_seq_list, | |
245 | elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list); | 246 | struct seq_list, list); |
246 | if (seq >= elem->seq) { | 247 | if (seq >= elem->seq) { |
247 | pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n", | 248 | pr_debug("holding back delayed_ref %llu, lowest is " |
248 | seq, elem->seq, delayed_refs); | 249 | "%llu (%p)\n", seq, elem->seq, delayed_refs); |
249 | return 1; | 250 | ret = 1; |
251 | } | ||
250 | } | 252 | } |
251 | return 0; | 253 | |
254 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
255 | return ret; | ||
252 | } | 256 | } |
253 | 257 | ||
254 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 258 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
@@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
525 | ref->is_head = 0; | 529 | ref->is_head = 0; |
526 | ref->in_tree = 1; | 530 | ref->in_tree = 1; |
527 | 531 | ||
528 | if (is_fstree(ref_root)) | 532 | if (need_ref_seq(for_cow, ref_root)) |
529 | seq = inc_delayed_seq(delayed_refs); | 533 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); |
530 | ref->seq = seq; | 534 | ref->seq = seq; |
531 | 535 | ||
532 | full_ref = btrfs_delayed_node_to_tree_ref(ref); | 536 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
@@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
584 | ref->is_head = 0; | 588 | ref->is_head = 0; |
585 | ref->in_tree = 1; | 589 | ref->in_tree = 1; |
586 | 590 | ||
587 | if (is_fstree(ref_root)) | 591 | if (need_ref_seq(for_cow, ref_root)) |
588 | seq = inc_delayed_seq(delayed_refs); | 592 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); |
589 | ref->seq = seq; | 593 | ref->seq = seq; |
590 | 594 | ||
591 | full_ref = btrfs_delayed_node_to_data_ref(ref); | 595 | full_ref = btrfs_delayed_node_to_data_ref(ref); |
@@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 662 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
659 | num_bytes, parent, ref_root, level, action, | 663 | num_bytes, parent, ref_root, level, action, |
660 | for_cow); | 664 | for_cow); |
661 | if (!is_fstree(ref_root) && | 665 | if (!need_ref_seq(for_cow, ref_root) && |
662 | waitqueue_active(&delayed_refs->seq_wait)) | 666 | waitqueue_active(&fs_info->tree_mod_seq_wait)) |
663 | wake_up(&delayed_refs->seq_wait); | 667 | wake_up(&fs_info->tree_mod_seq_wait); |
664 | spin_unlock(&delayed_refs->lock); | 668 | spin_unlock(&delayed_refs->lock); |
669 | if (need_ref_seq(for_cow, ref_root)) | ||
670 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
665 | 671 | ||
666 | return 0; | 672 | return 0; |
667 | } | 673 | } |
@@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
707 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 713 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
708 | num_bytes, parent, ref_root, owner, offset, | 714 | num_bytes, parent, ref_root, owner, offset, |
709 | action, for_cow); | 715 | action, for_cow); |
710 | if (!is_fstree(ref_root) && | 716 | if (!need_ref_seq(for_cow, ref_root) && |
711 | waitqueue_active(&delayed_refs->seq_wait)) | 717 | waitqueue_active(&fs_info->tree_mod_seq_wait)) |
712 | wake_up(&delayed_refs->seq_wait); | 718 | wake_up(&fs_info->tree_mod_seq_wait); |
713 | spin_unlock(&delayed_refs->lock); | 719 | spin_unlock(&delayed_refs->lock); |
720 | if (need_ref_seq(for_cow, ref_root)) | ||
721 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
714 | 722 | ||
715 | return 0; | 723 | return 0; |
716 | } | 724 | } |
@@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | |||
736 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, | 744 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, |
737 | extent_op->is_data); | 745 | extent_op->is_data); |
738 | 746 | ||
739 | if (waitqueue_active(&delayed_refs->seq_wait)) | 747 | if (waitqueue_active(&fs_info->tree_mod_seq_wait)) |
740 | wake_up(&delayed_refs->seq_wait); | 748 | wake_up(&fs_info->tree_mod_seq_wait); |
741 | spin_unlock(&delayed_refs->lock); | 749 | spin_unlock(&delayed_refs->lock); |
742 | return 0; | 750 | return 0; |
743 | } | 751 | } |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 413927fb995..0d7c90c366b 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root { | |||
139 | int flushing; | 139 | int flushing; |
140 | 140 | ||
141 | u64 run_delayed_start; | 141 | u64 run_delayed_start; |
142 | |||
143 | /* | ||
144 | * seq number of delayed refs. We need to know if a backref was being | ||
145 | * added before the currently processed ref or afterwards. | ||
146 | */ | ||
147 | u64 seq; | ||
148 | |||
149 | /* | ||
150 | * seq_list holds a list of all seq numbers that are currently being | ||
151 | * added to the list. While walking backrefs (btrfs_find_all_roots, | ||
152 | * qgroups), which might take some time, no newer ref must be processed, | ||
153 | * as it might influence the outcome of the walk. | ||
154 | */ | ||
155 | struct list_head seq_head; | ||
156 | |||
157 | /* | ||
158 | * when the only refs we have in the list must not be processed, we want | ||
159 | * to wait for more refs to show up or for the end of backref walking. | ||
160 | */ | ||
161 | wait_queue_head_t seq_wait; | ||
162 | }; | 142 | }; |
163 | 143 | ||
164 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | 144 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) |
@@ -195,34 +175,28 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 175 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
196 | struct list_head *cluster, u64 search_start); | 176 | struct list_head *cluster, u64 search_start); |
197 | 177 | ||
198 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | 178 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, |
199 | { | 179 | struct btrfs_delayed_ref_root *delayed_refs, |
200 | assert_spin_locked(&delayed_refs->lock); | 180 | u64 seq); |
201 | ++delayed_refs->seq; | ||
202 | return delayed_refs->seq; | ||
203 | } | ||
204 | 181 | ||
205 | static inline void | 182 | /* |
206 | btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 183 | * delayed refs with a ref_seq > 0 must be held back during backref walking. |
207 | struct seq_list *elem) | 184 | * this only applies to items in one of the fs-trees. for_cow items never need |
185 | * to be held back, so they won't get a ref_seq number. | ||
186 | */ | ||
187 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
208 | { | 188 | { |
209 | assert_spin_locked(&delayed_refs->lock); | 189 | if (for_cow) |
210 | elem->seq = delayed_refs->seq; | 190 | return 0; |
211 | list_add_tail(&elem->list, &delayed_refs->seq_head); | ||
212 | } | ||
213 | 191 | ||
214 | static inline void | 192 | if (rootid == BTRFS_FS_TREE_OBJECTID) |
215 | btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 193 | return 1; |
216 | struct seq_list *elem) | ||
217 | { | ||
218 | spin_lock(&delayed_refs->lock); | ||
219 | list_del(&elem->list); | ||
220 | wake_up(&delayed_refs->seq_wait); | ||
221 | spin_unlock(&delayed_refs->lock); | ||
222 | } | ||
223 | 194 | ||
224 | int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 195 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) |
225 | u64 seq); | 196 | return 1; |
197 | |||
198 | return 0; | ||
199 | } | ||
226 | 200 | ||
227 | /* | 201 | /* |
228 | * a node might live in a head or a regular ref, this lets you | 202 | * a node might live in a head or a regular ref, this lets you |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2936ca49b3b..502b20c56e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -407,7 +407,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
407 | break; | 407 | break; |
408 | } | 408 | } |
409 | 409 | ||
410 | if (failed && !ret) | 410 | if (failed && !ret && failed_mirror) |
411 | repair_eb_io_failure(root, eb, failed_mirror); | 411 | repair_eb_io_failure(root, eb, failed_mirror); |
412 | 412 | ||
413 | return ret; | 413 | return ret; |
@@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1182 | root->defrag_running = 0; | 1182 | root->defrag_running = 0; |
1183 | root->root_key.objectid = objectid; | 1183 | root->root_key.objectid = objectid; |
1184 | root->anon_dev = 0; | 1184 | root->anon_dev = 0; |
1185 | |||
1186 | spin_lock_init(&root->root_times_lock); | ||
1185 | } | 1187 | } |
1186 | 1188 | ||
1187 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | 1189 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, |
@@ -1225,6 +1227,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) | |||
1225 | return root; | 1227 | return root; |
1226 | } | 1228 | } |
1227 | 1229 | ||
1230 | struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | ||
1231 | struct btrfs_fs_info *fs_info, | ||
1232 | u64 objectid) | ||
1233 | { | ||
1234 | struct extent_buffer *leaf; | ||
1235 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
1236 | struct btrfs_root *root; | ||
1237 | struct btrfs_key key; | ||
1238 | int ret = 0; | ||
1239 | u64 bytenr; | ||
1240 | |||
1241 | root = btrfs_alloc_root(fs_info); | ||
1242 | if (!root) | ||
1243 | return ERR_PTR(-ENOMEM); | ||
1244 | |||
1245 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
1246 | tree_root->sectorsize, tree_root->stripesize, | ||
1247 | root, fs_info, objectid); | ||
1248 | root->root_key.objectid = objectid; | ||
1249 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
1250 | root->root_key.offset = 0; | ||
1251 | |||
1252 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | ||
1253 | 0, objectid, NULL, 0, 0, 0); | ||
1254 | if (IS_ERR(leaf)) { | ||
1255 | ret = PTR_ERR(leaf); | ||
1256 | goto fail; | ||
1257 | } | ||
1258 | |||
1259 | bytenr = leaf->start; | ||
1260 | memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); | ||
1261 | btrfs_set_header_bytenr(leaf, leaf->start); | ||
1262 | btrfs_set_header_generation(leaf, trans->transid); | ||
1263 | btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); | ||
1264 | btrfs_set_header_owner(leaf, objectid); | ||
1265 | root->node = leaf; | ||
1266 | |||
1267 | write_extent_buffer(leaf, fs_info->fsid, | ||
1268 | (unsigned long)btrfs_header_fsid(leaf), | ||
1269 | BTRFS_FSID_SIZE); | ||
1270 | write_extent_buffer(leaf, fs_info->chunk_tree_uuid, | ||
1271 | (unsigned long)btrfs_header_chunk_tree_uuid(leaf), | ||
1272 | BTRFS_UUID_SIZE); | ||
1273 | btrfs_mark_buffer_dirty(leaf); | ||
1274 | |||
1275 | root->commit_root = btrfs_root_node(root); | ||
1276 | root->track_dirty = 1; | ||
1277 | |||
1278 | |||
1279 | root->root_item.flags = 0; | ||
1280 | root->root_item.byte_limit = 0; | ||
1281 | btrfs_set_root_bytenr(&root->root_item, leaf->start); | ||
1282 | btrfs_set_root_generation(&root->root_item, trans->transid); | ||
1283 | btrfs_set_root_level(&root->root_item, 0); | ||
1284 | btrfs_set_root_refs(&root->root_item, 1); | ||
1285 | btrfs_set_root_used(&root->root_item, leaf->len); | ||
1286 | btrfs_set_root_last_snapshot(&root->root_item, 0); | ||
1287 | btrfs_set_root_dirid(&root->root_item, 0); | ||
1288 | root->root_item.drop_level = 0; | ||
1289 | |||
1290 | key.objectid = objectid; | ||
1291 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
1292 | key.offset = 0; | ||
1293 | ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item); | ||
1294 | if (ret) | ||
1295 | goto fail; | ||
1296 | |||
1297 | btrfs_tree_unlock(leaf); | ||
1298 | |||
1299 | fail: | ||
1300 | if (ret) | ||
1301 | return ERR_PTR(ret); | ||
1302 | |||
1303 | return root; | ||
1304 | } | ||
1305 | |||
1228 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 1306 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
1229 | struct btrfs_fs_info *fs_info) | 1307 | struct btrfs_fs_info *fs_info) |
1230 | { | 1308 | { |
@@ -1326,6 +1404,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1326 | u64 generation; | 1404 | u64 generation; |
1327 | u32 blocksize; | 1405 | u32 blocksize; |
1328 | int ret = 0; | 1406 | int ret = 0; |
1407 | int slot; | ||
1329 | 1408 | ||
1330 | root = btrfs_alloc_root(fs_info); | 1409 | root = btrfs_alloc_root(fs_info); |
1331 | if (!root) | 1410 | if (!root) |
@@ -1352,9 +1431,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1352 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1431 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1353 | if (ret == 0) { | 1432 | if (ret == 0) { |
1354 | l = path->nodes[0]; | 1433 | l = path->nodes[0]; |
1355 | read_extent_buffer(l, &root->root_item, | 1434 | slot = path->slots[0]; |
1356 | btrfs_item_ptr_offset(l, path->slots[0]), | 1435 | btrfs_read_root_item(tree_root, l, slot, &root->root_item); |
1357 | sizeof(root->root_item)); | ||
1358 | memcpy(&root->root_key, location, sizeof(*location)); | 1436 | memcpy(&root->root_key, location, sizeof(*location)); |
1359 | } | 1437 | } |
1360 | btrfs_free_path(path); | 1438 | btrfs_free_path(path); |
@@ -1396,6 +1474,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1396 | return fs_info->dev_root; | 1474 | return fs_info->dev_root; |
1397 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | 1475 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) |
1398 | return fs_info->csum_root; | 1476 | return fs_info->csum_root; |
1477 | if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) | ||
1478 | return fs_info->quota_root ? fs_info->quota_root : | ||
1479 | ERR_PTR(-ENOENT); | ||
1399 | again: | 1480 | again: |
1400 | spin_lock(&fs_info->fs_roots_radix_lock); | 1481 | spin_lock(&fs_info->fs_roots_radix_lock); |
1401 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | 1482 | root = radix_tree_lookup(&fs_info->fs_roots_radix, |
@@ -1823,6 +1904,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
1823 | free_extent_buffer(info->extent_root->commit_root); | 1904 | free_extent_buffer(info->extent_root->commit_root); |
1824 | free_extent_buffer(info->csum_root->node); | 1905 | free_extent_buffer(info->csum_root->node); |
1825 | free_extent_buffer(info->csum_root->commit_root); | 1906 | free_extent_buffer(info->csum_root->commit_root); |
1907 | if (info->quota_root) { | ||
1908 | free_extent_buffer(info->quota_root->node); | ||
1909 | free_extent_buffer(info->quota_root->commit_root); | ||
1910 | } | ||
1826 | 1911 | ||
1827 | info->tree_root->node = NULL; | 1912 | info->tree_root->node = NULL; |
1828 | info->tree_root->commit_root = NULL; | 1913 | info->tree_root->commit_root = NULL; |
@@ -1832,6 +1917,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
1832 | info->extent_root->commit_root = NULL; | 1917 | info->extent_root->commit_root = NULL; |
1833 | info->csum_root->node = NULL; | 1918 | info->csum_root->node = NULL; |
1834 | info->csum_root->commit_root = NULL; | 1919 | info->csum_root->commit_root = NULL; |
1920 | if (info->quota_root) { | ||
1921 | info->quota_root->node = NULL; | ||
1922 | info->quota_root->commit_root = NULL; | ||
1923 | } | ||
1835 | 1924 | ||
1836 | if (chunk_root) { | 1925 | if (chunk_root) { |
1837 | free_extent_buffer(info->chunk_root->node); | 1926 | free_extent_buffer(info->chunk_root->node); |
@@ -1862,6 +1951,7 @@ int open_ctree(struct super_block *sb, | |||
1862 | struct btrfs_root *csum_root; | 1951 | struct btrfs_root *csum_root; |
1863 | struct btrfs_root *chunk_root; | 1952 | struct btrfs_root *chunk_root; |
1864 | struct btrfs_root *dev_root; | 1953 | struct btrfs_root *dev_root; |
1954 | struct btrfs_root *quota_root; | ||
1865 | struct btrfs_root *log_tree_root; | 1955 | struct btrfs_root *log_tree_root; |
1866 | int ret; | 1956 | int ret; |
1867 | int err = -EINVAL; | 1957 | int err = -EINVAL; |
@@ -1873,9 +1963,10 @@ int open_ctree(struct super_block *sb, | |||
1873 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); | 1963 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); |
1874 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); | 1964 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); |
1875 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); | 1965 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); |
1966 | quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); | ||
1876 | 1967 | ||
1877 | if (!tree_root || !extent_root || !csum_root || | 1968 | if (!tree_root || !extent_root || !csum_root || |
1878 | !chunk_root || !dev_root) { | 1969 | !chunk_root || !dev_root || !quota_root) { |
1879 | err = -ENOMEM; | 1970 | err = -ENOMEM; |
1880 | goto fail; | 1971 | goto fail; |
1881 | } | 1972 | } |
@@ -1944,6 +2035,8 @@ int open_ctree(struct super_block *sb, | |||
1944 | fs_info->free_chunk_space = 0; | 2035 | fs_info->free_chunk_space = 0; |
1945 | fs_info->tree_mod_log = RB_ROOT; | 2036 | fs_info->tree_mod_log = RB_ROOT; |
1946 | 2037 | ||
2038 | init_waitqueue_head(&fs_info->tree_mod_seq_wait); | ||
2039 | |||
1947 | /* readahead state */ | 2040 | /* readahead state */ |
1948 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 2041 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
1949 | spin_lock_init(&fs_info->reada_lock); | 2042 | spin_lock_init(&fs_info->reada_lock); |
@@ -2032,6 +2125,13 @@ int open_ctree(struct super_block *sb, | |||
2032 | init_rwsem(&fs_info->cleanup_work_sem); | 2125 | init_rwsem(&fs_info->cleanup_work_sem); |
2033 | init_rwsem(&fs_info->subvol_sem); | 2126 | init_rwsem(&fs_info->subvol_sem); |
2034 | 2127 | ||
2128 | spin_lock_init(&fs_info->qgroup_lock); | ||
2129 | fs_info->qgroup_tree = RB_ROOT; | ||
2130 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | ||
2131 | fs_info->qgroup_seq = 1; | ||
2132 | fs_info->quota_enabled = 0; | ||
2133 | fs_info->pending_quota_state = 0; | ||
2134 | |||
2035 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 2135 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
2036 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 2136 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
2037 | 2137 | ||
@@ -2244,7 +2344,7 @@ int open_ctree(struct super_block *sb, | |||
2244 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2344 | ret |= btrfs_start_workers(&fs_info->caching_workers); |
2245 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2345 | ret |= btrfs_start_workers(&fs_info->readahead_workers); |
2246 | if (ret) { | 2346 | if (ret) { |
2247 | ret = -ENOMEM; | 2347 | err = -ENOMEM; |
2248 | goto fail_sb_buffer; | 2348 | goto fail_sb_buffer; |
2249 | } | 2349 | } |
2250 | 2350 | ||
@@ -2356,6 +2456,17 @@ retry_root_backup: | |||
2356 | goto recovery_tree_root; | 2456 | goto recovery_tree_root; |
2357 | csum_root->track_dirty = 1; | 2457 | csum_root->track_dirty = 1; |
2358 | 2458 | ||
2459 | ret = find_and_setup_root(tree_root, fs_info, | ||
2460 | BTRFS_QUOTA_TREE_OBJECTID, quota_root); | ||
2461 | if (ret) { | ||
2462 | kfree(quota_root); | ||
2463 | quota_root = fs_info->quota_root = NULL; | ||
2464 | } else { | ||
2465 | quota_root->track_dirty = 1; | ||
2466 | fs_info->quota_enabled = 1; | ||
2467 | fs_info->pending_quota_state = 1; | ||
2468 | } | ||
2469 | |||
2359 | fs_info->generation = generation; | 2470 | fs_info->generation = generation; |
2360 | fs_info->last_trans_committed = generation; | 2471 | fs_info->last_trans_committed = generation; |
2361 | 2472 | ||
@@ -2415,6 +2526,9 @@ retry_root_backup: | |||
2415 | " integrity check module %s\n", sb->s_id); | 2526 | " integrity check module %s\n", sb->s_id); |
2416 | } | 2527 | } |
2417 | #endif | 2528 | #endif |
2529 | ret = btrfs_read_qgroup_config(fs_info); | ||
2530 | if (ret) | ||
2531 | goto fail_trans_kthread; | ||
2418 | 2532 | ||
2419 | /* do not make disk changes in broken FS */ | 2533 | /* do not make disk changes in broken FS */ |
2420 | if (btrfs_super_log_root(disk_super) != 0 && | 2534 | if (btrfs_super_log_root(disk_super) != 0 && |
@@ -2425,7 +2539,7 @@ retry_root_backup: | |||
2425 | printk(KERN_WARNING "Btrfs log replay required " | 2539 | printk(KERN_WARNING "Btrfs log replay required " |
2426 | "on RO media\n"); | 2540 | "on RO media\n"); |
2427 | err = -EIO; | 2541 | err = -EIO; |
2428 | goto fail_trans_kthread; | 2542 | goto fail_qgroup; |
2429 | } | 2543 | } |
2430 | blocksize = | 2544 | blocksize = |
2431 | btrfs_level_size(tree_root, | 2545 | btrfs_level_size(tree_root, |
@@ -2434,7 +2548,7 @@ retry_root_backup: | |||
2434 | log_tree_root = btrfs_alloc_root(fs_info); | 2548 | log_tree_root = btrfs_alloc_root(fs_info); |
2435 | if (!log_tree_root) { | 2549 | if (!log_tree_root) { |
2436 | err = -ENOMEM; | 2550 | err = -ENOMEM; |
2437 | goto fail_trans_kthread; | 2551 | goto fail_qgroup; |
2438 | } | 2552 | } |
2439 | 2553 | ||
2440 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2554 | __setup_root(nodesize, leafsize, sectorsize, stripesize, |
@@ -2466,15 +2580,15 @@ retry_root_backup: | |||
2466 | 2580 | ||
2467 | if (!(sb->s_flags & MS_RDONLY)) { | 2581 | if (!(sb->s_flags & MS_RDONLY)) { |
2468 | ret = btrfs_cleanup_fs_roots(fs_info); | 2582 | ret = btrfs_cleanup_fs_roots(fs_info); |
2469 | if (ret) { | 2583 | if (ret) |
2470 | } | 2584 | goto fail_trans_kthread; |
2471 | 2585 | ||
2472 | ret = btrfs_recover_relocation(tree_root); | 2586 | ret = btrfs_recover_relocation(tree_root); |
2473 | if (ret < 0) { | 2587 | if (ret < 0) { |
2474 | printk(KERN_WARNING | 2588 | printk(KERN_WARNING |
2475 | "btrfs: failed to recover relocation\n"); | 2589 | "btrfs: failed to recover relocation\n"); |
2476 | err = -EINVAL; | 2590 | err = -EINVAL; |
2477 | goto fail_trans_kthread; | 2591 | goto fail_qgroup; |
2478 | } | 2592 | } |
2479 | } | 2593 | } |
2480 | 2594 | ||
@@ -2484,10 +2598,10 @@ retry_root_backup: | |||
2484 | 2598 | ||
2485 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 2599 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
2486 | if (!fs_info->fs_root) | 2600 | if (!fs_info->fs_root) |
2487 | goto fail_trans_kthread; | 2601 | goto fail_qgroup; |
2488 | if (IS_ERR(fs_info->fs_root)) { | 2602 | if (IS_ERR(fs_info->fs_root)) { |
2489 | err = PTR_ERR(fs_info->fs_root); | 2603 | err = PTR_ERR(fs_info->fs_root); |
2490 | goto fail_trans_kthread; | 2604 | goto fail_qgroup; |
2491 | } | 2605 | } |
2492 | 2606 | ||
2493 | if (sb->s_flags & MS_RDONLY) | 2607 | if (sb->s_flags & MS_RDONLY) |
@@ -2511,6 +2625,8 @@ retry_root_backup: | |||
2511 | 2625 | ||
2512 | return 0; | 2626 | return 0; |
2513 | 2627 | ||
2628 | fail_qgroup: | ||
2629 | btrfs_free_qgroup_config(fs_info); | ||
2514 | fail_trans_kthread: | 2630 | fail_trans_kthread: |
2515 | kthread_stop(fs_info->transaction_kthread); | 2631 | kthread_stop(fs_info->transaction_kthread); |
2516 | fail_cleaner: | 2632 | fail_cleaner: |
@@ -3109,6 +3225,8 @@ int close_ctree(struct btrfs_root *root) | |||
3109 | fs_info->closing = 2; | 3225 | fs_info->closing = 2; |
3110 | smp_mb(); | 3226 | smp_mb(); |
3111 | 3227 | ||
3228 | btrfs_free_qgroup_config(root->fs_info); | ||
3229 | |||
3112 | if (fs_info->delalloc_bytes) { | 3230 | if (fs_info->delalloc_bytes) { |
3113 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3231 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
3114 | (unsigned long long)fs_info->delalloc_bytes); | 3232 | (unsigned long long)fs_info->delalloc_bytes); |
@@ -3128,6 +3246,10 @@ int close_ctree(struct btrfs_root *root) | |||
3128 | free_extent_buffer(fs_info->dev_root->commit_root); | 3246 | free_extent_buffer(fs_info->dev_root->commit_root); |
3129 | free_extent_buffer(fs_info->csum_root->node); | 3247 | free_extent_buffer(fs_info->csum_root->node); |
3130 | free_extent_buffer(fs_info->csum_root->commit_root); | 3248 | free_extent_buffer(fs_info->csum_root->commit_root); |
3249 | if (fs_info->quota_root) { | ||
3250 | free_extent_buffer(fs_info->quota_root->node); | ||
3251 | free_extent_buffer(fs_info->quota_root->commit_root); | ||
3252 | } | ||
3131 | 3253 | ||
3132 | btrfs_free_block_groups(fs_info); | 3254 | btrfs_free_block_groups(fs_info); |
3133 | 3255 | ||
@@ -3258,7 +3380,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
3258 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 3380 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
3259 | } | 3381 | } |
3260 | 3382 | ||
3261 | static int btree_lock_page_hook(struct page *page, void *data, | 3383 | int btree_lock_page_hook(struct page *page, void *data, |
3262 | void (*flush_fn)(void *)) | 3384 | void (*flush_fn)(void *)) |
3263 | { | 3385 | { |
3264 | struct inode *inode = page->mapping->host; | 3386 | struct inode *inode = page->mapping->host; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 05b3fab39f7..95e147eea23 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
89 | int btrfs_cleanup_transaction(struct btrfs_root *root); | 89 | int btrfs_cleanup_transaction(struct btrfs_root *root); |
90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, | 90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, |
91 | struct btrfs_root *root); | 91 | struct btrfs_root *root); |
92 | void btrfs_abort_devices(struct btrfs_root *root); | ||
93 | struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | ||
94 | struct btrfs_fs_info *fs_info, | ||
95 | u64 objectid); | ||
96 | int btree_lock_page_hook(struct page *page, void *data, | ||
97 | void (*flush_fn)(void *)); | ||
92 | 98 | ||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 99 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
94 | void btrfs_init_lockdep(void); | 100 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1d36702ff..4e1b153b7c4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include "locking.h" | 34 | #include "locking.h" |
35 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
36 | 36 | ||
37 | #undef SCRAMBLE_DELAYED_REFS | ||
38 | |||
37 | /* | 39 | /* |
38 | * control flags for do_chunk_alloc's force field | 40 | * control flags for do_chunk_alloc's force field |
39 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | 41 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk |
@@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2217 | struct btrfs_delayed_ref_node *ref; | 2219 | struct btrfs_delayed_ref_node *ref; |
2218 | struct btrfs_delayed_ref_head *locked_ref = NULL; | 2220 | struct btrfs_delayed_ref_head *locked_ref = NULL; |
2219 | struct btrfs_delayed_extent_op *extent_op; | 2221 | struct btrfs_delayed_extent_op *extent_op; |
2222 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
2220 | int ret; | 2223 | int ret; |
2221 | int count = 0; | 2224 | int count = 0; |
2222 | int must_insert_reserved = 0; | 2225 | int must_insert_reserved = 0; |
@@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2255 | ref = select_delayed_ref(locked_ref); | 2258 | ref = select_delayed_ref(locked_ref); |
2256 | 2259 | ||
2257 | if (ref && ref->seq && | 2260 | if (ref && ref->seq && |
2258 | btrfs_check_delayed_seq(delayed_refs, ref->seq)) { | 2261 | btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { |
2259 | /* | 2262 | /* |
2260 | * there are still refs with lower seq numbers in the | 2263 | * there are still refs with lower seq numbers in the |
2261 | * process of being added. Don't run this ref yet. | 2264 | * process of being added. Don't run this ref yet. |
@@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2337 | } | 2340 | } |
2338 | 2341 | ||
2339 | next: | 2342 | next: |
2340 | do_chunk_alloc(trans, root->fs_info->extent_root, | 2343 | do_chunk_alloc(trans, fs_info->extent_root, |
2341 | 2 * 1024 * 1024, | 2344 | 2 * 1024 * 1024, |
2342 | btrfs_get_alloc_profile(root, 0), | 2345 | btrfs_get_alloc_profile(root, 0), |
2343 | CHUNK_ALLOC_NO_FORCE); | 2346 | CHUNK_ALLOC_NO_FORCE); |
@@ -2347,21 +2350,99 @@ next: | |||
2347 | return count; | 2350 | return count; |
2348 | } | 2351 | } |
2349 | 2352 | ||
2350 | static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, | 2353 | static void wait_for_more_refs(struct btrfs_fs_info *fs_info, |
2354 | struct btrfs_delayed_ref_root *delayed_refs, | ||
2351 | unsigned long num_refs, | 2355 | unsigned long num_refs, |
2352 | struct list_head *first_seq) | 2356 | struct list_head *first_seq) |
2353 | { | 2357 | { |
2354 | spin_unlock(&delayed_refs->lock); | 2358 | spin_unlock(&delayed_refs->lock); |
2355 | pr_debug("waiting for more refs (num %ld, first %p)\n", | 2359 | pr_debug("waiting for more refs (num %ld, first %p)\n", |
2356 | num_refs, first_seq); | 2360 | num_refs, first_seq); |
2357 | wait_event(delayed_refs->seq_wait, | 2361 | wait_event(fs_info->tree_mod_seq_wait, |
2358 | num_refs != delayed_refs->num_entries || | 2362 | num_refs != delayed_refs->num_entries || |
2359 | delayed_refs->seq_head.next != first_seq); | 2363 | fs_info->tree_mod_seq_list.next != first_seq); |
2360 | pr_debug("done waiting for more refs (num %ld, first %p)\n", | 2364 | pr_debug("done waiting for more refs (num %ld, first %p)\n", |
2361 | delayed_refs->num_entries, delayed_refs->seq_head.next); | 2365 | delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); |
2362 | spin_lock(&delayed_refs->lock); | 2366 | spin_lock(&delayed_refs->lock); |
2363 | } | 2367 | } |
2364 | 2368 | ||
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on
 * this order, we start to process the tree in the middle instead of the
 * beginning.
 *
 * Walks down from the root of the rb-tree, alternating between the left and
 * the right child, and returns the bytenr of the last node visited.
 * Returns 0 when the tree is empty.
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	/* initialised so an empty tree does not return stack garbage */
	u64 middle = 0;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		/* zig-zag: left on odd steps, right on even steps */
		n = alt ? n->rb_left : n->rb_right;
		alt = 1 - alt;
	}
	return middle;
}
#endif
2411 | |||
2412 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
2413 | struct btrfs_fs_info *fs_info) | ||
2414 | { | ||
2415 | struct qgroup_update *qgroup_update; | ||
2416 | int ret = 0; | ||
2417 | |||
2418 | if (list_empty(&trans->qgroup_ref_list) != | ||
2419 | !trans->delayed_ref_elem.seq) { | ||
2420 | /* list without seq or seq without list */ | ||
2421 | printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", | ||
2422 | list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
2423 | trans->delayed_ref_elem.seq); | ||
2424 | BUG(); | ||
2425 | } | ||
2426 | |||
2427 | if (!trans->delayed_ref_elem.seq) | ||
2428 | return 0; | ||
2429 | |||
2430 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
2431 | qgroup_update = list_first_entry(&trans->qgroup_ref_list, | ||
2432 | struct qgroup_update, list); | ||
2433 | list_del(&qgroup_update->list); | ||
2434 | if (!ret) | ||
2435 | ret = btrfs_qgroup_account_ref( | ||
2436 | trans, fs_info, qgroup_update->node, | ||
2437 | qgroup_update->extent_op); | ||
2438 | kfree(qgroup_update); | ||
2439 | } | ||
2440 | |||
2441 | btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
2442 | |||
2443 | return ret; | ||
2444 | } | ||
2445 | |||
2365 | /* | 2446 | /* |
2366 | * this starts processing the delayed reference count updates and | 2447 | * this starts processing the delayed reference count updates and |
2367 | * extent insertions we have queued up so far. count can be | 2448 | * extent insertions we have queued up so far. count can be |
@@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2398 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | 2479 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), |
2399 | CHUNK_ALLOC_NO_FORCE); | 2480 | CHUNK_ALLOC_NO_FORCE); |
2400 | 2481 | ||
2482 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
2483 | |||
2401 | delayed_refs = &trans->transaction->delayed_refs; | 2484 | delayed_refs = &trans->transaction->delayed_refs; |
2402 | INIT_LIST_HEAD(&cluster); | 2485 | INIT_LIST_HEAD(&cluster); |
2403 | again: | 2486 | again: |
2404 | consider_waiting = 0; | 2487 | consider_waiting = 0; |
2405 | spin_lock(&delayed_refs->lock); | 2488 | spin_lock(&delayed_refs->lock); |
2489 | |||
2490 | #ifdef SCRAMBLE_DELAYED_REFS | ||
2491 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); | ||
2492 | #endif | ||
2493 | |||
2406 | if (count == 0) { | 2494 | if (count == 0) { |
2407 | count = delayed_refs->num_entries * 2; | 2495 | count = delayed_refs->num_entries * 2; |
2408 | run_most = 1; | 2496 | run_most = 1; |
@@ -2437,7 +2525,7 @@ again: | |||
2437 | num_refs = delayed_refs->num_entries; | 2525 | num_refs = delayed_refs->num_entries; |
2438 | first_seq = root->fs_info->tree_mod_seq_list.next; | 2526 | first_seq = root->fs_info->tree_mod_seq_list.next; |
2439 | } else { | 2527 | } else { |
2440 | wait_for_more_refs(delayed_refs, | 2528 | wait_for_more_refs(root->fs_info, delayed_refs, |
2441 | num_refs, first_seq); | 2529 | num_refs, first_seq); |
2442 | /* | 2530 | /* |
2443 | * after waiting, things have changed. we | 2531 | * after waiting, things have changed. we |
@@ -2502,6 +2590,7 @@ again: | |||
2502 | } | 2590 | } |
2503 | out: | 2591 | out: |
2504 | spin_unlock(&delayed_refs->lock); | 2592 | spin_unlock(&delayed_refs->lock); |
2593 | assert_qgroups_uptodate(trans); | ||
2505 | return 0; | 2594 | return 0; |
2506 | } | 2595 | } |
2507 | 2596 | ||
@@ -2581,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, | |||
2581 | 2670 | ||
2582 | node = rb_prev(node); | 2671 | node = rb_prev(node); |
2583 | if (node) { | 2672 | if (node) { |
2673 | int seq = ref->seq; | ||
2674 | |||
2584 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 2675 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); |
2585 | if (ref->bytenr == bytenr) | 2676 | if (ref->bytenr == bytenr && ref->seq == seq) |
2586 | goto out_unlock; | 2677 | goto out_unlock; |
2587 | } | 2678 | } |
2588 | 2679 | ||
@@ -2903,8 +2994,13 @@ again: | |||
2903 | } | 2994 | } |
2904 | 2995 | ||
2905 | spin_lock(&block_group->lock); | 2996 | spin_lock(&block_group->lock); |
2906 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | 2997 | if (block_group->cached != BTRFS_CACHE_FINISHED || |
2907 | /* We're not cached, don't bother trying to write stuff out */ | 2998 | !btrfs_test_opt(root, SPACE_CACHE)) { |
2999 | /* | ||
3000 | * don't bother trying to write stuff out _if_ | ||
3001 | * a) we're not cached, | ||
3002 | * b) we're with nospace_cache mount option. | ||
3003 | */ | ||
2908 | dcs = BTRFS_DC_WRITTEN; | 3004 | dcs = BTRFS_DC_WRITTEN; |
2909 | spin_unlock(&block_group->lock); | 3005 | spin_unlock(&block_group->lock); |
2910 | goto out_put; | 3006 | goto out_put; |
@@ -3134,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3134 | init_waitqueue_head(&found->wait); | 3230 | init_waitqueue_head(&found->wait); |
3135 | *space_info = found; | 3231 | *space_info = found; |
3136 | list_add_rcu(&found->list, &info->space_info); | 3232 | list_add_rcu(&found->list, &info->space_info); |
3233 | if (flags & BTRFS_BLOCK_GROUP_DATA) | ||
3234 | info->data_sinfo = found; | ||
3137 | return 0; | 3235 | return 0; |
3138 | } | 3236 | } |
3139 | 3237 | ||
@@ -3263,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | |||
3263 | return get_alloc_profile(root, flags); | 3361 | return get_alloc_profile(root, flags); |
3264 | } | 3362 | } |
3265 | 3363 | ||
3266 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
3267 | { | ||
3268 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
3269 | BTRFS_BLOCK_GROUP_DATA); | ||
3270 | } | ||
3271 | |||
3272 | /* | 3364 | /* |
3273 | * This will check the space that the inode allocates from to make sure we have | 3365 | * This will check the space that the inode allocates from to make sure we have |
3274 | * enough space for bytes. | 3366 | * enough space for bytes. |
@@ -3277,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
3277 | { | 3369 | { |
3278 | struct btrfs_space_info *data_sinfo; | 3370 | struct btrfs_space_info *data_sinfo; |
3279 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3371 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3372 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3280 | u64 used; | 3373 | u64 used; |
3281 | int ret = 0, committed = 0, alloc_chunk = 1; | 3374 | int ret = 0, committed = 0, alloc_chunk = 1; |
3282 | 3375 | ||
@@ -3289,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
3289 | committed = 1; | 3382 | committed = 1; |
3290 | } | 3383 | } |
3291 | 3384 | ||
3292 | data_sinfo = BTRFS_I(inode)->space_info; | 3385 | data_sinfo = fs_info->data_sinfo; |
3293 | if (!data_sinfo) | 3386 | if (!data_sinfo) |
3294 | goto alloc; | 3387 | goto alloc; |
3295 | 3388 | ||
@@ -3330,10 +3423,9 @@ alloc: | |||
3330 | goto commit_trans; | 3423 | goto commit_trans; |
3331 | } | 3424 | } |
3332 | 3425 | ||
3333 | if (!data_sinfo) { | 3426 | if (!data_sinfo) |
3334 | btrfs_set_inode_space_info(root, inode); | 3427 | data_sinfo = fs_info->data_sinfo; |
3335 | data_sinfo = BTRFS_I(inode)->space_info; | 3428 | |
3336 | } | ||
3337 | goto again; | 3429 | goto again; |
3338 | } | 3430 | } |
3339 | 3431 | ||
@@ -3380,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
3380 | /* make sure bytes are sectorsize aligned */ | 3472 | /* make sure bytes are sectorsize aligned */ |
3381 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3473 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
3382 | 3474 | ||
3383 | data_sinfo = BTRFS_I(inode)->space_info; | 3475 | data_sinfo = root->fs_info->data_sinfo; |
3384 | spin_lock(&data_sinfo->lock); | 3476 | spin_lock(&data_sinfo->lock); |
3385 | data_sinfo->bytes_may_use -= bytes; | 3477 | data_sinfo->bytes_may_use -= bytes; |
3386 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 3478 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
@@ -3586,89 +3678,58 @@ out: | |||
3586 | /* | 3678 | /* |
3587 | * shrink metadata reservation for delalloc | 3679 | * shrink metadata reservation for delalloc |
3588 | */ | 3680 | */ |
3589 | static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, | 3681 | static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, |
3590 | bool wait_ordered) | 3682 | bool wait_ordered) |
3591 | { | 3683 | { |
3592 | struct btrfs_block_rsv *block_rsv; | 3684 | struct btrfs_block_rsv *block_rsv; |
3593 | struct btrfs_space_info *space_info; | 3685 | struct btrfs_space_info *space_info; |
3594 | struct btrfs_trans_handle *trans; | 3686 | struct btrfs_trans_handle *trans; |
3595 | u64 reserved; | 3687 | u64 delalloc_bytes; |
3596 | u64 max_reclaim; | 3688 | u64 max_reclaim; |
3597 | u64 reclaimed = 0; | ||
3598 | long time_left; | 3689 | long time_left; |
3599 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3690 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3600 | int loops = 0; | 3691 | int loops = 0; |
3601 | unsigned long progress; | ||
3602 | 3692 | ||
3603 | trans = (struct btrfs_trans_handle *)current->journal_info; | 3693 | trans = (struct btrfs_trans_handle *)current->journal_info; |
3604 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3694 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3605 | space_info = block_rsv->space_info; | 3695 | space_info = block_rsv->space_info; |
3606 | 3696 | ||
3607 | smp_mb(); | 3697 | smp_mb(); |
3608 | reserved = space_info->bytes_may_use; | 3698 | delalloc_bytes = root->fs_info->delalloc_bytes; |
3609 | progress = space_info->reservation_progress; | 3699 | if (delalloc_bytes == 0) { |
3610 | |||
3611 | if (reserved == 0) | ||
3612 | return 0; | ||
3613 | |||
3614 | smp_mb(); | ||
3615 | if (root->fs_info->delalloc_bytes == 0) { | ||
3616 | if (trans) | 3700 | if (trans) |
3617 | return 0; | 3701 | return; |
3618 | btrfs_wait_ordered_extents(root, 0, 0); | 3702 | btrfs_wait_ordered_extents(root, 0, 0); |
3619 | return 0; | 3703 | return; |
3620 | } | 3704 | } |
3621 | 3705 | ||
3622 | max_reclaim = min(reserved, to_reclaim); | 3706 | while (delalloc_bytes && loops < 3) { |
3623 | nr_pages = max_t(unsigned long, nr_pages, | 3707 | max_reclaim = min(delalloc_bytes, to_reclaim); |
3624 | max_reclaim >> PAGE_CACHE_SHIFT); | 3708 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
3625 | while (loops < 1024) { | ||
3626 | /* have the flusher threads jump in and do some IO */ | ||
3627 | smp_mb(); | ||
3628 | nr_pages = min_t(unsigned long, nr_pages, | ||
3629 | root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); | ||
3630 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 3709 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, |
3631 | WB_REASON_FS_FREE_SPACE); | 3710 | WB_REASON_FS_FREE_SPACE); |
3632 | 3711 | ||
3633 | spin_lock(&space_info->lock); | 3712 | spin_lock(&space_info->lock); |
3634 | if (reserved > space_info->bytes_may_use) | 3713 | if (space_info->bytes_used + space_info->bytes_reserved + |
3635 | reclaimed += reserved - space_info->bytes_may_use; | 3714 | space_info->bytes_pinned + space_info->bytes_readonly + |
3636 | reserved = space_info->bytes_may_use; | 3715 | space_info->bytes_may_use + orig <= |
3716 | space_info->total_bytes) { | ||
3717 | spin_unlock(&space_info->lock); | ||
3718 | break; | ||
3719 | } | ||
3637 | spin_unlock(&space_info->lock); | 3720 | spin_unlock(&space_info->lock); |
3638 | 3721 | ||
3639 | loops++; | 3722 | loops++; |
3640 | |||
3641 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
3642 | break; | ||
3643 | |||
3644 | if (trans && trans->transaction->blocked) | ||
3645 | return -EAGAIN; | ||
3646 | |||
3647 | if (wait_ordered && !trans) { | 3723 | if (wait_ordered && !trans) { |
3648 | btrfs_wait_ordered_extents(root, 0, 0); | 3724 | btrfs_wait_ordered_extents(root, 0, 0); |
3649 | } else { | 3725 | } else { |
3650 | time_left = schedule_timeout_interruptible(1); | 3726 | time_left = schedule_timeout_killable(1); |
3651 | |||
3652 | /* We were interrupted, exit */ | ||
3653 | if (time_left) | 3727 | if (time_left) |
3654 | break; | 3728 | break; |
3655 | } | 3729 | } |
3656 | 3730 | smp_mb(); | |
3657 | /* we've kicked the IO a few times, if anything has been freed, | 3731 | delalloc_bytes = root->fs_info->delalloc_bytes; |
3658 | * exit. There is no sense in looping here for a long time | ||
3659 | * when we really need to commit the transaction, or there are | ||
3660 | * just too many writers without enough free space | ||
3661 | */ | ||
3662 | |||
3663 | if (loops > 3) { | ||
3664 | smp_mb(); | ||
3665 | if (progress != space_info->reservation_progress) | ||
3666 | break; | ||
3667 | } | ||
3668 | |||
3669 | } | 3732 | } |
3670 | |||
3671 | return reclaimed >= to_reclaim; | ||
3672 | } | 3733 | } |
3673 | 3734 | ||
3674 | /** | 3735 | /** |
@@ -3728,6 +3789,58 @@ commit: | |||
3728 | return btrfs_commit_transaction(trans, root); | 3789 | return btrfs_commit_transaction(trans, root); |
3729 | } | 3790 | } |
3730 | 3791 | ||
3792 | enum flush_state { | ||
3793 | FLUSH_DELALLOC = 1, | ||
3794 | FLUSH_DELALLOC_WAIT = 2, | ||
3795 | FLUSH_DELAYED_ITEMS_NR = 3, | ||
3796 | FLUSH_DELAYED_ITEMS = 4, | ||
3797 | COMMIT_TRANS = 5, | ||
3798 | }; | ||
3799 | |||
3800 | static int flush_space(struct btrfs_root *root, | ||
3801 | struct btrfs_space_info *space_info, u64 num_bytes, | ||
3802 | u64 orig_bytes, int state) | ||
3803 | { | ||
3804 | struct btrfs_trans_handle *trans; | ||
3805 | int nr; | ||
3806 | int ret = 0; | ||
3807 | |||
3808 | switch (state) { | ||
3809 | case FLUSH_DELALLOC: | ||
3810 | case FLUSH_DELALLOC_WAIT: | ||
3811 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
3812 | state == FLUSH_DELALLOC_WAIT); | ||
3813 | break; | ||
3814 | case FLUSH_DELAYED_ITEMS_NR: | ||
3815 | case FLUSH_DELAYED_ITEMS: | ||
3816 | if (state == FLUSH_DELAYED_ITEMS_NR) { | ||
3817 | u64 bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
3818 | |||
3819 | nr = (int)div64_u64(num_bytes, bytes); | ||
3820 | if (!nr) | ||
3821 | nr = 1; | ||
3822 | nr *= 2; | ||
3823 | } else { | ||
3824 | nr = -1; | ||
3825 | } | ||
3826 | trans = btrfs_join_transaction(root); | ||
3827 | if (IS_ERR(trans)) { | ||
3828 | ret = PTR_ERR(trans); | ||
3829 | break; | ||
3830 | } | ||
3831 | ret = btrfs_run_delayed_items_nr(trans, root, nr); | ||
3832 | btrfs_end_transaction(trans, root); | ||
3833 | break; | ||
3834 | case COMMIT_TRANS: | ||
3835 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | ||
3836 | break; | ||
3837 | default: | ||
3838 | ret = -ENOSPC; | ||
3839 | break; | ||
3840 | } | ||
3841 | |||
3842 | return ret; | ||
3843 | } | ||
3731 | /** | 3844 | /** |
3732 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 3845 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space |
3733 | * @root - the root we're allocating for | 3846 | * @root - the root we're allocating for |
@@ -3749,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root, | |||
3749 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3862 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3750 | u64 used; | 3863 | u64 used; |
3751 | u64 num_bytes = orig_bytes; | 3864 | u64 num_bytes = orig_bytes; |
3752 | int retries = 0; | 3865 | int flush_state = FLUSH_DELALLOC; |
3753 | int ret = 0; | 3866 | int ret = 0; |
3754 | bool committed = false; | ||
3755 | bool flushing = false; | 3867 | bool flushing = false; |
3756 | bool wait_ordered = false; | 3868 | bool committed = false; |
3757 | 3869 | ||
3758 | again: | 3870 | again: |
3759 | ret = 0; | 3871 | ret = 0; |
@@ -3812,9 +3924,8 @@ again: | |||
3812 | * amount plus the amount of bytes that we need for this | 3924 | * amount plus the amount of bytes that we need for this |
3813 | * reservation. | 3925 | * reservation. |
3814 | */ | 3926 | */ |
3815 | wait_ordered = true; | ||
3816 | num_bytes = used - space_info->total_bytes + | 3927 | num_bytes = used - space_info->total_bytes + |
3817 | (orig_bytes * (retries + 1)); | 3928 | (orig_bytes * 2); |
3818 | } | 3929 | } |
3819 | 3930 | ||
3820 | if (ret) { | 3931 | if (ret) { |
@@ -3867,8 +3978,6 @@ again: | |||
3867 | trace_btrfs_space_reservation(root->fs_info, | 3978 | trace_btrfs_space_reservation(root->fs_info, |
3868 | "space_info", space_info->flags, orig_bytes, 1); | 3979 | "space_info", space_info->flags, orig_bytes, 1); |
3869 | ret = 0; | 3980 | ret = 0; |
3870 | } else { | ||
3871 | wait_ordered = true; | ||
3872 | } | 3981 | } |
3873 | } | 3982 | } |
3874 | 3983 | ||
@@ -3887,36 +3996,13 @@ again: | |||
3887 | if (!ret || !flush) | 3996 | if (!ret || !flush) |
3888 | goto out; | 3997 | goto out; |
3889 | 3998 | ||
3890 | /* | 3999 | ret = flush_space(root, space_info, num_bytes, orig_bytes, |
3891 | * We do synchronous shrinking since we don't actually unreserve | 4000 | flush_state); |
3892 | * metadata until after the IO is completed. | 4001 | flush_state++; |
3893 | */ | 4002 | if (!ret) |
3894 | ret = shrink_delalloc(root, num_bytes, wait_ordered); | ||
3895 | if (ret < 0) | ||
3896 | goto out; | ||
3897 | |||
3898 | ret = 0; | ||
3899 | |||
3900 | /* | ||
3901 | * So if we were overcommitted it's possible that somebody else flushed | ||
3902 | * out enough space and we simply didn't have enough space to reclaim, | ||
3903 | * so go back around and try again. | ||
3904 | */ | ||
3905 | if (retries < 2) { | ||
3906 | wait_ordered = true; | ||
3907 | retries++; | ||
3908 | goto again; | 4003 | goto again; |
3909 | } | 4004 | else if (flush_state <= COMMIT_TRANS) |
3910 | |||
3911 | ret = -ENOSPC; | ||
3912 | if (committed) | ||
3913 | goto out; | ||
3914 | |||
3915 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | ||
3916 | if (!ret) { | ||
3917 | committed = true; | ||
3918 | goto again; | 4005 | goto again; |
3919 | } | ||
3920 | 4006 | ||
3921 | out: | 4007 | out: |
3922 | if (flushing) { | 4008 | if (flushing) { |
@@ -3934,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv( | |||
3934 | { | 4020 | { |
3935 | struct btrfs_block_rsv *block_rsv = NULL; | 4021 | struct btrfs_block_rsv *block_rsv = NULL; |
3936 | 4022 | ||
3937 | if (root->ref_cows || root == root->fs_info->csum_root) | 4023 | if (root->ref_cows) |
4024 | block_rsv = trans->block_rsv; | ||
4025 | |||
4026 | if (root == root->fs_info->csum_root && trans->adding_csums) | ||
3938 | block_rsv = trans->block_rsv; | 4027 | block_rsv = trans->block_rsv; |
3939 | 4028 | ||
3940 | if (!block_rsv) | 4029 | if (!block_rsv) |
@@ -4286,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
4286 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 4375 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
4287 | struct btrfs_root *root) | 4376 | struct btrfs_root *root) |
4288 | { | 4377 | { |
4378 | if (!trans->block_rsv) | ||
4379 | return; | ||
4380 | |||
4289 | if (!trans->bytes_reserved) | 4381 | if (!trans->bytes_reserved) |
4290 | return; | 4382 | return; |
4291 | 4383 | ||
@@ -4444,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4444 | int ret; | 4536 | int ret; |
4445 | 4537 | ||
4446 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 4538 | /* Need to be holding the i_mutex here if we aren't free space cache */ |
4447 | if (btrfs_is_free_space_inode(root, inode)) | 4539 | if (btrfs_is_free_space_inode(inode)) |
4448 | flush = 0; | 4540 | flush = 0; |
4449 | 4541 | ||
4450 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4542 | if (flush && btrfs_transaction_in_commit(root->fs_info)) |
@@ -4476,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4476 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 4568 | csum_bytes = BTRFS_I(inode)->csum_bytes; |
4477 | spin_unlock(&BTRFS_I(inode)->lock); | 4569 | spin_unlock(&BTRFS_I(inode)->lock); |
4478 | 4570 | ||
4571 | if (root->fs_info->quota_enabled) { | ||
4572 | ret = btrfs_qgroup_reserve(root, num_bytes + | ||
4573 | nr_extents * root->leafsize); | ||
4574 | if (ret) | ||
4575 | return ret; | ||
4576 | } | ||
4577 | |||
4479 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4578 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
4480 | if (ret) { | 4579 | if (ret) { |
4481 | u64 to_free = 0; | 4580 | u64 to_free = 0; |
@@ -4554,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4554 | 4653 | ||
4555 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | 4654 | trace_btrfs_space_reservation(root->fs_info, "delalloc", |
4556 | btrfs_ino(inode), to_free, 0); | 4655 | btrfs_ino(inode), to_free, 0); |
4656 | if (root->fs_info->quota_enabled) { | ||
4657 | btrfs_qgroup_free(root, num_bytes + | ||
4658 | dropped * root->leafsize); | ||
4659 | } | ||
4660 | |||
4557 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4661 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4558 | to_free); | 4662 | to_free); |
4559 | } | 4663 | } |
@@ -5190,8 +5294,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
5190 | rb_erase(&head->node.rb_node, &delayed_refs->root); | 5294 | rb_erase(&head->node.rb_node, &delayed_refs->root); |
5191 | 5295 | ||
5192 | delayed_refs->num_entries--; | 5296 | delayed_refs->num_entries--; |
5193 | if (waitqueue_active(&delayed_refs->seq_wait)) | 5297 | smp_mb(); |
5194 | wake_up(&delayed_refs->seq_wait); | 5298 | if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) |
5299 | wake_up(&root->fs_info->tree_mod_seq_wait); | ||
5195 | 5300 | ||
5196 | /* | 5301 | /* |
5197 | * we don't take a ref on the node because we're removing it from the | 5302 | * we don't take a ref on the node because we're removing it from the |
@@ -5748,7 +5853,11 @@ loop: | |||
5748 | ret = do_chunk_alloc(trans, root, num_bytes + | 5853 | ret = do_chunk_alloc(trans, root, num_bytes + |
5749 | 2 * 1024 * 1024, data, | 5854 | 2 * 1024 * 1024, data, |
5750 | CHUNK_ALLOC_LIMITED); | 5855 | CHUNK_ALLOC_LIMITED); |
5751 | if (ret < 0) { | 5856 | /* |
5857 | * Do not bail out on ENOSPC since we | ||
5858 | * can do more things. | ||
5859 | */ | ||
5860 | if (ret < 0 && ret != -ENOSPC) { | ||
5752 | btrfs_abort_transaction(trans, | 5861 | btrfs_abort_transaction(trans, |
5753 | root, ret); | 5862 | root, ret); |
5754 | goto out; | 5863 | goto out; |
@@ -5816,13 +5925,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
5816 | again: | 5925 | again: |
5817 | list_for_each_entry(cache, &info->block_groups[index], list) { | 5926 | list_for_each_entry(cache, &info->block_groups[index], list) { |
5818 | spin_lock(&cache->lock); | 5927 | spin_lock(&cache->lock); |
5819 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 5928 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", |
5820 | "%llu pinned %llu reserved\n", | ||
5821 | (unsigned long long)cache->key.objectid, | 5929 | (unsigned long long)cache->key.objectid, |
5822 | (unsigned long long)cache->key.offset, | 5930 | (unsigned long long)cache->key.offset, |
5823 | (unsigned long long)btrfs_block_group_used(&cache->item), | 5931 | (unsigned long long)btrfs_block_group_used(&cache->item), |
5824 | (unsigned long long)cache->pinned, | 5932 | (unsigned long long)cache->pinned, |
5825 | (unsigned long long)cache->reserved); | 5933 | (unsigned long long)cache->reserved, |
5934 | cache->ro ? "[readonly]" : ""); | ||
5826 | btrfs_dump_free_space(cache, bytes); | 5935 | btrfs_dump_free_space(cache, bytes); |
5827 | spin_unlock(&cache->lock); | 5936 | spin_unlock(&cache->lock); |
5828 | } | 5937 | } |
@@ -7610,8 +7719,21 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7610 | INIT_LIST_HEAD(&cache->list); | 7719 | INIT_LIST_HEAD(&cache->list); |
7611 | INIT_LIST_HEAD(&cache->cluster_list); | 7720 | INIT_LIST_HEAD(&cache->cluster_list); |
7612 | 7721 | ||
7613 | if (need_clear) | 7722 | if (need_clear) { |
7723 | /* | ||
7724 | * When we mount with old space cache, we need to | ||
7725 | * set BTRFS_DC_CLEAR and set dirty flag. | ||
7726 | * | ||
7727 | * a) Setting 'BTRFS_DC_CLEAR' makes sure that we | ||
7728 | * truncate the old free space cache inode and | ||
7729 | * setup a new one. | ||
7730 | * b) Setting 'dirty flag' makes sure that we flush | ||
7731 | * the new space cache info onto disk. | ||
7732 | */ | ||
7614 | cache->disk_cache_state = BTRFS_DC_CLEAR; | 7733 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
7734 | if (btrfs_test_opt(root, SPACE_CACHE)) | ||
7735 | cache->dirty = 1; | ||
7736 | } | ||
7615 | 7737 | ||
7616 | read_extent_buffer(leaf, &cache->item, | 7738 | read_extent_buffer(leaf, &cache->item, |
7617 | btrfs_item_ptr_offset(leaf, path->slots[0]), | 7739 | btrfs_item_ptr_offset(leaf, path->slots[0]), |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index deafe19c34b..45c81bb4ac8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1919,7 +1919,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
1919 | return -EIO; | 1919 | return -EIO; |
1920 | } | 1920 | } |
1921 | 1921 | ||
1922 | printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " | 1922 | printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " |
1923 | "(dev %s sector %llu)\n", page->mapping->host->i_ino, | 1923 | "(dev %s sector %llu)\n", page->mapping->host->i_ino, |
1924 | start, rcu_str_deref(dev->name), sector); | 1924 | start, rcu_str_deref(dev->name), sector); |
1925 | 1925 | ||
@@ -3078,8 +3078,15 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
3078 | } | 3078 | } |
3079 | } | 3079 | } |
3080 | 3080 | ||
3081 | /* | ||
3082 | * We need to do this to prevent races in people who check if the eb is | ||
3083 | * under IO since we can end up having no IO bits set for a short period | ||
3084 | * of time. | ||
3085 | */ | ||
3086 | spin_lock(&eb->refs_lock); | ||
3081 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3087 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
3082 | set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); | 3088 | set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); |
3089 | spin_unlock(&eb->refs_lock); | ||
3083 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 3090 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
3084 | spin_lock(&fs_info->delalloc_lock); | 3091 | spin_lock(&fs_info->delalloc_lock); |
3085 | if (fs_info->dirty_metadata_bytes >= eb->len) | 3092 | if (fs_info->dirty_metadata_bytes >= eb->len) |
@@ -3088,6 +3095,8 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
3088 | WARN_ON(1); | 3095 | WARN_ON(1); |
3089 | spin_unlock(&fs_info->delalloc_lock); | 3096 | spin_unlock(&fs_info->delalloc_lock); |
3090 | ret = 1; | 3097 | ret = 1; |
3098 | } else { | ||
3099 | spin_unlock(&eb->refs_lock); | ||
3091 | } | 3100 | } |
3092 | 3101 | ||
3093 | btrfs_tree_unlock(eb); | 3102 | btrfs_tree_unlock(eb); |
@@ -3558,19 +3567,38 @@ int extent_readpages(struct extent_io_tree *tree, | |||
3558 | struct bio *bio = NULL; | 3567 | struct bio *bio = NULL; |
3559 | unsigned page_idx; | 3568 | unsigned page_idx; |
3560 | unsigned long bio_flags = 0; | 3569 | unsigned long bio_flags = 0; |
3570 | struct page *pagepool[16]; | ||
3571 | struct page *page; | ||
3572 | int i = 0; | ||
3573 | int nr = 0; | ||
3561 | 3574 | ||
3562 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 3575 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
3563 | struct page *page = list_entry(pages->prev, struct page, lru); | 3576 | page = list_entry(pages->prev, struct page, lru); |
3564 | 3577 | ||
3565 | prefetchw(&page->flags); | 3578 | prefetchw(&page->flags); |
3566 | list_del(&page->lru); | 3579 | list_del(&page->lru); |
3567 | if (!add_to_page_cache_lru(page, mapping, | 3580 | if (add_to_page_cache_lru(page, mapping, |
3568 | page->index, GFP_NOFS)) { | 3581 | page->index, GFP_NOFS)) { |
3569 | __extent_read_full_page(tree, page, get_extent, | 3582 | page_cache_release(page); |
3570 | &bio, 0, &bio_flags); | 3583 | continue; |
3571 | } | 3584 | } |
3572 | page_cache_release(page); | 3585 | |
3586 | pagepool[nr++] = page; | ||
3587 | if (nr < ARRAY_SIZE(pagepool)) | ||
3588 | continue; | ||
3589 | for (i = 0; i < nr; i++) { | ||
3590 | __extent_read_full_page(tree, pagepool[i], get_extent, | ||
3591 | &bio, 0, &bio_flags); | ||
3592 | page_cache_release(pagepool[i]); | ||
3593 | } | ||
3594 | nr = 0; | ||
3595 | } | ||
3596 | for (i = 0; i < nr; i++) { | ||
3597 | __extent_read_full_page(tree, pagepool[i], get_extent, | ||
3598 | &bio, 0, &bio_flags); | ||
3599 | page_cache_release(pagepool[i]); | ||
3573 | } | 3600 | } |
3601 | |||
3574 | BUG_ON(!list_empty(pages)); | 3602 | BUG_ON(!list_empty(pages)); |
3575 | if (bio) | 3603 | if (bio) |
3576 | return submit_one_bio(READ, bio, 0, bio_flags); | 3604 | return submit_one_bio(READ, bio, 0, bio_flags); |
@@ -4124,11 +4152,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) | |||
4124 | * So bump the ref count first, then set the bit. If someone | 4152 | * So bump the ref count first, then set the bit. If someone |
4125 | * beat us to it, drop the ref we added. | 4153 | * beat us to it, drop the ref we added. |
4126 | */ | 4154 | */ |
4127 | if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { | 4155 | spin_lock(&eb->refs_lock); |
4156 | if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) | ||
4128 | atomic_inc(&eb->refs); | 4157 | atomic_inc(&eb->refs); |
4129 | if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) | 4158 | spin_unlock(&eb->refs_lock); |
4130 | atomic_dec(&eb->refs); | ||
4131 | } | ||
4132 | } | 4159 | } |
4133 | 4160 | ||
4134 | static void mark_extent_buffer_accessed(struct extent_buffer *eb) | 4161 | static void mark_extent_buffer_accessed(struct extent_buffer *eb) |
@@ -4240,9 +4267,7 @@ again: | |||
4240 | goto free_eb; | 4267 | goto free_eb; |
4241 | } | 4268 | } |
4242 | /* add one reference for the tree */ | 4269 | /* add one reference for the tree */ |
4243 | spin_lock(&eb->refs_lock); | ||
4244 | check_buffer_tree_ref(eb); | 4270 | check_buffer_tree_ref(eb); |
4245 | spin_unlock(&eb->refs_lock); | ||
4246 | spin_unlock(&tree->buffer_lock); | 4271 | spin_unlock(&tree->buffer_lock); |
4247 | radix_tree_preload_end(); | 4272 | radix_tree_preload_end(); |
4248 | 4273 | ||
@@ -4301,7 +4326,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) | |||
4301 | } | 4326 | } |
4302 | 4327 | ||
4303 | /* Expects to have eb->eb_lock already held */ | 4328 | /* Expects to have eb->eb_lock already held */ |
4304 | static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | 4329 | static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) |
4305 | { | 4330 | { |
4306 | WARN_ON(atomic_read(&eb->refs) == 0); | 4331 | WARN_ON(atomic_read(&eb->refs) == 0); |
4307 | if (atomic_dec_and_test(&eb->refs)) { | 4332 | if (atomic_dec_and_test(&eb->refs)) { |
@@ -4322,9 +4347,11 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
4322 | btrfs_release_extent_buffer_page(eb, 0); | 4347 | btrfs_release_extent_buffer_page(eb, 0); |
4323 | 4348 | ||
4324 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | 4349 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); |
4325 | return; | 4350 | return 1; |
4326 | } | 4351 | } |
4327 | spin_unlock(&eb->refs_lock); | 4352 | spin_unlock(&eb->refs_lock); |
4353 | |||
4354 | return 0; | ||
4328 | } | 4355 | } |
4329 | 4356 | ||
4330 | void free_extent_buffer(struct extent_buffer *eb) | 4357 | void free_extent_buffer(struct extent_buffer *eb) |
@@ -4963,7 +4990,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask) | |||
4963 | spin_unlock(&eb->refs_lock); | 4990 | spin_unlock(&eb->refs_lock); |
4964 | return 0; | 4991 | return 0; |
4965 | } | 4992 | } |
4966 | release_extent_buffer(eb, mask); | ||
4967 | 4993 | ||
4968 | return 1; | 4994 | return release_extent_buffer(eb, mask); |
4969 | } | 4995 | } |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 5d158d32023..b45b9de0c21 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -183,7 +183,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
183 | * read from the commit root and sidestep a nasty deadlock | 183 | * read from the commit root and sidestep a nasty deadlock |
184 | * between reading the free space cache and updating the csum tree. | 184 | * between reading the free space cache and updating the csum tree. |
185 | */ | 185 | */ |
186 | if (btrfs_is_free_space_inode(root, inode)) { | 186 | if (btrfs_is_free_space_inode(inode)) { |
187 | path->search_commit_root = 1; | 187 | path->search_commit_root = 1; |
188 | path->skip_locking = 1; | 188 | path->skip_locking = 1; |
189 | } | 189 | } |
@@ -690,6 +690,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
690 | return -ENOMEM; | 690 | return -ENOMEM; |
691 | 691 | ||
692 | sector_sum = sums->sums; | 692 | sector_sum = sums->sums; |
693 | trans->adding_csums = 1; | ||
693 | again: | 694 | again: |
694 | next_offset = (u64)-1; | 695 | next_offset = (u64)-1; |
695 | found_next = 0; | 696 | found_next = 0; |
@@ -853,6 +854,7 @@ next_sector: | |||
853 | goto again; | 854 | goto again; |
854 | } | 855 | } |
855 | out: | 856 | out: |
857 | trans->adding_csums = 0; | ||
856 | btrfs_free_path(path); | 858 | btrfs_free_path(path); |
857 | return ret; | 859 | return ret; |
858 | 860 | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6c4e2baa929..6b10acfc2f5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -1968,7 +1968,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
1968 | 1968 | ||
1969 | for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { | 1969 | for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { |
1970 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 1970 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
1971 | if (info->bytes >= bytes) | 1971 | if (info->bytes >= bytes && !block_group->ro) |
1972 | count++; | 1972 | count++; |
1973 | printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", | 1973 | printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", |
1974 | (unsigned long long)info->offset, | 1974 | (unsigned long long)info->offset, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb8d671d00e..48bdfd2591c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -825,7 +825,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
825 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 825 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
826 | int ret = 0; | 826 | int ret = 0; |
827 | 827 | ||
828 | BUG_ON(btrfs_is_free_space_inode(root, inode)); | 828 | BUG_ON(btrfs_is_free_space_inode(inode)); |
829 | trans = btrfs_join_transaction(root); | 829 | trans = btrfs_join_transaction(root); |
830 | if (IS_ERR(trans)) { | 830 | if (IS_ERR(trans)) { |
831 | extent_clear_unlock_delalloc(inode, | 831 | extent_clear_unlock_delalloc(inode, |
@@ -1010,7 +1010,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) | |||
1010 | atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); | 1010 | atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); |
1011 | 1011 | ||
1012 | if (atomic_read(&root->fs_info->async_delalloc_pages) < | 1012 | if (atomic_read(&root->fs_info->async_delalloc_pages) < |
1013 | 5 * 1042 * 1024 && | 1013 | 5 * 1024 * 1024 && |
1014 | waitqueue_active(&root->fs_info->async_submit_wait)) | 1014 | waitqueue_active(&root->fs_info->async_submit_wait)) |
1015 | wake_up(&root->fs_info->async_submit_wait); | 1015 | wake_up(&root->fs_info->async_submit_wait); |
1016 | 1016 | ||
@@ -1035,7 +1035,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1035 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1035 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1036 | unsigned long nr_pages; | 1036 | unsigned long nr_pages; |
1037 | u64 cur_end; | 1037 | u64 cur_end; |
1038 | int limit = 10 * 1024 * 1042; | 1038 | int limit = 10 * 1024 * 1024; |
1039 | 1039 | ||
1040 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, | 1040 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, |
1041 | 1, 0, NULL, GFP_NOFS); | 1041 | 1, 0, NULL, GFP_NOFS); |
@@ -1153,7 +1153,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1153 | return -ENOMEM; | 1153 | return -ENOMEM; |
1154 | } | 1154 | } |
1155 | 1155 | ||
1156 | nolock = btrfs_is_free_space_inode(root, inode); | 1156 | nolock = btrfs_is_free_space_inode(inode); |
1157 | 1157 | ||
1158 | if (nolock) | 1158 | if (nolock) |
1159 | trans = btrfs_join_transaction_nolock(root); | 1159 | trans = btrfs_join_transaction_nolock(root); |
@@ -1466,7 +1466,7 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1466 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1466 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1467 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1467 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1468 | u64 len = state->end + 1 - state->start; | 1468 | u64 len = state->end + 1 - state->start; |
1469 | bool do_list = !btrfs_is_free_space_inode(root, inode); | 1469 | bool do_list = !btrfs_is_free_space_inode(inode); |
1470 | 1470 | ||
1471 | if (*bits & EXTENT_FIRST_DELALLOC) { | 1471 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1472 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1472 | *bits &= ~EXTENT_FIRST_DELALLOC; |
@@ -1501,7 +1501,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1501 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1501 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1502 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1502 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1503 | u64 len = state->end + 1 - state->start; | 1503 | u64 len = state->end + 1 - state->start; |
1504 | bool do_list = !btrfs_is_free_space_inode(root, inode); | 1504 | bool do_list = !btrfs_is_free_space_inode(inode); |
1505 | 1505 | ||
1506 | if (*bits & EXTENT_FIRST_DELALLOC) { | 1506 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1507 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1507 | *bits &= ~EXTENT_FIRST_DELALLOC; |
@@ -1612,7 +1612,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1612 | 1612 | ||
1613 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1613 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1614 | 1614 | ||
1615 | if (btrfs_is_free_space_inode(root, inode)) | 1615 | if (btrfs_is_free_space_inode(inode)) |
1616 | metadata = 2; | 1616 | metadata = 2; |
1617 | 1617 | ||
1618 | if (!(rw & REQ_WRITE)) { | 1618 | if (!(rw & REQ_WRITE)) { |
@@ -1869,7 +1869,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
1869 | int ret; | 1869 | int ret; |
1870 | bool nolock; | 1870 | bool nolock; |
1871 | 1871 | ||
1872 | nolock = btrfs_is_free_space_inode(root, inode); | 1872 | nolock = btrfs_is_free_space_inode(inode); |
1873 | 1873 | ||
1874 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { | 1874 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { |
1875 | ret = -EIO; | 1875 | ret = -EIO; |
@@ -2007,7 +2007,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2007 | ordered_extent->work.func = finish_ordered_fn; | 2007 | ordered_extent->work.func = finish_ordered_fn; |
2008 | ordered_extent->work.flags = 0; | 2008 | ordered_extent->work.flags = 0; |
2009 | 2009 | ||
2010 | if (btrfs_is_free_space_inode(root, inode)) | 2010 | if (btrfs_is_free_space_inode(inode)) |
2011 | workers = &root->fs_info->endio_freespace_worker; | 2011 | workers = &root->fs_info->endio_freespace_worker; |
2012 | else | 2012 | else |
2013 | workers = &root->fs_info->endio_write_workers; | 2013 | workers = &root->fs_info->endio_write_workers; |
@@ -2732,8 +2732,10 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2732 | * The data relocation inode should also be directly updated | 2732 | * The data relocation inode should also be directly updated |
2733 | * without delay | 2733 | * without delay |
2734 | */ | 2734 | */ |
2735 | if (!btrfs_is_free_space_inode(root, inode) | 2735 | if (!btrfs_is_free_space_inode(inode) |
2736 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | 2736 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { |
2737 | btrfs_update_root_times(trans, root); | ||
2738 | |||
2737 | ret = btrfs_delayed_update_inode(trans, root, inode); | 2739 | ret = btrfs_delayed_update_inode(trans, root, inode); |
2738 | if (!ret) | 2740 | if (!ret) |
2739 | btrfs_set_inode_last_trans(trans, inode); | 2741 | btrfs_set_inode_last_trans(trans, inode); |
@@ -2833,7 +2835,7 @@ err: | |||
2833 | inode_inc_iversion(inode); | 2835 | inode_inc_iversion(inode); |
2834 | inode_inc_iversion(dir); | 2836 | inode_inc_iversion(dir); |
2835 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2837 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2836 | btrfs_update_inode(trans, root, dir); | 2838 | ret = btrfs_update_inode(trans, root, dir); |
2837 | out: | 2839 | out: |
2838 | return ret; | 2840 | return ret; |
2839 | } | 2841 | } |
@@ -3743,7 +3745,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3743 | 3745 | ||
3744 | truncate_inode_pages(&inode->i_data, 0); | 3746 | truncate_inode_pages(&inode->i_data, 0); |
3745 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3747 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3746 | btrfs_is_free_space_inode(root, inode))) | 3748 | btrfs_is_free_space_inode(inode))) |
3747 | goto no_delete; | 3749 | goto no_delete; |
3748 | 3750 | ||
3749 | if (is_bad_inode(inode)) { | 3751 | if (is_bad_inode(inode)) { |
@@ -4082,7 +4084,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) | |||
4082 | struct btrfs_iget_args *args = p; | 4084 | struct btrfs_iget_args *args = p; |
4083 | inode->i_ino = args->ino; | 4085 | inode->i_ino = args->ino; |
4084 | BTRFS_I(inode)->root = args->root; | 4086 | BTRFS_I(inode)->root = args->root; |
4085 | btrfs_set_inode_space_info(args->root, inode); | ||
4086 | return 0; | 4087 | return 0; |
4087 | } | 4088 | } |
4088 | 4089 | ||
@@ -4457,7 +4458,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4457 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) | 4458 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
4458 | return 0; | 4459 | return 0; |
4459 | 4460 | ||
4460 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) | 4461 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode)) |
4461 | nolock = true; | 4462 | nolock = true; |
4462 | 4463 | ||
4463 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4464 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -4518,6 +4519,11 @@ int btrfs_dirty_inode(struct inode *inode) | |||
4518 | static int btrfs_update_time(struct inode *inode, struct timespec *now, | 4519 | static int btrfs_update_time(struct inode *inode, struct timespec *now, |
4519 | int flags) | 4520 | int flags) |
4520 | { | 4521 | { |
4522 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4523 | |||
4524 | if (btrfs_root_readonly(root)) | ||
4525 | return -EROFS; | ||
4526 | |||
4521 | if (flags & S_VERSION) | 4527 | if (flags & S_VERSION) |
4522 | inode_inc_iversion(inode); | 4528 | inode_inc_iversion(inode); |
4523 | if (flags & S_CTIME) | 4529 | if (flags & S_CTIME) |
@@ -4662,7 +4668,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4662 | BTRFS_I(inode)->root = root; | 4668 | BTRFS_I(inode)->root = root; |
4663 | BTRFS_I(inode)->generation = trans->transid; | 4669 | BTRFS_I(inode)->generation = trans->transid; |
4664 | inode->i_generation = BTRFS_I(inode)->generation; | 4670 | inode->i_generation = BTRFS_I(inode)->generation; |
4665 | btrfs_set_inode_space_info(root, inode); | ||
4666 | 4671 | ||
4667 | if (S_ISDIR(mode)) | 4672 | if (S_ISDIR(mode)) |
4668 | owner = 0; | 4673 | owner = 0; |
@@ -4690,6 +4695,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4690 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 4695 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
4691 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 4696 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
4692 | struct btrfs_inode_item); | 4697 | struct btrfs_inode_item); |
4698 | memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, | ||
4699 | sizeof(*inode_item)); | ||
4693 | fill_inode_item(trans, path->nodes[0], inode_item, inode); | 4700 | fill_inode_item(trans, path->nodes[0], inode_item, inode); |
4694 | 4701 | ||
4695 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 4702 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, |
@@ -4723,6 +4730,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4723 | trace_btrfs_inode_new(inode); | 4730 | trace_btrfs_inode_new(inode); |
4724 | btrfs_set_inode_last_trans(trans, inode); | 4731 | btrfs_set_inode_last_trans(trans, inode); |
4725 | 4732 | ||
4733 | btrfs_update_root_times(trans, root); | ||
4734 | |||
4726 | return inode; | 4735 | return inode; |
4727 | fail: | 4736 | fail: |
4728 | if (dir) | 4737 | if (dir) |
@@ -6939,7 +6948,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6939 | return NULL; | 6948 | return NULL; |
6940 | 6949 | ||
6941 | ei->root = NULL; | 6950 | ei->root = NULL; |
6942 | ei->space_info = NULL; | ||
6943 | ei->generation = 0; | 6951 | ei->generation = 0; |
6944 | ei->last_trans = 0; | 6952 | ei->last_trans = 0; |
6945 | ei->last_sub_trans = 0; | 6953 | ei->last_sub_trans = 0; |
@@ -7046,7 +7054,7 @@ int btrfs_drop_inode(struct inode *inode) | |||
7046 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7054 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7047 | 7055 | ||
7048 | if (btrfs_root_refs(&root->root_item) == 0 && | 7056 | if (btrfs_root_refs(&root->root_item) == 0 && |
7049 | !btrfs_is_free_space_inode(root, inode)) | 7057 | !btrfs_is_free_space_inode(inode)) |
7050 | return 1; | 7058 | return 1; |
7051 | else | 7059 | else |
7052 | return generic_drop_inode(inode); | 7060 | return generic_drop_inode(inode); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1e9f6c019ad..43f0012016e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/blkdev.h> | 43 | #include <linux/blkdev.h> |
44 | #include <linux/uuid.h> | ||
44 | #include "compat.h" | 45 | #include "compat.h" |
45 | #include "ctree.h" | 46 | #include "ctree.h" |
46 | #include "disk-io.h" | 47 | #include "disk-io.h" |
@@ -53,6 +54,7 @@ | |||
53 | #include "inode-map.h" | 54 | #include "inode-map.h" |
54 | #include "backref.h" | 55 | #include "backref.h" |
55 | #include "rcu-string.h" | 56 | #include "rcu-string.h" |
57 | #include "send.h" | ||
56 | 58 | ||
57 | /* Mask out flags that are inappropriate for the given type of inode. */ | 59 | /* Mask out flags that are inappropriate for the given type of inode. */ |
58 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 60 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
@@ -336,7 +338,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
336 | static noinline int create_subvol(struct btrfs_root *root, | 338 | static noinline int create_subvol(struct btrfs_root *root, |
337 | struct dentry *dentry, | 339 | struct dentry *dentry, |
338 | char *name, int namelen, | 340 | char *name, int namelen, |
339 | u64 *async_transid) | 341 | u64 *async_transid, |
342 | struct btrfs_qgroup_inherit **inherit) | ||
340 | { | 343 | { |
341 | struct btrfs_trans_handle *trans; | 344 | struct btrfs_trans_handle *trans; |
342 | struct btrfs_key key; | 345 | struct btrfs_key key; |
@@ -346,11 +349,13 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
346 | struct btrfs_root *new_root; | 349 | struct btrfs_root *new_root; |
347 | struct dentry *parent = dentry->d_parent; | 350 | struct dentry *parent = dentry->d_parent; |
348 | struct inode *dir; | 351 | struct inode *dir; |
352 | struct timespec cur_time = CURRENT_TIME; | ||
349 | int ret; | 353 | int ret; |
350 | int err; | 354 | int err; |
351 | u64 objectid; | 355 | u64 objectid; |
352 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 356 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
353 | u64 index = 0; | 357 | u64 index = 0; |
358 | uuid_le new_uuid; | ||
354 | 359 | ||
355 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); | 360 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); |
356 | if (ret) | 361 | if (ret) |
@@ -368,6 +373,11 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
368 | if (IS_ERR(trans)) | 373 | if (IS_ERR(trans)) |
369 | return PTR_ERR(trans); | 374 | return PTR_ERR(trans); |
370 | 375 | ||
376 | ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, | ||
377 | inherit ? *inherit : NULL); | ||
378 | if (ret) | ||
379 | goto fail; | ||
380 | |||
371 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 381 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
372 | 0, objectid, NULL, 0, 0, 0); | 382 | 0, objectid, NULL, 0, 0, 0); |
373 | if (IS_ERR(leaf)) { | 383 | if (IS_ERR(leaf)) { |
@@ -389,8 +399,9 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
389 | BTRFS_UUID_SIZE); | 399 | BTRFS_UUID_SIZE); |
390 | btrfs_mark_buffer_dirty(leaf); | 400 | btrfs_mark_buffer_dirty(leaf); |
391 | 401 | ||
402 | memset(&root_item, 0, sizeof(root_item)); | ||
403 | |||
392 | inode_item = &root_item.inode; | 404 | inode_item = &root_item.inode; |
393 | memset(inode_item, 0, sizeof(*inode_item)); | ||
394 | inode_item->generation = cpu_to_le64(1); | 405 | inode_item->generation = cpu_to_le64(1); |
395 | inode_item->size = cpu_to_le64(3); | 406 | inode_item->size = cpu_to_le64(3); |
396 | inode_item->nlink = cpu_to_le32(1); | 407 | inode_item->nlink = cpu_to_le32(1); |
@@ -408,8 +419,15 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
408 | btrfs_set_root_used(&root_item, leaf->len); | 419 | btrfs_set_root_used(&root_item, leaf->len); |
409 | btrfs_set_root_last_snapshot(&root_item, 0); | 420 | btrfs_set_root_last_snapshot(&root_item, 0); |
410 | 421 | ||
411 | memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); | 422 | btrfs_set_root_generation_v2(&root_item, |
412 | root_item.drop_level = 0; | 423 | btrfs_root_generation(&root_item)); |
424 | uuid_le_gen(&new_uuid); | ||
425 | memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); | ||
426 | root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); | ||
427 | root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); | ||
428 | root_item.ctime = root_item.otime; | ||
429 | btrfs_set_root_ctransid(&root_item, trans->transid); | ||
430 | btrfs_set_root_otransid(&root_item, trans->transid); | ||
413 | 431 | ||
414 | btrfs_tree_unlock(leaf); | 432 | btrfs_tree_unlock(leaf); |
415 | free_extent_buffer(leaf); | 433 | free_extent_buffer(leaf); |
@@ -484,7 +502,7 @@ fail: | |||
484 | 502 | ||
485 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 503 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
486 | char *name, int namelen, u64 *async_transid, | 504 | char *name, int namelen, u64 *async_transid, |
487 | bool readonly) | 505 | bool readonly, struct btrfs_qgroup_inherit **inherit) |
488 | { | 506 | { |
489 | struct inode *inode; | 507 | struct inode *inode; |
490 | struct btrfs_pending_snapshot *pending_snapshot; | 508 | struct btrfs_pending_snapshot *pending_snapshot; |
@@ -502,6 +520,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
502 | pending_snapshot->dentry = dentry; | 520 | pending_snapshot->dentry = dentry; |
503 | pending_snapshot->root = root; | 521 | pending_snapshot->root = root; |
504 | pending_snapshot->readonly = readonly; | 522 | pending_snapshot->readonly = readonly; |
523 | if (inherit) { | ||
524 | pending_snapshot->inherit = *inherit; | ||
525 | *inherit = NULL; /* take responsibility to free it */ | ||
526 | } | ||
505 | 527 | ||
506 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 528 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
507 | if (IS_ERR(trans)) { | 529 | if (IS_ERR(trans)) { |
@@ -635,7 +657,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
635 | static noinline int btrfs_mksubvol(struct path *parent, | 657 | static noinline int btrfs_mksubvol(struct path *parent, |
636 | char *name, int namelen, | 658 | char *name, int namelen, |
637 | struct btrfs_root *snap_src, | 659 | struct btrfs_root *snap_src, |
638 | u64 *async_transid, bool readonly) | 660 | u64 *async_transid, bool readonly, |
661 | struct btrfs_qgroup_inherit **inherit) | ||
639 | { | 662 | { |
640 | struct inode *dir = parent->dentry->d_inode; | 663 | struct inode *dir = parent->dentry->d_inode; |
641 | struct dentry *dentry; | 664 | struct dentry *dentry; |
@@ -652,13 +675,9 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
652 | if (dentry->d_inode) | 675 | if (dentry->d_inode) |
653 | goto out_dput; | 676 | goto out_dput; |
654 | 677 | ||
655 | error = mnt_want_write(parent->mnt); | ||
656 | if (error) | ||
657 | goto out_dput; | ||
658 | |||
659 | error = btrfs_may_create(dir, dentry); | 678 | error = btrfs_may_create(dir, dentry); |
660 | if (error) | 679 | if (error) |
661 | goto out_drop_write; | 680 | goto out_dput; |
662 | 681 | ||
663 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); | 682 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
664 | 683 | ||
@@ -666,18 +685,16 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
666 | goto out_up_read; | 685 | goto out_up_read; |
667 | 686 | ||
668 | if (snap_src) { | 687 | if (snap_src) { |
669 | error = create_snapshot(snap_src, dentry, | 688 | error = create_snapshot(snap_src, dentry, name, namelen, |
670 | name, namelen, async_transid, readonly); | 689 | async_transid, readonly, inherit); |
671 | } else { | 690 | } else { |
672 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 691 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
673 | name, namelen, async_transid); | 692 | name, namelen, async_transid, inherit); |
674 | } | 693 | } |
675 | if (!error) | 694 | if (!error) |
676 | fsnotify_mkdir(dir, dentry); | 695 | fsnotify_mkdir(dir, dentry); |
677 | out_up_read: | 696 | out_up_read: |
678 | up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); | 697 | up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
679 | out_drop_write: | ||
680 | mnt_drop_write(parent->mnt); | ||
681 | out_dput: | 698 | out_dput: |
682 | dput(dentry); | 699 | dput(dentry); |
683 | out_unlock: | 700 | out_unlock: |
@@ -832,7 +849,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
832 | } | 849 | } |
833 | 850 | ||
834 | static int should_defrag_range(struct inode *inode, u64 start, int thresh, | 851 | static int should_defrag_range(struct inode *inode, u64 start, int thresh, |
835 | u64 *last_len, u64 *skip, u64 *defrag_end) | 852 | u64 *last_len, u64 *skip, u64 *defrag_end, |
853 | int compress) | ||
836 | { | 854 | { |
837 | struct extent_map *em; | 855 | struct extent_map *em; |
838 | int ret = 1; | 856 | int ret = 1; |
@@ -863,7 +881,7 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, | |||
863 | * we hit a real extent, if it is big or the next extent is not a | 881 | * we hit a real extent, if it is big or the next extent is not a |
864 | * real extent, don't bother defragging it | 882 | * real extent, don't bother defragging it |
865 | */ | 883 | */ |
866 | if ((*last_len == 0 || *last_len >= thresh) && | 884 | if (!compress && (*last_len == 0 || *last_len >= thresh) && |
867 | (em->len >= thresh || !next_mergeable)) | 885 | (em->len >= thresh || !next_mergeable)) |
868 | ret = 0; | 886 | ret = 0; |
869 | out: | 887 | out: |
@@ -1047,11 +1065,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1047 | u64 newer_than, unsigned long max_to_defrag) | 1065 | u64 newer_than, unsigned long max_to_defrag) |
1048 | { | 1066 | { |
1049 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1067 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1050 | struct btrfs_super_block *disk_super; | ||
1051 | struct file_ra_state *ra = NULL; | 1068 | struct file_ra_state *ra = NULL; |
1052 | unsigned long last_index; | 1069 | unsigned long last_index; |
1053 | u64 isize = i_size_read(inode); | 1070 | u64 isize = i_size_read(inode); |
1054 | u64 features; | ||
1055 | u64 last_len = 0; | 1071 | u64 last_len = 0; |
1056 | u64 skip = 0; | 1072 | u64 skip = 0; |
1057 | u64 defrag_end = 0; | 1073 | u64 defrag_end = 0; |
@@ -1145,7 +1161,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1145 | 1161 | ||
1146 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | 1162 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, |
1147 | extent_thresh, &last_len, &skip, | 1163 | extent_thresh, &last_len, &skip, |
1148 | &defrag_end)) { | 1164 | &defrag_end, range->flags & |
1165 | BTRFS_DEFRAG_RANGE_COMPRESS)) { | ||
1149 | unsigned long next; | 1166 | unsigned long next; |
1150 | /* | 1167 | /* |
1151 | * the should_defrag function tells us how much to skip | 1168 | * the should_defrag function tells us how much to skip |
@@ -1237,11 +1254,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1237 | mutex_unlock(&inode->i_mutex); | 1254 | mutex_unlock(&inode->i_mutex); |
1238 | } | 1255 | } |
1239 | 1256 | ||
1240 | disk_super = root->fs_info->super_copy; | ||
1241 | features = btrfs_super_incompat_flags(disk_super); | ||
1242 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | 1257 | if (range->compress_type == BTRFS_COMPRESS_LZO) { |
1243 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | 1258 | btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); |
1244 | btrfs_set_super_incompat_flags(disk_super, features); | ||
1245 | } | 1259 | } |
1246 | 1260 | ||
1247 | ret = defrag_count; | 1261 | ret = defrag_count; |
@@ -1379,41 +1393,39 @@ out: | |||
1379 | } | 1393 | } |
1380 | 1394 | ||
1381 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | 1395 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, |
1382 | char *name, | 1396 | char *name, unsigned long fd, int subvol, |
1383 | unsigned long fd, | 1397 | u64 *transid, bool readonly, |
1384 | int subvol, | 1398 | struct btrfs_qgroup_inherit **inherit) |
1385 | u64 *transid, | ||
1386 | bool readonly) | ||
1387 | { | 1399 | { |
1388 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
1389 | struct file *src_file; | 1400 | struct file *src_file; |
1390 | int namelen; | 1401 | int namelen; |
1391 | int ret = 0; | 1402 | int ret = 0; |
1392 | 1403 | ||
1393 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 1404 | ret = mnt_want_write_file(file); |
1394 | return -EROFS; | 1405 | if (ret) |
1406 | goto out; | ||
1395 | 1407 | ||
1396 | namelen = strlen(name); | 1408 | namelen = strlen(name); |
1397 | if (strchr(name, '/')) { | 1409 | if (strchr(name, '/')) { |
1398 | ret = -EINVAL; | 1410 | ret = -EINVAL; |
1399 | goto out; | 1411 | goto out_drop_write; |
1400 | } | 1412 | } |
1401 | 1413 | ||
1402 | if (name[0] == '.' && | 1414 | if (name[0] == '.' && |
1403 | (namelen == 1 || (name[1] == '.' && namelen == 2))) { | 1415 | (namelen == 1 || (name[1] == '.' && namelen == 2))) { |
1404 | ret = -EEXIST; | 1416 | ret = -EEXIST; |
1405 | goto out; | 1417 | goto out_drop_write; |
1406 | } | 1418 | } |
1407 | 1419 | ||
1408 | if (subvol) { | 1420 | if (subvol) { |
1409 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1421 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1410 | NULL, transid, readonly); | 1422 | NULL, transid, readonly, inherit); |
1411 | } else { | 1423 | } else { |
1412 | struct inode *src_inode; | 1424 | struct inode *src_inode; |
1413 | src_file = fget(fd); | 1425 | src_file = fget(fd); |
1414 | if (!src_file) { | 1426 | if (!src_file) { |
1415 | ret = -EINVAL; | 1427 | ret = -EINVAL; |
1416 | goto out; | 1428 | goto out_drop_write; |
1417 | } | 1429 | } |
1418 | 1430 | ||
1419 | src_inode = src_file->f_path.dentry->d_inode; | 1431 | src_inode = src_file->f_path.dentry->d_inode; |
@@ -1422,13 +1434,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1422 | "another FS\n"); | 1434 | "another FS\n"); |
1423 | ret = -EINVAL; | 1435 | ret = -EINVAL; |
1424 | fput(src_file); | 1436 | fput(src_file); |
1425 | goto out; | 1437 | goto out_drop_write; |
1426 | } | 1438 | } |
1427 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1439 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1428 | BTRFS_I(src_inode)->root, | 1440 | BTRFS_I(src_inode)->root, |
1429 | transid, readonly); | 1441 | transid, readonly, inherit); |
1430 | fput(src_file); | 1442 | fput(src_file); |
1431 | } | 1443 | } |
1444 | out_drop_write: | ||
1445 | mnt_drop_write_file(file); | ||
1432 | out: | 1446 | out: |
1433 | return ret; | 1447 | return ret; |
1434 | } | 1448 | } |
@@ -1446,7 +1460,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
1446 | 1460 | ||
1447 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | 1461 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
1448 | vol_args->fd, subvol, | 1462 | vol_args->fd, subvol, |
1449 | NULL, false); | 1463 | NULL, false, NULL); |
1450 | 1464 | ||
1451 | kfree(vol_args); | 1465 | kfree(vol_args); |
1452 | return ret; | 1466 | return ret; |
@@ -1460,6 +1474,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1460 | u64 transid = 0; | 1474 | u64 transid = 0; |
1461 | u64 *ptr = NULL; | 1475 | u64 *ptr = NULL; |
1462 | bool readonly = false; | 1476 | bool readonly = false; |
1477 | struct btrfs_qgroup_inherit *inherit = NULL; | ||
1463 | 1478 | ||
1464 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1479 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
1465 | if (IS_ERR(vol_args)) | 1480 | if (IS_ERR(vol_args)) |
@@ -1467,7 +1482,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1467 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | 1482 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; |
1468 | 1483 | ||
1469 | if (vol_args->flags & | 1484 | if (vol_args->flags & |
1470 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { | 1485 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | |
1486 | BTRFS_SUBVOL_QGROUP_INHERIT)) { | ||
1471 | ret = -EOPNOTSUPP; | 1487 | ret = -EOPNOTSUPP; |
1472 | goto out; | 1488 | goto out; |
1473 | } | 1489 | } |
@@ -1476,10 +1492,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1476 | ptr = &transid; | 1492 | ptr = &transid; |
1477 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | 1493 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) |
1478 | readonly = true; | 1494 | readonly = true; |
1495 | if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { | ||
1496 | if (vol_args->size > PAGE_CACHE_SIZE) { | ||
1497 | ret = -EINVAL; | ||
1498 | goto out; | ||
1499 | } | ||
1500 | inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); | ||
1501 | if (IS_ERR(inherit)) { | ||
1502 | ret = PTR_ERR(inherit); | ||
1503 | goto out; | ||
1504 | } | ||
1505 | } | ||
1479 | 1506 | ||
1480 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | 1507 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
1481 | vol_args->fd, subvol, | 1508 | vol_args->fd, subvol, ptr, |
1482 | ptr, readonly); | 1509 | readonly, &inherit); |
1483 | 1510 | ||
1484 | if (ret == 0 && ptr && | 1511 | if (ret == 0 && ptr && |
1485 | copy_to_user(arg + | 1512 | copy_to_user(arg + |
@@ -1488,6 +1515,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1488 | ret = -EFAULT; | 1515 | ret = -EFAULT; |
1489 | out: | 1516 | out: |
1490 | kfree(vol_args); | 1517 | kfree(vol_args); |
1518 | kfree(inherit); | ||
1491 | return ret; | 1519 | return ret; |
1492 | } | 1520 | } |
1493 | 1521 | ||
@@ -1523,29 +1551,40 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | |||
1523 | u64 flags; | 1551 | u64 flags; |
1524 | int ret = 0; | 1552 | int ret = 0; |
1525 | 1553 | ||
1526 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 1554 | ret = mnt_want_write_file(file); |
1527 | return -EROFS; | 1555 | if (ret) |
1556 | goto out; | ||
1528 | 1557 | ||
1529 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) | 1558 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { |
1530 | return -EINVAL; | 1559 | ret = -EINVAL; |
1560 | goto out_drop_write; | ||
1561 | } | ||
1531 | 1562 | ||
1532 | if (copy_from_user(&flags, arg, sizeof(flags))) | 1563 | if (copy_from_user(&flags, arg, sizeof(flags))) { |
1533 | return -EFAULT; | 1564 | ret = -EFAULT; |
1565 | goto out_drop_write; | ||
1566 | } | ||
1534 | 1567 | ||
1535 | if (flags & BTRFS_SUBVOL_CREATE_ASYNC) | 1568 | if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { |
1536 | return -EINVAL; | 1569 | ret = -EINVAL; |
1570 | goto out_drop_write; | ||
1571 | } | ||
1537 | 1572 | ||
1538 | if (flags & ~BTRFS_SUBVOL_RDONLY) | 1573 | if (flags & ~BTRFS_SUBVOL_RDONLY) { |
1539 | return -EOPNOTSUPP; | 1574 | ret = -EOPNOTSUPP; |
1575 | goto out_drop_write; | ||
1576 | } | ||
1540 | 1577 | ||
1541 | if (!inode_owner_or_capable(inode)) | 1578 | if (!inode_owner_or_capable(inode)) { |
1542 | return -EACCES; | 1579 | ret = -EACCES; |
1580 | goto out_drop_write; | ||
1581 | } | ||
1543 | 1582 | ||
1544 | down_write(&root->fs_info->subvol_sem); | 1583 | down_write(&root->fs_info->subvol_sem); |
1545 | 1584 | ||
1546 | /* nothing to do */ | 1585 | /* nothing to do */ |
1547 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | 1586 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) |
1548 | goto out; | 1587 | goto out_drop_sem; |
1549 | 1588 | ||
1550 | root_flags = btrfs_root_flags(&root->root_item); | 1589 | root_flags = btrfs_root_flags(&root->root_item); |
1551 | if (flags & BTRFS_SUBVOL_RDONLY) | 1590 | if (flags & BTRFS_SUBVOL_RDONLY) |
@@ -1568,8 +1607,11 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | |||
1568 | out_reset: | 1607 | out_reset: |
1569 | if (ret) | 1608 | if (ret) |
1570 | btrfs_set_root_flags(&root->root_item, root_flags); | 1609 | btrfs_set_root_flags(&root->root_item, root_flags); |
1571 | out: | 1610 | out_drop_sem: |
1572 | up_write(&root->fs_info->subvol_sem); | 1611 | up_write(&root->fs_info->subvol_sem); |
1612 | out_drop_write: | ||
1613 | mnt_drop_write_file(file); | ||
1614 | out: | ||
1573 | return ret; | 1615 | return ret; |
1574 | } | 1616 | } |
1575 | 1617 | ||
@@ -2340,6 +2382,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2340 | goto out_drop_write; | 2382 | goto out_drop_write; |
2341 | } | 2383 | } |
2342 | 2384 | ||
2385 | ret = -EXDEV; | ||
2386 | if (src_file->f_path.mnt != file->f_path.mnt) | ||
2387 | goto out_fput; | ||
2388 | |||
2343 | src = src_file->f_dentry->d_inode; | 2389 | src = src_file->f_dentry->d_inode; |
2344 | 2390 | ||
2345 | ret = -EINVAL; | 2391 | ret = -EINVAL; |
@@ -2360,7 +2406,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2360 | goto out_fput; | 2406 | goto out_fput; |
2361 | 2407 | ||
2362 | ret = -EXDEV; | 2408 | ret = -EXDEV; |
2363 | if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) | 2409 | if (src->i_sb != inode->i_sb) |
2364 | goto out_fput; | 2410 | goto out_fput; |
2365 | 2411 | ||
2366 | ret = -ENOMEM; | 2412 | ret = -ENOMEM; |
@@ -2434,13 +2480,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2434 | * note the key will change type as we walk through the | 2480 | * note the key will change type as we walk through the |
2435 | * tree. | 2481 | * tree. |
2436 | */ | 2482 | */ |
2437 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2483 | ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, |
2484 | 0, 0); | ||
2438 | if (ret < 0) | 2485 | if (ret < 0) |
2439 | goto out; | 2486 | goto out; |
2440 | 2487 | ||
2441 | nritems = btrfs_header_nritems(path->nodes[0]); | 2488 | nritems = btrfs_header_nritems(path->nodes[0]); |
2442 | if (path->slots[0] >= nritems) { | 2489 | if (path->slots[0] >= nritems) { |
2443 | ret = btrfs_next_leaf(root, path); | 2490 | ret = btrfs_next_leaf(BTRFS_I(src)->root, path); |
2444 | if (ret < 0) | 2491 | if (ret < 0) |
2445 | goto out; | 2492 | goto out; |
2446 | if (ret > 0) | 2493 | if (ret > 0) |
@@ -2749,8 +2796,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2749 | struct btrfs_path *path; | 2796 | struct btrfs_path *path; |
2750 | struct btrfs_key location; | 2797 | struct btrfs_key location; |
2751 | struct btrfs_disk_key disk_key; | 2798 | struct btrfs_disk_key disk_key; |
2752 | struct btrfs_super_block *disk_super; | ||
2753 | u64 features; | ||
2754 | u64 objectid = 0; | 2799 | u64 objectid = 0; |
2755 | u64 dir_id; | 2800 | u64 dir_id; |
2756 | 2801 | ||
@@ -2801,12 +2846,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2801 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2846 | btrfs_mark_buffer_dirty(path->nodes[0]); |
2802 | btrfs_free_path(path); | 2847 | btrfs_free_path(path); |
2803 | 2848 | ||
2804 | disk_super = root->fs_info->super_copy; | 2849 | btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); |
2805 | features = btrfs_super_incompat_flags(disk_super); | ||
2806 | if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { | ||
2807 | features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; | ||
2808 | btrfs_set_super_incompat_flags(disk_super, features); | ||
2809 | } | ||
2810 | btrfs_end_transaction(trans, root); | 2850 | btrfs_end_transaction(trans, root); |
2811 | 2851 | ||
2812 | return 0; | 2852 | return 0; |
@@ -3063,19 +3103,21 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | |||
3063 | } | 3103 | } |
3064 | 3104 | ||
3065 | static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, | 3105 | static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, |
3066 | void __user *arg, int reset_after_read) | 3106 | void __user *arg) |
3067 | { | 3107 | { |
3068 | struct btrfs_ioctl_get_dev_stats *sa; | 3108 | struct btrfs_ioctl_get_dev_stats *sa; |
3069 | int ret; | 3109 | int ret; |
3070 | 3110 | ||
3071 | if (reset_after_read && !capable(CAP_SYS_ADMIN)) | ||
3072 | return -EPERM; | ||
3073 | |||
3074 | sa = memdup_user(arg, sizeof(*sa)); | 3111 | sa = memdup_user(arg, sizeof(*sa)); |
3075 | if (IS_ERR(sa)) | 3112 | if (IS_ERR(sa)) |
3076 | return PTR_ERR(sa); | 3113 | return PTR_ERR(sa); |
3077 | 3114 | ||
3078 | ret = btrfs_get_dev_stats(root, sa, reset_after_read); | 3115 | if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { |
3116 | kfree(sa); | ||
3117 | return -EPERM; | ||
3118 | } | ||
3119 | |||
3120 | ret = btrfs_get_dev_stats(root, sa); | ||
3079 | 3121 | ||
3080 | if (copy_to_user(arg, sa, sizeof(*sa))) | 3122 | if (copy_to_user(arg, sa, sizeof(*sa))) |
3081 | ret = -EFAULT; | 3123 | ret = -EFAULT; |
@@ -3265,9 +3307,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
3265 | if (!capable(CAP_SYS_ADMIN)) | 3307 | if (!capable(CAP_SYS_ADMIN)) |
3266 | return -EPERM; | 3308 | return -EPERM; |
3267 | 3309 | ||
3268 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
3269 | return -EROFS; | ||
3270 | |||
3271 | ret = mnt_want_write_file(file); | 3310 | ret = mnt_want_write_file(file); |
3272 | if (ret) | 3311 | if (ret) |
3273 | return ret; | 3312 | return ret; |
@@ -3390,6 +3429,264 @@ out: | |||
3390 | return ret; | 3429 | return ret; |
3391 | } | 3430 | } |
3392 | 3431 | ||
3432 | static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | ||
3433 | { | ||
3434 | struct btrfs_ioctl_quota_ctl_args *sa; | ||
3435 | struct btrfs_trans_handle *trans = NULL; | ||
3436 | int ret; | ||
3437 | int err; | ||
3438 | |||
3439 | if (!capable(CAP_SYS_ADMIN)) | ||
3440 | return -EPERM; | ||
3441 | |||
3442 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3443 | return -EROFS; | ||
3444 | |||
3445 | sa = memdup_user(arg, sizeof(*sa)); | ||
3446 | if (IS_ERR(sa)) | ||
3447 | return PTR_ERR(sa); | ||
3448 | |||
3449 | if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { | ||
3450 | trans = btrfs_start_transaction(root, 2); | ||
3451 | if (IS_ERR(trans)) { | ||
3452 | ret = PTR_ERR(trans); | ||
3453 | goto out; | ||
3454 | } | ||
3455 | } | ||
3456 | |||
3457 | switch (sa->cmd) { | ||
3458 | case BTRFS_QUOTA_CTL_ENABLE: | ||
3459 | ret = btrfs_quota_enable(trans, root->fs_info); | ||
3460 | break; | ||
3461 | case BTRFS_QUOTA_CTL_DISABLE: | ||
3462 | ret = btrfs_quota_disable(trans, root->fs_info); | ||
3463 | break; | ||
3464 | case BTRFS_QUOTA_CTL_RESCAN: | ||
3465 | ret = btrfs_quota_rescan(root->fs_info); | ||
3466 | break; | ||
3467 | default: | ||
3468 | ret = -EINVAL; | ||
3469 | break; | ||
3470 | } | ||
3471 | |||
3472 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
3473 | ret = -EFAULT; | ||
3474 | |||
3475 | if (trans) { | ||
3476 | err = btrfs_commit_transaction(trans, root); | ||
3477 | if (err && !ret) | ||
3478 | ret = err; | ||
3479 | } | ||
3480 | |||
3481 | out: | ||
3482 | kfree(sa); | ||
3483 | return ret; | ||
3484 | } | ||
3485 | |||
3486 | static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | ||
3487 | { | ||
3488 | struct btrfs_ioctl_qgroup_assign_args *sa; | ||
3489 | struct btrfs_trans_handle *trans; | ||
3490 | int ret; | ||
3491 | int err; | ||
3492 | |||
3493 | if (!capable(CAP_SYS_ADMIN)) | ||
3494 | return -EPERM; | ||
3495 | |||
3496 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3497 | return -EROFS; | ||
3498 | |||
3499 | sa = memdup_user(arg, sizeof(*sa)); | ||
3500 | if (IS_ERR(sa)) | ||
3501 | return PTR_ERR(sa); | ||
3502 | |||
3503 | trans = btrfs_join_transaction(root); | ||
3504 | if (IS_ERR(trans)) { | ||
3505 | ret = PTR_ERR(trans); | ||
3506 | goto out; | ||
3507 | } | ||
3508 | |||
3509 | /* FIXME: check if the IDs really exist */ | ||
3510 | if (sa->assign) { | ||
3511 | ret = btrfs_add_qgroup_relation(trans, root->fs_info, | ||
3512 | sa->src, sa->dst); | ||
3513 | } else { | ||
3514 | ret = btrfs_del_qgroup_relation(trans, root->fs_info, | ||
3515 | sa->src, sa->dst); | ||
3516 | } | ||
3517 | |||
3518 | err = btrfs_end_transaction(trans, root); | ||
3519 | if (err && !ret) | ||
3520 | ret = err; | ||
3521 | |||
3522 | out: | ||
3523 | kfree(sa); | ||
3524 | return ret; | ||
3525 | } | ||
3526 | |||
3527 | static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | ||
3528 | { | ||
3529 | struct btrfs_ioctl_qgroup_create_args *sa; | ||
3530 | struct btrfs_trans_handle *trans; | ||
3531 | int ret; | ||
3532 | int err; | ||
3533 | |||
3534 | if (!capable(CAP_SYS_ADMIN)) | ||
3535 | return -EPERM; | ||
3536 | |||
3537 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3538 | return -EROFS; | ||
3539 | |||
3540 | sa = memdup_user(arg, sizeof(*sa)); | ||
3541 | if (IS_ERR(sa)) | ||
3542 | return PTR_ERR(sa); | ||
3543 | |||
3544 | trans = btrfs_join_transaction(root); | ||
3545 | if (IS_ERR(trans)) { | ||
3546 | ret = PTR_ERR(trans); | ||
3547 | goto out; | ||
3548 | } | ||
3549 | |||
3550 | /* FIXME: check if the IDs really exist */ | ||
3551 | if (sa->create) { | ||
3552 | ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, | ||
3553 | NULL); | ||
3554 | } else { | ||
3555 | ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); | ||
3556 | } | ||
3557 | |||
3558 | err = btrfs_end_transaction(trans, root); | ||
3559 | if (err && !ret) | ||
3560 | ret = err; | ||
3561 | |||
3562 | out: | ||
3563 | kfree(sa); | ||
3564 | return ret; | ||
3565 | } | ||
3566 | |||
3567 | static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | ||
3568 | { | ||
3569 | struct btrfs_ioctl_qgroup_limit_args *sa; | ||
3570 | struct btrfs_trans_handle *trans; | ||
3571 | int ret; | ||
3572 | int err; | ||
3573 | u64 qgroupid; | ||
3574 | |||
3575 | if (!capable(CAP_SYS_ADMIN)) | ||
3576 | return -EPERM; | ||
3577 | |||
3578 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3579 | return -EROFS; | ||
3580 | |||
3581 | sa = memdup_user(arg, sizeof(*sa)); | ||
3582 | if (IS_ERR(sa)) | ||
3583 | return PTR_ERR(sa); | ||
3584 | |||
3585 | trans = btrfs_join_transaction(root); | ||
3586 | if (IS_ERR(trans)) { | ||
3587 | ret = PTR_ERR(trans); | ||
3588 | goto out; | ||
3589 | } | ||
3590 | |||
3591 | qgroupid = sa->qgroupid; | ||
3592 | if (!qgroupid) { | ||
3593 | /* take the current subvol as qgroup */ | ||
3594 | qgroupid = root->root_key.objectid; | ||
3595 | } | ||
3596 | |||
3597 | /* FIXME: check if the IDs really exist */ | ||
3598 | ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); | ||
3599 | |||
3600 | err = btrfs_end_transaction(trans, root); | ||
3601 | if (err && !ret) | ||
3602 | ret = err; | ||
3603 | |||
3604 | out: | ||
3605 | kfree(sa); | ||
3606 | return ret; | ||
3607 | } | ||
3608 | |||
3609 | static long btrfs_ioctl_set_received_subvol(struct file *file, | ||
3610 | void __user *arg) | ||
3611 | { | ||
3612 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
3613 | struct inode *inode = fdentry(file)->d_inode; | ||
3614 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3615 | struct btrfs_root_item *root_item = &root->root_item; | ||
3616 | struct btrfs_trans_handle *trans; | ||
3617 | struct timespec ct = CURRENT_TIME; | ||
3618 | int ret = 0; | ||
3619 | |||
3620 | ret = mnt_want_write_file(file); | ||
3621 | if (ret < 0) | ||
3622 | return ret; | ||
3623 | |||
3624 | down_write(&root->fs_info->subvol_sem); | ||
3625 | |||
3626 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { | ||
3627 | ret = -EINVAL; | ||
3628 | goto out; | ||
3629 | } | ||
3630 | |||
3631 | if (btrfs_root_readonly(root)) { | ||
3632 | ret = -EROFS; | ||
3633 | goto out; | ||
3634 | } | ||
3635 | |||
3636 | if (!inode_owner_or_capable(inode)) { | ||
3637 | ret = -EACCES; | ||
3638 | goto out; | ||
3639 | } | ||
3640 | |||
3641 | sa = memdup_user(arg, sizeof(*sa)); | ||
3642 | if (IS_ERR(sa)) { | ||
3643 | ret = PTR_ERR(sa); | ||
3644 | sa = NULL; | ||
3645 | goto out; | ||
3646 | } | ||
3647 | |||
3648 | trans = btrfs_start_transaction(root, 1); | ||
3649 | if (IS_ERR(trans)) { | ||
3650 | ret = PTR_ERR(trans); | ||
3651 | trans = NULL; | ||
3652 | goto out; | ||
3653 | } | ||
3654 | |||
3655 | sa->rtransid = trans->transid; | ||
3656 | sa->rtime.sec = ct.tv_sec; | ||
3657 | sa->rtime.nsec = ct.tv_nsec; | ||
3658 | |||
3659 | memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); | ||
3660 | btrfs_set_root_stransid(root_item, sa->stransid); | ||
3661 | btrfs_set_root_rtransid(root_item, sa->rtransid); | ||
3662 | root_item->stime.sec = cpu_to_le64(sa->stime.sec); | ||
3663 | root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); | ||
3664 | root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); | ||
3665 | root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); | ||
3666 | |||
3667 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
3668 | &root->root_key, &root->root_item); | ||
3669 | if (ret < 0) { | ||
3670 | btrfs_end_transaction(trans, root); | ||
3671 | trans = NULL; | ||
3672 | goto out; | ||
3673 | } else { | ||
3674 | ret = btrfs_commit_transaction(trans, root); | ||
3675 | if (ret < 0) | ||
3676 | goto out; | ||
3677 | } | ||
3678 | |||
3679 | ret = copy_to_user(arg, sa, sizeof(*sa)); | ||
3680 | if (ret) | ||
3681 | ret = -EFAULT; | ||
3682 | |||
3683 | out: | ||
3684 | kfree(sa); | ||
3685 | up_write(&root->fs_info->subvol_sem); | ||
3686 | mnt_drop_write_file(file); | ||
3687 | return ret; | ||
3688 | } | ||
3689 | |||
3393 | long btrfs_ioctl(struct file *file, unsigned int | 3690 | long btrfs_ioctl(struct file *file, unsigned int |
3394 | cmd, unsigned long arg) | 3691 | cmd, unsigned long arg) |
3395 | { | 3692 | { |
@@ -3411,6 +3708,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3411 | return btrfs_ioctl_snap_create_v2(file, argp, 0); | 3708 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
3412 | case BTRFS_IOC_SUBVOL_CREATE: | 3709 | case BTRFS_IOC_SUBVOL_CREATE: |
3413 | return btrfs_ioctl_snap_create(file, argp, 1); | 3710 | return btrfs_ioctl_snap_create(file, argp, 1); |
3711 | case BTRFS_IOC_SUBVOL_CREATE_V2: | ||
3712 | return btrfs_ioctl_snap_create_v2(file, argp, 1); | ||
3414 | case BTRFS_IOC_SNAP_DESTROY: | 3713 | case BTRFS_IOC_SNAP_DESTROY: |
3415 | return btrfs_ioctl_snap_destroy(file, argp); | 3714 | return btrfs_ioctl_snap_destroy(file, argp); |
3416 | case BTRFS_IOC_SUBVOL_GETFLAGS: | 3715 | case BTRFS_IOC_SUBVOL_GETFLAGS: |
@@ -3472,10 +3771,20 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3472 | return btrfs_ioctl_balance_ctl(root, arg); | 3771 | return btrfs_ioctl_balance_ctl(root, arg); |
3473 | case BTRFS_IOC_BALANCE_PROGRESS: | 3772 | case BTRFS_IOC_BALANCE_PROGRESS: |
3474 | return btrfs_ioctl_balance_progress(root, argp); | 3773 | return btrfs_ioctl_balance_progress(root, argp); |
3774 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: | ||
3775 | return btrfs_ioctl_set_received_subvol(file, argp); | ||
3776 | case BTRFS_IOC_SEND: | ||
3777 | return btrfs_ioctl_send(file, argp); | ||
3475 | case BTRFS_IOC_GET_DEV_STATS: | 3778 | case BTRFS_IOC_GET_DEV_STATS: |
3476 | return btrfs_ioctl_get_dev_stats(root, argp, 0); | 3779 | return btrfs_ioctl_get_dev_stats(root, argp); |
3477 | case BTRFS_IOC_GET_AND_RESET_DEV_STATS: | 3780 | case BTRFS_IOC_QUOTA_CTL: |
3478 | return btrfs_ioctl_get_dev_stats(root, argp, 1); | 3781 | return btrfs_ioctl_quota_ctl(root, argp); |
3782 | case BTRFS_IOC_QGROUP_ASSIGN: | ||
3783 | return btrfs_ioctl_qgroup_assign(root, argp); | ||
3784 | case BTRFS_IOC_QGROUP_CREATE: | ||
3785 | return btrfs_ioctl_qgroup_create(root, argp); | ||
3786 | case BTRFS_IOC_QGROUP_LIMIT: | ||
3787 | return btrfs_ioctl_qgroup_limit(root, argp); | ||
3479 | } | 3788 | } |
3480 | 3789 | ||
3481 | return -ENOTTY; | 3790 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index e440aa653c3..731e2875ab9 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args { | |||
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | 34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) |
35 | #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) | ||
35 | #define BTRFS_FSID_SIZE 16 | 36 | #define BTRFS_FSID_SIZE 16 |
36 | #define BTRFS_UUID_SIZE 16 | 37 | #define BTRFS_UUID_SIZE 16 |
37 | 38 | ||
39 | #define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) | ||
40 | |||
41 | struct btrfs_qgroup_limit { | ||
42 | __u64 flags; | ||
43 | __u64 max_rfer; | ||
44 | __u64 max_excl; | ||
45 | __u64 rsv_rfer; | ||
46 | __u64 rsv_excl; | ||
47 | }; | ||
48 | |||
49 | struct btrfs_qgroup_inherit { | ||
50 | __u64 flags; | ||
51 | __u64 num_qgroups; | ||
52 | __u64 num_ref_copies; | ||
53 | __u64 num_excl_copies; | ||
54 | struct btrfs_qgroup_limit lim; | ||
55 | __u64 qgroups[0]; | ||
56 | }; | ||
57 | |||
58 | struct btrfs_ioctl_qgroup_limit_args { | ||
59 | __u64 qgroupid; | ||
60 | struct btrfs_qgroup_limit lim; | ||
61 | }; | ||
62 | |||
38 | #define BTRFS_SUBVOL_NAME_MAX 4039 | 63 | #define BTRFS_SUBVOL_NAME_MAX 4039 |
39 | struct btrfs_ioctl_vol_args_v2 { | 64 | struct btrfs_ioctl_vol_args_v2 { |
40 | __s64 fd; | 65 | __s64 fd; |
41 | __u64 transid; | 66 | __u64 transid; |
42 | __u64 flags; | 67 | __u64 flags; |
43 | __u64 unused[4]; | 68 | union { |
69 | struct { | ||
70 | __u64 size; | ||
71 | struct btrfs_qgroup_inherit __user *qgroup_inherit; | ||
72 | }; | ||
73 | __u64 unused[4]; | ||
74 | }; | ||
44 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; | 75 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; |
45 | }; | 76 | }; |
46 | 77 | ||
@@ -285,9 +316,13 @@ enum btrfs_dev_stat_values { | |||
285 | BTRFS_DEV_STAT_VALUES_MAX | 316 | BTRFS_DEV_STAT_VALUES_MAX |
286 | }; | 317 | }; |
287 | 318 | ||
319 | /* Reset statistics after reading; needs SYS_ADMIN capability */ | ||
320 | #define BTRFS_DEV_STATS_RESET (1ULL << 0) | ||
321 | |||
288 | struct btrfs_ioctl_get_dev_stats { | 322 | struct btrfs_ioctl_get_dev_stats { |
289 | __u64 devid; /* in */ | 323 | __u64 devid; /* in */ |
290 | __u64 nr_items; /* in/out */ | 324 | __u64 nr_items; /* in/out */ |
325 | __u64 flags; /* in/out */ | ||
291 | 326 | ||
292 | /* out values: */ | 327 | /* out values: */ |
293 | __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; | 328 | __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; |
@@ -295,6 +330,48 @@ struct btrfs_ioctl_get_dev_stats { | |||
295 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ | 330 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ |
296 | }; | 331 | }; |
297 | 332 | ||
333 | #define BTRFS_QUOTA_CTL_ENABLE 1 | ||
334 | #define BTRFS_QUOTA_CTL_DISABLE 2 | ||
335 | #define BTRFS_QUOTA_CTL_RESCAN 3 | ||
336 | struct btrfs_ioctl_quota_ctl_args { | ||
337 | __u64 cmd; | ||
338 | __u64 status; | ||
339 | }; | ||
340 | |||
341 | struct btrfs_ioctl_qgroup_assign_args { | ||
342 | __u64 assign; | ||
343 | __u64 src; | ||
344 | __u64 dst; | ||
345 | }; | ||
346 | |||
347 | struct btrfs_ioctl_qgroup_create_args { | ||
348 | __u64 create; | ||
349 | __u64 qgroupid; | ||
350 | }; | ||
351 | struct btrfs_ioctl_timespec { | ||
352 | __u64 sec; | ||
353 | __u32 nsec; | ||
354 | }; | ||
355 | |||
356 | struct btrfs_ioctl_received_subvol_args { | ||
357 | char uuid[BTRFS_UUID_SIZE]; /* in */ | ||
358 | __u64 stransid; /* in */ | ||
359 | __u64 rtransid; /* out */ | ||
360 | struct btrfs_ioctl_timespec stime; /* in */ | ||
361 | struct btrfs_ioctl_timespec rtime; /* out */ | ||
362 | __u64 flags; /* in */ | ||
363 | __u64 reserved[16]; /* in */ | ||
364 | }; | ||
365 | |||
366 | struct btrfs_ioctl_send_args { | ||
367 | __s64 send_fd; /* in */ | ||
368 | __u64 clone_sources_count; /* in */ | ||
369 | __u64 __user *clone_sources; /* in */ | ||
370 | __u64 parent_root; /* in */ | ||
371 | __u64 flags; /* in */ | ||
372 | __u64 reserved[4]; /* in */ | ||
373 | }; | ||
374 | |||
298 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 375 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
299 | struct btrfs_ioctl_vol_args) | 376 | struct btrfs_ioctl_vol_args) |
300 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 377 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
@@ -339,6 +416,8 @@ struct btrfs_ioctl_get_dev_stats { | |||
339 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 416 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
340 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 417 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
341 | struct btrfs_ioctl_vol_args_v2) | 418 | struct btrfs_ioctl_vol_args_v2) |
419 | #define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ | ||
420 | struct btrfs_ioctl_vol_args_v2) | ||
342 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) | 421 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) |
343 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | 422 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) |
344 | #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ | 423 | #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ |
@@ -359,9 +438,19 @@ struct btrfs_ioctl_get_dev_stats { | |||
359 | struct btrfs_ioctl_ino_path_args) | 438 | struct btrfs_ioctl_ino_path_args) |
360 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ | 439 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ |
361 | struct btrfs_ioctl_ino_path_args) | 440 | struct btrfs_ioctl_ino_path_args) |
441 | #define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ | ||
442 | struct btrfs_ioctl_received_subvol_args) | ||
443 | #define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) | ||
444 | #define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ | ||
445 | struct btrfs_ioctl_vol_args) | ||
446 | #define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ | ||
447 | struct btrfs_ioctl_quota_ctl_args) | ||
448 | #define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ | ||
449 | struct btrfs_ioctl_qgroup_assign_args) | ||
450 | #define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ | ||
451 | struct btrfs_ioctl_qgroup_create_args) | ||
452 | #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ | ||
453 | struct btrfs_ioctl_qgroup_limit_args) | ||
362 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | 454 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ |
363 | struct btrfs_ioctl_get_dev_stats) | 455 | struct btrfs_ioctl_get_dev_stats) |
364 | #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | ||
365 | struct btrfs_ioctl_get_dev_stats) | ||
366 | |||
367 | #endif | 456 | #endif |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 272f911203f..a44eff07480 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -78,13 +78,15 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) | |||
78 | write_lock(&eb->lock); | 78 | write_lock(&eb->lock); |
79 | WARN_ON(atomic_read(&eb->spinning_writers)); | 79 | WARN_ON(atomic_read(&eb->spinning_writers)); |
80 | atomic_inc(&eb->spinning_writers); | 80 | atomic_inc(&eb->spinning_writers); |
81 | if (atomic_dec_and_test(&eb->blocking_writers)) | 81 | if (atomic_dec_and_test(&eb->blocking_writers) && |
82 | waitqueue_active(&eb->write_lock_wq)) | ||
82 | wake_up(&eb->write_lock_wq); | 83 | wake_up(&eb->write_lock_wq); |
83 | } else if (rw == BTRFS_READ_LOCK_BLOCKING) { | 84 | } else if (rw == BTRFS_READ_LOCK_BLOCKING) { |
84 | BUG_ON(atomic_read(&eb->blocking_readers) == 0); | 85 | BUG_ON(atomic_read(&eb->blocking_readers) == 0); |
85 | read_lock(&eb->lock); | 86 | read_lock(&eb->lock); |
86 | atomic_inc(&eb->spinning_readers); | 87 | atomic_inc(&eb->spinning_readers); |
87 | if (atomic_dec_and_test(&eb->blocking_readers)) | 88 | if (atomic_dec_and_test(&eb->blocking_readers) && |
89 | waitqueue_active(&eb->read_lock_wq)) | ||
88 | wake_up(&eb->read_lock_wq); | 90 | wake_up(&eb->read_lock_wq); |
89 | } | 91 | } |
90 | return; | 92 | return; |
@@ -199,7 +201,8 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | |||
199 | } | 201 | } |
200 | btrfs_assert_tree_read_locked(eb); | 202 | btrfs_assert_tree_read_locked(eb); |
201 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | 203 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); |
202 | if (atomic_dec_and_test(&eb->blocking_readers)) | 204 | if (atomic_dec_and_test(&eb->blocking_readers) && |
205 | waitqueue_active(&eb->read_lock_wq)) | ||
203 | wake_up(&eb->read_lock_wq); | 206 | wake_up(&eb->read_lock_wq); |
204 | atomic_dec(&eb->read_locks); | 207 | atomic_dec(&eb->read_locks); |
205 | } | 208 | } |
@@ -247,8 +250,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb) | |||
247 | if (blockers) { | 250 | if (blockers) { |
248 | WARN_ON(atomic_read(&eb->spinning_writers)); | 251 | WARN_ON(atomic_read(&eb->spinning_writers)); |
249 | atomic_dec(&eb->blocking_writers); | 252 | atomic_dec(&eb->blocking_writers); |
250 | smp_wmb(); | 253 | smp_mb(); |
251 | wake_up(&eb->write_lock_wq); | 254 | if (waitqueue_active(&eb->write_lock_wq)) |
255 | wake_up(&eb->write_lock_wq); | ||
252 | } else { | 256 | } else { |
253 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | 257 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
254 | atomic_dec(&eb->spinning_writers); | 258 | atomic_dec(&eb->spinning_writers); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c new file mode 100644 index 00000000000..bc424ae5a81 --- /dev/null +++ b/fs/btrfs/qgroup.c | |||
@@ -0,0 +1,1571 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/writeback.h> | ||
22 | #include <linux/blkdev.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | |||
27 | #include "ctree.h" | ||
28 | #include "transaction.h" | ||
29 | #include "disk-io.h" | ||
30 | #include "locking.h" | ||
31 | #include "ulist.h" | ||
32 | #include "ioctl.h" | ||
33 | #include "backref.h" | ||
34 | |||
35 | /* TODO XXX FIXME | ||
36 | * - subvol delete -> delete when ref goes to 0? delete limits also? | ||
37 | * - reorganize keys | ||
38 | * - compressed | ||
39 | * - sync | ||
40 | * - rescan | ||
41 | * - copy also limits on subvol creation | ||
42 | * - limit | ||
43 | * - caches for ulists | ||
44 | * - performance benchmarks | ||
45 | * - check all ioctl parameters | ||
46 | */ | ||
47 | |||
48 | /* | ||
49 | * one struct for each qgroup, organized in fs_info->qgroup_tree. | ||
50 | */ | ||
51 | struct btrfs_qgroup { | ||
52 | u64 qgroupid; | ||
53 | |||
54 | /* | ||
55 | * state | ||
56 | */ | ||
57 | u64 rfer; /* referenced */ | ||
58 | u64 rfer_cmpr; /* referenced compressed */ | ||
59 | u64 excl; /* exclusive */ | ||
60 | u64 excl_cmpr; /* exclusive compressed */ | ||
61 | |||
62 | /* | ||
63 | * limits | ||
64 | */ | ||
65 | u64 lim_flags; /* which limits are set */ | ||
66 | u64 max_rfer; | ||
67 | u64 max_excl; | ||
68 | u64 rsv_rfer; | ||
69 | u64 rsv_excl; | ||
70 | |||
71 | /* | ||
72 | * reservation tracking | ||
73 | */ | ||
74 | u64 reserved; | ||
75 | |||
76 | /* | ||
77 | * lists | ||
78 | */ | ||
79 | struct list_head groups; /* groups this group is member of */ | ||
80 | struct list_head members; /* groups that are members of this group */ | ||
81 | struct list_head dirty; /* dirty groups */ | ||
82 | struct rb_node node; /* tree of qgroups */ | ||
83 | |||
84 | /* | ||
85 | * temp variables for accounting operations | ||
86 | */ | ||
87 | u64 tag; | ||
88 | u64 refcnt; | ||
89 | }; | ||
90 | |||
91 | /* | ||
92 | * glue structure to represent the relations between qgroups. | ||
93 | */ | ||
94 | struct btrfs_qgroup_list { | ||
95 | struct list_head next_group; | ||
96 | struct list_head next_member; | ||
97 | struct btrfs_qgroup *group; | ||
98 | struct btrfs_qgroup *member; | ||
99 | }; | ||
100 | |||
101 | /* must be called with qgroup_lock held */ | ||
102 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, | ||
103 | u64 qgroupid) | ||
104 | { | ||
105 | struct rb_node *n = fs_info->qgroup_tree.rb_node; | ||
106 | struct btrfs_qgroup *qgroup; | ||
107 | |||
108 | while (n) { | ||
109 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | ||
110 | if (qgroup->qgroupid < qgroupid) | ||
111 | n = n->rb_left; | ||
112 | else if (qgroup->qgroupid > qgroupid) | ||
113 | n = n->rb_right; | ||
114 | else | ||
115 | return qgroup; | ||
116 | } | ||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | /* must be called with qgroup_lock held */ | ||
121 | static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, | ||
122 | u64 qgroupid) | ||
123 | { | ||
124 | struct rb_node **p = &fs_info->qgroup_tree.rb_node; | ||
125 | struct rb_node *parent = NULL; | ||
126 | struct btrfs_qgroup *qgroup; | ||
127 | |||
128 | while (*p) { | ||
129 | parent = *p; | ||
130 | qgroup = rb_entry(parent, struct btrfs_qgroup, node); | ||
131 | |||
132 | if (qgroup->qgroupid < qgroupid) | ||
133 | p = &(*p)->rb_left; | ||
134 | else if (qgroup->qgroupid > qgroupid) | ||
135 | p = &(*p)->rb_right; | ||
136 | else | ||
137 | return qgroup; | ||
138 | } | ||
139 | |||
140 | qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); | ||
141 | if (!qgroup) | ||
142 | return ERR_PTR(-ENOMEM); | ||
143 | |||
144 | qgroup->qgroupid = qgroupid; | ||
145 | INIT_LIST_HEAD(&qgroup->groups); | ||
146 | INIT_LIST_HEAD(&qgroup->members); | ||
147 | INIT_LIST_HEAD(&qgroup->dirty); | ||
148 | |||
149 | rb_link_node(&qgroup->node, parent, p); | ||
150 | rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); | ||
151 | |||
152 | return qgroup; | ||
153 | } | ||
154 | |||
155 | /* must be called with qgroup_lock held */ | ||
156 | static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) | ||
157 | { | ||
158 | struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
159 | struct btrfs_qgroup_list *list; | ||
160 | |||
161 | if (!qgroup) | ||
162 | return -ENOENT; | ||
163 | |||
164 | rb_erase(&qgroup->node, &fs_info->qgroup_tree); | ||
165 | list_del(&qgroup->dirty); | ||
166 | |||
167 | while (!list_empty(&qgroup->groups)) { | ||
168 | list = list_first_entry(&qgroup->groups, | ||
169 | struct btrfs_qgroup_list, next_group); | ||
170 | list_del(&list->next_group); | ||
171 | list_del(&list->next_member); | ||
172 | kfree(list); | ||
173 | } | ||
174 | |||
175 | while (!list_empty(&qgroup->members)) { | ||
176 | list = list_first_entry(&qgroup->members, | ||
177 | struct btrfs_qgroup_list, next_member); | ||
178 | list_del(&list->next_group); | ||
179 | list_del(&list->next_member); | ||
180 | kfree(list); | ||
181 | } | ||
182 | kfree(qgroup); | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | /* must be called with qgroup_lock held */ | ||
188 | static int add_relation_rb(struct btrfs_fs_info *fs_info, | ||
189 | u64 memberid, u64 parentid) | ||
190 | { | ||
191 | struct btrfs_qgroup *member; | ||
192 | struct btrfs_qgroup *parent; | ||
193 | struct btrfs_qgroup_list *list; | ||
194 | |||
195 | member = find_qgroup_rb(fs_info, memberid); | ||
196 | parent = find_qgroup_rb(fs_info, parentid); | ||
197 | if (!member || !parent) | ||
198 | return -ENOENT; | ||
199 | |||
200 | list = kzalloc(sizeof(*list), GFP_ATOMIC); | ||
201 | if (!list) | ||
202 | return -ENOMEM; | ||
203 | |||
204 | list->group = parent; | ||
205 | list->member = member; | ||
206 | list_add_tail(&list->next_group, &member->groups); | ||
207 | list_add_tail(&list->next_member, &parent->members); | ||
208 | |||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | /* must be called with qgroup_lock held */ | ||
213 | static int del_relation_rb(struct btrfs_fs_info *fs_info, | ||
214 | u64 memberid, u64 parentid) | ||
215 | { | ||
216 | struct btrfs_qgroup *member; | ||
217 | struct btrfs_qgroup *parent; | ||
218 | struct btrfs_qgroup_list *list; | ||
219 | |||
220 | member = find_qgroup_rb(fs_info, memberid); | ||
221 | parent = find_qgroup_rb(fs_info, parentid); | ||
222 | if (!member || !parent) | ||
223 | return -ENOENT; | ||
224 | |||
225 | list_for_each_entry(list, &member->groups, next_group) { | ||
226 | if (list->group == parent) { | ||
227 | list_del(&list->next_group); | ||
228 | list_del(&list->next_member); | ||
229 | kfree(list); | ||
230 | return 0; | ||
231 | } | ||
232 | } | ||
233 | return -ENOENT; | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * The full config is read in one go, only called from open_ctree() | ||
238 | * It doesn't use any locking, as at this point we're still single-threaded | ||
239 | */ | ||
240 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | ||
241 | { | ||
242 | struct btrfs_key key; | ||
243 | struct btrfs_key found_key; | ||
244 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
245 | struct btrfs_path *path = NULL; | ||
246 | struct extent_buffer *l; | ||
247 | int slot; | ||
248 | int ret = 0; | ||
249 | u64 flags = 0; | ||
250 | |||
251 | if (!fs_info->quota_enabled) | ||
252 | return 0; | ||
253 | |||
254 | path = btrfs_alloc_path(); | ||
255 | if (!path) { | ||
256 | ret = -ENOMEM; | ||
257 | goto out; | ||
258 | } | ||
259 | |||
260 | /* default this to quota off, in case no status key is found */ | ||
261 | fs_info->qgroup_flags = 0; | ||
262 | |||
263 | /* | ||
264 | * pass 1: read status, all qgroup infos and limits | ||
265 | */ | ||
266 | key.objectid = 0; | ||
267 | key.type = 0; | ||
268 | key.offset = 0; | ||
269 | ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); | ||
270 | if (ret) | ||
271 | goto out; | ||
272 | |||
273 | while (1) { | ||
274 | struct btrfs_qgroup *qgroup; | ||
275 | |||
276 | slot = path->slots[0]; | ||
277 | l = path->nodes[0]; | ||
278 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
279 | |||
280 | if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { | ||
281 | struct btrfs_qgroup_status_item *ptr; | ||
282 | |||
283 | ptr = btrfs_item_ptr(l, slot, | ||
284 | struct btrfs_qgroup_status_item); | ||
285 | |||
286 | if (btrfs_qgroup_status_version(l, ptr) != | ||
287 | BTRFS_QGROUP_STATUS_VERSION) { | ||
288 | printk(KERN_ERR | ||
289 | "btrfs: old qgroup version, quota disabled\n"); | ||
290 | goto out; | ||
291 | } | ||
292 | if (btrfs_qgroup_status_generation(l, ptr) != | ||
293 | fs_info->generation) { | ||
294 | flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
295 | printk(KERN_ERR | ||
296 | "btrfs: qgroup generation mismatch, " | ||
297 | "marked as inconsistent\n"); | ||
298 | } | ||
299 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, | ||
300 | ptr); | ||
301 | /* FIXME read scan element */ | ||
302 | goto next1; | ||
303 | } | ||
304 | |||
305 | if (found_key.type != BTRFS_QGROUP_INFO_KEY && | ||
306 | found_key.type != BTRFS_QGROUP_LIMIT_KEY) | ||
307 | goto next1; | ||
308 | |||
309 | qgroup = find_qgroup_rb(fs_info, found_key.offset); | ||
310 | if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || | ||
311 | (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { | ||
312 | printk(KERN_ERR "btrfs: inconsitent qgroup config\n"); | ||
313 | flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
314 | } | ||
315 | if (!qgroup) { | ||
316 | qgroup = add_qgroup_rb(fs_info, found_key.offset); | ||
317 | if (IS_ERR(qgroup)) { | ||
318 | ret = PTR_ERR(qgroup); | ||
319 | goto out; | ||
320 | } | ||
321 | } | ||
322 | switch (found_key.type) { | ||
323 | case BTRFS_QGROUP_INFO_KEY: { | ||
324 | struct btrfs_qgroup_info_item *ptr; | ||
325 | |||
326 | ptr = btrfs_item_ptr(l, slot, | ||
327 | struct btrfs_qgroup_info_item); | ||
328 | qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); | ||
329 | qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); | ||
330 | qgroup->excl = btrfs_qgroup_info_excl(l, ptr); | ||
331 | qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); | ||
332 | /* generation currently unused */ | ||
333 | break; | ||
334 | } | ||
335 | case BTRFS_QGROUP_LIMIT_KEY: { | ||
336 | struct btrfs_qgroup_limit_item *ptr; | ||
337 | |||
338 | ptr = btrfs_item_ptr(l, slot, | ||
339 | struct btrfs_qgroup_limit_item); | ||
340 | qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); | ||
341 | qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); | ||
342 | qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); | ||
343 | qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); | ||
344 | qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); | ||
345 | break; | ||
346 | } | ||
347 | } | ||
348 | next1: | ||
349 | ret = btrfs_next_item(quota_root, path); | ||
350 | if (ret < 0) | ||
351 | goto out; | ||
352 | if (ret) | ||
353 | break; | ||
354 | } | ||
355 | btrfs_release_path(path); | ||
356 | |||
357 | /* | ||
358 | * pass 2: read all qgroup relations | ||
359 | */ | ||
360 | key.objectid = 0; | ||
361 | key.type = BTRFS_QGROUP_RELATION_KEY; | ||
362 | key.offset = 0; | ||
363 | ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); | ||
364 | if (ret) | ||
365 | goto out; | ||
366 | while (1) { | ||
367 | slot = path->slots[0]; | ||
368 | l = path->nodes[0]; | ||
369 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
370 | |||
371 | if (found_key.type != BTRFS_QGROUP_RELATION_KEY) | ||
372 | goto next2; | ||
373 | |||
374 | if (found_key.objectid > found_key.offset) { | ||
375 | /* parent <- member, not needed to build config */ | ||
376 | /* FIXME should we omit the key completely? */ | ||
377 | goto next2; | ||
378 | } | ||
379 | |||
380 | ret = add_relation_rb(fs_info, found_key.objectid, | ||
381 | found_key.offset); | ||
382 | if (ret) | ||
383 | goto out; | ||
384 | next2: | ||
385 | ret = btrfs_next_item(quota_root, path); | ||
386 | if (ret < 0) | ||
387 | goto out; | ||
388 | if (ret) | ||
389 | break; | ||
390 | } | ||
391 | out: | ||
392 | fs_info->qgroup_flags |= flags; | ||
393 | if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { | ||
394 | fs_info->quota_enabled = 0; | ||
395 | fs_info->pending_quota_state = 0; | ||
396 | } | ||
397 | btrfs_free_path(path); | ||
398 | |||
399 | return ret < 0 ? ret : 0; | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * This is only called from close_ctree() or open_ctree(), both in single- | ||
404 | * threaded paths. Clean up the in-memory structures. No locking needed. | ||
405 | */ | ||
406 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) | ||
407 | { | ||
408 | struct rb_node *n; | ||
409 | struct btrfs_qgroup *qgroup; | ||
410 | struct btrfs_qgroup_list *list; | ||
411 | |||
412 | while ((n = rb_first(&fs_info->qgroup_tree))) { | ||
413 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | ||
414 | rb_erase(n, &fs_info->qgroup_tree); | ||
415 | |||
416 | WARN_ON(!list_empty(&qgroup->dirty)); | ||
417 | |||
418 | while (!list_empty(&qgroup->groups)) { | ||
419 | list = list_first_entry(&qgroup->groups, | ||
420 | struct btrfs_qgroup_list, | ||
421 | next_group); | ||
422 | list_del(&list->next_group); | ||
423 | list_del(&list->next_member); | ||
424 | kfree(list); | ||
425 | } | ||
426 | |||
427 | while (!list_empty(&qgroup->members)) { | ||
428 | list = list_first_entry(&qgroup->members, | ||
429 | struct btrfs_qgroup_list, | ||
430 | next_member); | ||
431 | list_del(&list->next_group); | ||
432 | list_del(&list->next_member); | ||
433 | kfree(list); | ||
434 | } | ||
435 | kfree(qgroup); | ||
436 | } | ||
437 | } | ||
438 | |||
439 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, | ||
440 | struct btrfs_root *quota_root, | ||
441 | u64 src, u64 dst) | ||
442 | { | ||
443 | int ret; | ||
444 | struct btrfs_path *path; | ||
445 | struct btrfs_key key; | ||
446 | |||
447 | path = btrfs_alloc_path(); | ||
448 | if (!path) | ||
449 | return -ENOMEM; | ||
450 | |||
451 | key.objectid = src; | ||
452 | key.type = BTRFS_QGROUP_RELATION_KEY; | ||
453 | key.offset = dst; | ||
454 | |||
455 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); | ||
456 | |||
457 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
458 | |||
459 | btrfs_free_path(path); | ||
460 | return ret; | ||
461 | } | ||
462 | |||
463 | static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, | ||
464 | struct btrfs_root *quota_root, | ||
465 | u64 src, u64 dst) | ||
466 | { | ||
467 | int ret; | ||
468 | struct btrfs_path *path; | ||
469 | struct btrfs_key key; | ||
470 | |||
471 | path = btrfs_alloc_path(); | ||
472 | if (!path) | ||
473 | return -ENOMEM; | ||
474 | |||
475 | key.objectid = src; | ||
476 | key.type = BTRFS_QGROUP_RELATION_KEY; | ||
477 | key.offset = dst; | ||
478 | |||
479 | ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); | ||
480 | if (ret < 0) | ||
481 | goto out; | ||
482 | |||
483 | if (ret > 0) { | ||
484 | ret = -ENOENT; | ||
485 | goto out; | ||
486 | } | ||
487 | |||
488 | ret = btrfs_del_item(trans, quota_root, path); | ||
489 | out: | ||
490 | btrfs_free_path(path); | ||
491 | return ret; | ||
492 | } | ||
493 | |||
494 | static int add_qgroup_item(struct btrfs_trans_handle *trans, | ||
495 | struct btrfs_root *quota_root, u64 qgroupid) | ||
496 | { | ||
497 | int ret; | ||
498 | struct btrfs_path *path; | ||
499 | struct btrfs_qgroup_info_item *qgroup_info; | ||
500 | struct btrfs_qgroup_limit_item *qgroup_limit; | ||
501 | struct extent_buffer *leaf; | ||
502 | struct btrfs_key key; | ||
503 | |||
504 | path = btrfs_alloc_path(); | ||
505 | if (!path) | ||
506 | return -ENOMEM; | ||
507 | |||
508 | key.objectid = 0; | ||
509 | key.type = BTRFS_QGROUP_INFO_KEY; | ||
510 | key.offset = qgroupid; | ||
511 | |||
512 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | ||
513 | sizeof(*qgroup_info)); | ||
514 | if (ret) | ||
515 | goto out; | ||
516 | |||
517 | leaf = path->nodes[0]; | ||
518 | qgroup_info = btrfs_item_ptr(leaf, path->slots[0], | ||
519 | struct btrfs_qgroup_info_item); | ||
520 | btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); | ||
521 | btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); | ||
522 | btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); | ||
523 | btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); | ||
524 | btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); | ||
525 | |||
526 | btrfs_mark_buffer_dirty(leaf); | ||
527 | |||
528 | btrfs_release_path(path); | ||
529 | |||
530 | key.type = BTRFS_QGROUP_LIMIT_KEY; | ||
531 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | ||
532 | sizeof(*qgroup_limit)); | ||
533 | if (ret) | ||
534 | goto out; | ||
535 | |||
536 | leaf = path->nodes[0]; | ||
537 | qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], | ||
538 | struct btrfs_qgroup_limit_item); | ||
539 | btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); | ||
540 | btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); | ||
541 | btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); | ||
542 | btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); | ||
543 | btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); | ||
544 | |||
545 | btrfs_mark_buffer_dirty(leaf); | ||
546 | |||
547 | ret = 0; | ||
548 | out: | ||
549 | btrfs_free_path(path); | ||
550 | return ret; | ||
551 | } | ||
552 | |||
553 | static int del_qgroup_item(struct btrfs_trans_handle *trans, | ||
554 | struct btrfs_root *quota_root, u64 qgroupid) | ||
555 | { | ||
556 | int ret; | ||
557 | struct btrfs_path *path; | ||
558 | struct btrfs_key key; | ||
559 | |||
560 | path = btrfs_alloc_path(); | ||
561 | if (!path) | ||
562 | return -ENOMEM; | ||
563 | |||
564 | key.objectid = 0; | ||
565 | key.type = BTRFS_QGROUP_INFO_KEY; | ||
566 | key.offset = qgroupid; | ||
567 | ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); | ||
568 | if (ret < 0) | ||
569 | goto out; | ||
570 | |||
571 | if (ret > 0) { | ||
572 | ret = -ENOENT; | ||
573 | goto out; | ||
574 | } | ||
575 | |||
576 | ret = btrfs_del_item(trans, quota_root, path); | ||
577 | if (ret) | ||
578 | goto out; | ||
579 | |||
580 | btrfs_release_path(path); | ||
581 | |||
582 | key.type = BTRFS_QGROUP_LIMIT_KEY; | ||
583 | ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); | ||
584 | if (ret < 0) | ||
585 | goto out; | ||
586 | |||
587 | if (ret > 0) { | ||
588 | ret = -ENOENT; | ||
589 | goto out; | ||
590 | } | ||
591 | |||
592 | ret = btrfs_del_item(trans, quota_root, path); | ||
593 | |||
594 | out: | ||
595 | btrfs_free_path(path); | ||
596 | return ret; | ||
597 | } | ||
598 | |||
599 | static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, | ||
600 | struct btrfs_root *root, u64 qgroupid, | ||
601 | u64 flags, u64 max_rfer, u64 max_excl, | ||
602 | u64 rsv_rfer, u64 rsv_excl) | ||
603 | { | ||
604 | struct btrfs_path *path; | ||
605 | struct btrfs_key key; | ||
606 | struct extent_buffer *l; | ||
607 | struct btrfs_qgroup_limit_item *qgroup_limit; | ||
608 | int ret; | ||
609 | int slot; | ||
610 | |||
611 | key.objectid = 0; | ||
612 | key.type = BTRFS_QGROUP_LIMIT_KEY; | ||
613 | key.offset = qgroupid; | ||
614 | |||
615 | path = btrfs_alloc_path(); | ||
616 | BUG_ON(!path); | ||
617 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
618 | if (ret > 0) | ||
619 | ret = -ENOENT; | ||
620 | |||
621 | if (ret) | ||
622 | goto out; | ||
623 | |||
624 | l = path->nodes[0]; | ||
625 | slot = path->slots[0]; | ||
626 | qgroup_limit = btrfs_item_ptr(l, path->slots[0], | ||
627 | struct btrfs_qgroup_limit_item); | ||
628 | btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); | ||
629 | btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); | ||
630 | btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); | ||
631 | btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); | ||
632 | btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); | ||
633 | |||
634 | btrfs_mark_buffer_dirty(l); | ||
635 | |||
636 | out: | ||
637 | btrfs_free_path(path); | ||
638 | return ret; | ||
639 | } | ||
640 | |||
641 | static int update_qgroup_info_item(struct btrfs_trans_handle *trans, | ||
642 | struct btrfs_root *root, | ||
643 | struct btrfs_qgroup *qgroup) | ||
644 | { | ||
645 | struct btrfs_path *path; | ||
646 | struct btrfs_key key; | ||
647 | struct extent_buffer *l; | ||
648 | struct btrfs_qgroup_info_item *qgroup_info; | ||
649 | int ret; | ||
650 | int slot; | ||
651 | |||
652 | key.objectid = 0; | ||
653 | key.type = BTRFS_QGROUP_INFO_KEY; | ||
654 | key.offset = qgroup->qgroupid; | ||
655 | |||
656 | path = btrfs_alloc_path(); | ||
657 | BUG_ON(!path); | ||
658 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
659 | if (ret > 0) | ||
660 | ret = -ENOENT; | ||
661 | |||
662 | if (ret) | ||
663 | goto out; | ||
664 | |||
665 | l = path->nodes[0]; | ||
666 | slot = path->slots[0]; | ||
667 | qgroup_info = btrfs_item_ptr(l, path->slots[0], | ||
668 | struct btrfs_qgroup_info_item); | ||
669 | btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); | ||
670 | btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); | ||
671 | btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); | ||
672 | btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); | ||
673 | btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); | ||
674 | |||
675 | btrfs_mark_buffer_dirty(l); | ||
676 | |||
677 | out: | ||
678 | btrfs_free_path(path); | ||
679 | return ret; | ||
680 | } | ||
681 | |||
682 | static int update_qgroup_status_item(struct btrfs_trans_handle *trans, | ||
683 | struct btrfs_fs_info *fs_info, | ||
684 | struct btrfs_root *root) | ||
685 | { | ||
686 | struct btrfs_path *path; | ||
687 | struct btrfs_key key; | ||
688 | struct extent_buffer *l; | ||
689 | struct btrfs_qgroup_status_item *ptr; | ||
690 | int ret; | ||
691 | int slot; | ||
692 | |||
693 | key.objectid = 0; | ||
694 | key.type = BTRFS_QGROUP_STATUS_KEY; | ||
695 | key.offset = 0; | ||
696 | |||
697 | path = btrfs_alloc_path(); | ||
698 | BUG_ON(!path); | ||
699 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
700 | if (ret > 0) | ||
701 | ret = -ENOENT; | ||
702 | |||
703 | if (ret) | ||
704 | goto out; | ||
705 | |||
706 | l = path->nodes[0]; | ||
707 | slot = path->slots[0]; | ||
708 | ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); | ||
709 | btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); | ||
710 | btrfs_set_qgroup_status_generation(l, ptr, trans->transid); | ||
711 | /* XXX scan */ | ||
712 | |||
713 | btrfs_mark_buffer_dirty(l); | ||
714 | |||
715 | out: | ||
716 | btrfs_free_path(path); | ||
717 | return ret; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * called with qgroup_lock held | ||
722 | */ | ||
723 | static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, | ||
724 | struct btrfs_root *root) | ||
725 | { | ||
726 | struct btrfs_path *path; | ||
727 | struct btrfs_key key; | ||
728 | int ret; | ||
729 | |||
730 | if (!root) | ||
731 | return -EINVAL; | ||
732 | |||
733 | path = btrfs_alloc_path(); | ||
734 | if (!path) | ||
735 | return -ENOMEM; | ||
736 | |||
737 | while (1) { | ||
738 | key.objectid = 0; | ||
739 | key.offset = 0; | ||
740 | key.type = 0; | ||
741 | |||
742 | path->leave_spinning = 1; | ||
743 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
744 | if (ret > 0) { | ||
745 | if (path->slots[0] == 0) | ||
746 | break; | ||
747 | path->slots[0]--; | ||
748 | } else if (ret < 0) { | ||
749 | break; | ||
750 | } | ||
751 | |||
752 | ret = btrfs_del_item(trans, root, path); | ||
753 | if (ret) | ||
754 | goto out; | ||
755 | btrfs_release_path(path); | ||
756 | } | ||
757 | ret = 0; | ||
758 | out: | ||
759 | root->fs_info->pending_quota_state = 0; | ||
760 | btrfs_free_path(path); | ||
761 | return ret; | ||
762 | } | ||
763 | |||
764 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, | ||
765 | struct btrfs_fs_info *fs_info) | ||
766 | { | ||
767 | struct btrfs_root *quota_root; | ||
768 | struct btrfs_path *path = NULL; | ||
769 | struct btrfs_qgroup_status_item *ptr; | ||
770 | struct extent_buffer *leaf; | ||
771 | struct btrfs_key key; | ||
772 | int ret = 0; | ||
773 | |||
774 | spin_lock(&fs_info->qgroup_lock); | ||
775 | if (fs_info->quota_root) { | ||
776 | fs_info->pending_quota_state = 1; | ||
777 | spin_unlock(&fs_info->qgroup_lock); | ||
778 | goto out; | ||
779 | } | ||
780 | spin_unlock(&fs_info->qgroup_lock); | ||
781 | |||
782 | /* | ||
783 | * initially create the quota tree | ||
784 | */ | ||
785 | quota_root = btrfs_create_tree(trans, fs_info, | ||
786 | BTRFS_QUOTA_TREE_OBJECTID); | ||
787 | if (IS_ERR(quota_root)) { | ||
788 | ret = PTR_ERR(quota_root); | ||
789 | goto out; | ||
790 | } | ||
791 | |||
792 | path = btrfs_alloc_path(); | ||
793 | if (!path) | ||
794 | return -ENOMEM; | ||
795 | |||
796 | key.objectid = 0; | ||
797 | key.type = BTRFS_QGROUP_STATUS_KEY; | ||
798 | key.offset = 0; | ||
799 | |||
800 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | ||
801 | sizeof(*ptr)); | ||
802 | if (ret) | ||
803 | goto out; | ||
804 | |||
805 | leaf = path->nodes[0]; | ||
806 | ptr = btrfs_item_ptr(leaf, path->slots[0], | ||
807 | struct btrfs_qgroup_status_item); | ||
808 | btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); | ||
809 | btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); | ||
810 | fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | | ||
811 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
812 | btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); | ||
813 | btrfs_set_qgroup_status_scan(leaf, ptr, 0); | ||
814 | |||
815 | btrfs_mark_buffer_dirty(leaf); | ||
816 | |||
817 | spin_lock(&fs_info->qgroup_lock); | ||
818 | fs_info->quota_root = quota_root; | ||
819 | fs_info->pending_quota_state = 1; | ||
820 | spin_unlock(&fs_info->qgroup_lock); | ||
821 | out: | ||
822 | btrfs_free_path(path); | ||
823 | return ret; | ||
824 | } | ||
825 | |||
826 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, | ||
827 | struct btrfs_fs_info *fs_info) | ||
828 | { | ||
829 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
830 | struct btrfs_root *quota_root; | ||
831 | int ret = 0; | ||
832 | |||
833 | spin_lock(&fs_info->qgroup_lock); | ||
834 | fs_info->quota_enabled = 0; | ||
835 | fs_info->pending_quota_state = 0; | ||
836 | quota_root = fs_info->quota_root; | ||
837 | fs_info->quota_root = NULL; | ||
838 | btrfs_free_qgroup_config(fs_info); | ||
839 | spin_unlock(&fs_info->qgroup_lock); | ||
840 | |||
841 | if (!quota_root) | ||
842 | return -EINVAL; | ||
843 | |||
844 | ret = btrfs_clean_quota_tree(trans, quota_root); | ||
845 | if (ret) | ||
846 | goto out; | ||
847 | |||
848 | ret = btrfs_del_root(trans, tree_root, &quota_root->root_key); | ||
849 | if (ret) | ||
850 | goto out; | ||
851 | |||
852 | list_del(&quota_root->dirty_list); | ||
853 | |||
854 | btrfs_tree_lock(quota_root->node); | ||
855 | clean_tree_block(trans, tree_root, quota_root->node); | ||
856 | btrfs_tree_unlock(quota_root->node); | ||
857 | btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); | ||
858 | |||
859 | free_extent_buffer(quota_root->node); | ||
860 | free_extent_buffer(quota_root->commit_root); | ||
861 | kfree(quota_root); | ||
862 | out: | ||
863 | return ret; | ||
864 | } | ||
865 | |||
866 | int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) | ||
867 | { | ||
868 | /* FIXME */ | ||
869 | return 0; | ||
870 | } | ||
871 | |||
872 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
873 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | ||
874 | { | ||
875 | struct btrfs_root *quota_root; | ||
876 | int ret = 0; | ||
877 | |||
878 | quota_root = fs_info->quota_root; | ||
879 | if (!quota_root) | ||
880 | return -EINVAL; | ||
881 | |||
882 | ret = add_qgroup_relation_item(trans, quota_root, src, dst); | ||
883 | if (ret) | ||
884 | return ret; | ||
885 | |||
886 | ret = add_qgroup_relation_item(trans, quota_root, dst, src); | ||
887 | if (ret) { | ||
888 | del_qgroup_relation_item(trans, quota_root, src, dst); | ||
889 | return ret; | ||
890 | } | ||
891 | |||
892 | spin_lock(&fs_info->qgroup_lock); | ||
893 | ret = add_relation_rb(quota_root->fs_info, src, dst); | ||
894 | spin_unlock(&fs_info->qgroup_lock); | ||
895 | |||
896 | return ret; | ||
897 | } | ||
898 | |||
899 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
900 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | ||
901 | { | ||
902 | struct btrfs_root *quota_root; | ||
903 | int ret = 0; | ||
904 | int err; | ||
905 | |||
906 | quota_root = fs_info->quota_root; | ||
907 | if (!quota_root) | ||
908 | return -EINVAL; | ||
909 | |||
910 | ret = del_qgroup_relation_item(trans, quota_root, src, dst); | ||
911 | err = del_qgroup_relation_item(trans, quota_root, dst, src); | ||
912 | if (err && !ret) | ||
913 | ret = err; | ||
914 | |||
915 | spin_lock(&fs_info->qgroup_lock); | ||
916 | del_relation_rb(fs_info, src, dst); | ||
917 | |||
918 | spin_unlock(&fs_info->qgroup_lock); | ||
919 | |||
920 | return ret; | ||
921 | } | ||
922 | |||
923 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
924 | struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) | ||
925 | { | ||
926 | struct btrfs_root *quota_root; | ||
927 | struct btrfs_qgroup *qgroup; | ||
928 | int ret = 0; | ||
929 | |||
930 | quota_root = fs_info->quota_root; | ||
931 | if (!quota_root) | ||
932 | return -EINVAL; | ||
933 | |||
934 | ret = add_qgroup_item(trans, quota_root, qgroupid); | ||
935 | |||
936 | spin_lock(&fs_info->qgroup_lock); | ||
937 | qgroup = add_qgroup_rb(fs_info, qgroupid); | ||
938 | spin_unlock(&fs_info->qgroup_lock); | ||
939 | |||
940 | if (IS_ERR(qgroup)) | ||
941 | ret = PTR_ERR(qgroup); | ||
942 | |||
943 | return ret; | ||
944 | } | ||
945 | |||
946 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
947 | struct btrfs_fs_info *fs_info, u64 qgroupid) | ||
948 | { | ||
949 | struct btrfs_root *quota_root; | ||
950 | int ret = 0; | ||
951 | |||
952 | quota_root = fs_info->quota_root; | ||
953 | if (!quota_root) | ||
954 | return -EINVAL; | ||
955 | |||
956 | ret = del_qgroup_item(trans, quota_root, qgroupid); | ||
957 | |||
958 | spin_lock(&fs_info->qgroup_lock); | ||
959 | del_qgroup_rb(quota_root->fs_info, qgroupid); | ||
960 | |||
961 | spin_unlock(&fs_info->qgroup_lock); | ||
962 | |||
963 | return ret; | ||
964 | } | ||
965 | |||
966 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
967 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
968 | struct btrfs_qgroup_limit *limit) | ||
969 | { | ||
970 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
971 | struct btrfs_qgroup *qgroup; | ||
972 | int ret = 0; | ||
973 | |||
974 | if (!quota_root) | ||
975 | return -EINVAL; | ||
976 | |||
977 | ret = update_qgroup_limit_item(trans, quota_root, qgroupid, | ||
978 | limit->flags, limit->max_rfer, | ||
979 | limit->max_excl, limit->rsv_rfer, | ||
980 | limit->rsv_excl); | ||
981 | if (ret) { | ||
982 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
983 | printk(KERN_INFO "unable to update quota limit for %llu\n", | ||
984 | (unsigned long long)qgroupid); | ||
985 | } | ||
986 | |||
987 | spin_lock(&fs_info->qgroup_lock); | ||
988 | |||
989 | qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
990 | if (!qgroup) { | ||
991 | ret = -ENOENT; | ||
992 | goto unlock; | ||
993 | } | ||
994 | qgroup->lim_flags = limit->flags; | ||
995 | qgroup->max_rfer = limit->max_rfer; | ||
996 | qgroup->max_excl = limit->max_excl; | ||
997 | qgroup->rsv_rfer = limit->rsv_rfer; | ||
998 | qgroup->rsv_excl = limit->rsv_excl; | ||
999 | |||
1000 | unlock: | ||
1001 | spin_unlock(&fs_info->qgroup_lock); | ||
1002 | |||
1003 | return ret; | ||
1004 | } | ||
1005 | |||
1006 | static void qgroup_dirty(struct btrfs_fs_info *fs_info, | ||
1007 | struct btrfs_qgroup *qgroup) | ||
1008 | { | ||
1009 | if (list_empty(&qgroup->dirty)) | ||
1010 | list_add(&qgroup->dirty, &fs_info->dirty_qgroups); | ||
1011 | } | ||
1012 | |||
1013 | /* | ||
1014 | * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts | ||
1015 | * the modification into a list that's later used by btrfs_end_transaction to | ||
1016 | * pass the recorded modifications on to btrfs_qgroup_account_ref. | ||
1017 | */ | ||
1018 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
1019 | struct btrfs_delayed_ref_node *node, | ||
1020 | struct btrfs_delayed_extent_op *extent_op) | ||
1021 | { | ||
1022 | struct qgroup_update *u; | ||
1023 | |||
1024 | BUG_ON(!trans->delayed_ref_elem.seq); | ||
1025 | u = kmalloc(sizeof(*u), GFP_NOFS); | ||
1026 | if (!u) | ||
1027 | return -ENOMEM; | ||
1028 | |||
1029 | u->node = node; | ||
1030 | u->extent_op = extent_op; | ||
1031 | list_add_tail(&u->list, &trans->qgroup_ref_list); | ||
1032 | |||
1033 | return 0; | ||
1034 | } | ||
1035 | |||
1036 | /* | ||
1037 | * btrfs_qgroup_account_ref is called for every ref that is added to or deleted | ||
1038 | * from the fs. First, all roots referencing the extent are searched, and | ||
1039 | * then the space is accounted accordingly to the different roots. The | ||
1040 | * accounting algorithm works in 3 steps documented inline. | ||
1041 | */ | ||
1042 | int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | ||
1043 | struct btrfs_fs_info *fs_info, | ||
1044 | struct btrfs_delayed_ref_node *node, | ||
1045 | struct btrfs_delayed_extent_op *extent_op) | ||
1046 | { | ||
1047 | struct btrfs_key ins; | ||
1048 | struct btrfs_root *quota_root; | ||
1049 | u64 ref_root; | ||
1050 | struct btrfs_qgroup *qgroup; | ||
1051 | struct ulist_node *unode; | ||
1052 | struct ulist *roots = NULL; | ||
1053 | struct ulist *tmp = NULL; | ||
1054 | struct ulist_iterator uiter; | ||
1055 | u64 seq; | ||
1056 | int ret = 0; | ||
1057 | int sgn; | ||
1058 | |||
1059 | if (!fs_info->quota_enabled) | ||
1060 | return 0; | ||
1061 | |||
1062 | BUG_ON(!fs_info->quota_root); | ||
1063 | |||
1064 | ins.objectid = node->bytenr; | ||
1065 | ins.offset = node->num_bytes; | ||
1066 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
1067 | |||
1068 | if (node->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
1069 | node->type == BTRFS_SHARED_BLOCK_REF_KEY) { | ||
1070 | struct btrfs_delayed_tree_ref *ref; | ||
1071 | ref = btrfs_delayed_node_to_tree_ref(node); | ||
1072 | ref_root = ref->root; | ||
1073 | } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || | ||
1074 | node->type == BTRFS_SHARED_DATA_REF_KEY) { | ||
1075 | struct btrfs_delayed_data_ref *ref; | ||
1076 | ref = btrfs_delayed_node_to_data_ref(node); | ||
1077 | ref_root = ref->root; | ||
1078 | } else { | ||
1079 | BUG(); | ||
1080 | } | ||
1081 | |||
1082 | if (!is_fstree(ref_root)) { | ||
1083 | /* | ||
1084 | * non-fs-trees are not being accounted | ||
1085 | */ | ||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | switch (node->action) { | ||
1090 | case BTRFS_ADD_DELAYED_REF: | ||
1091 | case BTRFS_ADD_DELAYED_EXTENT: | ||
1092 | sgn = 1; | ||
1093 | break; | ||
1094 | case BTRFS_DROP_DELAYED_REF: | ||
1095 | sgn = -1; | ||
1096 | break; | ||
1097 | case BTRFS_UPDATE_DELAYED_HEAD: | ||
1098 | return 0; | ||
1099 | default: | ||
1100 | BUG(); | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | * the delayed ref sequence number we pass depends on the direction of | ||
1105 | * the operation. for add operations, we pass (node->seq - 1) to skip | ||
1106 | * the delayed ref's current sequence number, because we need the state | ||
1107 | * of the tree before the add operation. for delete operations, we pass | ||
1108 | * (node->seq) to include the delayed ref's current sequence number, | ||
1109 | * because we need the state of the tree after the delete operation. | ||
1110 | */ | ||
1111 | ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, | ||
1112 | sgn > 0 ? node->seq - 1 : node->seq, &roots); | ||
1113 | if (ret < 0) | ||
1114 | goto out; | ||
1115 | |||
1116 | spin_lock(&fs_info->qgroup_lock); | ||
1117 | quota_root = fs_info->quota_root; | ||
1118 | if (!quota_root) | ||
1119 | goto unlock; | ||
1120 | |||
1121 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
1122 | if (!qgroup) | ||
1123 | goto unlock; | ||
1124 | |||
1125 | /* | ||
1126 | * step 1: for each old ref, visit all nodes once and inc refcnt | ||
1127 | */ | ||
1128 | tmp = ulist_alloc(GFP_ATOMIC); | ||
1129 | if (!tmp) { | ||
1130 | ret = -ENOMEM; | ||
1131 | goto unlock; | ||
1132 | } | ||
1133 | seq = fs_info->qgroup_seq; | ||
1134 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | ||
1135 | |||
1136 | ULIST_ITER_INIT(&uiter); | ||
1137 | while ((unode = ulist_next(roots, &uiter))) { | ||
1138 | struct ulist_node *tmp_unode; | ||
1139 | struct ulist_iterator tmp_uiter; | ||
1140 | struct btrfs_qgroup *qg; | ||
1141 | |||
1142 | qg = find_qgroup_rb(fs_info, unode->val); | ||
1143 | if (!qg) | ||
1144 | continue; | ||
1145 | |||
1146 | ulist_reinit(tmp); | ||
1147 | /* XXX id not needed */ | ||
1148 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | ||
1149 | ULIST_ITER_INIT(&tmp_uiter); | ||
1150 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | ||
1151 | struct btrfs_qgroup_list *glist; | ||
1152 | |||
1153 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | ||
1154 | if (qg->refcnt < seq) | ||
1155 | qg->refcnt = seq + 1; | ||
1156 | else | ||
1157 | ++qg->refcnt; | ||
1158 | |||
1159 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1160 | ulist_add(tmp, glist->group->qgroupid, | ||
1161 | (unsigned long)glist->group, | ||
1162 | GFP_ATOMIC); | ||
1163 | } | ||
1164 | } | ||
1165 | } | ||
1166 | |||
1167 | /* | ||
1168 | * step 2: walk from the new root | ||
1169 | */ | ||
1170 | ulist_reinit(tmp); | ||
1171 | ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | ||
1172 | ULIST_ITER_INIT(&uiter); | ||
1173 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1174 | struct btrfs_qgroup *qg; | ||
1175 | struct btrfs_qgroup_list *glist; | ||
1176 | |||
1177 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1178 | if (qg->refcnt < seq) { | ||
1179 | /* not visited by step 1 */ | ||
1180 | qg->rfer += sgn * node->num_bytes; | ||
1181 | qg->rfer_cmpr += sgn * node->num_bytes; | ||
1182 | if (roots->nnodes == 0) { | ||
1183 | qg->excl += sgn * node->num_bytes; | ||
1184 | qg->excl_cmpr += sgn * node->num_bytes; | ||
1185 | } | ||
1186 | qgroup_dirty(fs_info, qg); | ||
1187 | } | ||
1188 | WARN_ON(qg->tag >= seq); | ||
1189 | qg->tag = seq; | ||
1190 | |||
1191 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1192 | ulist_add(tmp, glist->group->qgroupid, | ||
1193 | (unsigned long)glist->group, GFP_ATOMIC); | ||
1194 | } | ||
1195 | } | ||
1196 | |||
1197 | /* | ||
1198 | * step 3: walk again from old refs | ||
1199 | */ | ||
1200 | ULIST_ITER_INIT(&uiter); | ||
1201 | while ((unode = ulist_next(roots, &uiter))) { | ||
1202 | struct btrfs_qgroup *qg; | ||
1203 | struct ulist_node *tmp_unode; | ||
1204 | struct ulist_iterator tmp_uiter; | ||
1205 | |||
1206 | qg = find_qgroup_rb(fs_info, unode->val); | ||
1207 | if (!qg) | ||
1208 | continue; | ||
1209 | |||
1210 | ulist_reinit(tmp); | ||
1211 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | ||
1212 | ULIST_ITER_INIT(&tmp_uiter); | ||
1213 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | ||
1214 | struct btrfs_qgroup_list *glist; | ||
1215 | |||
1216 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | ||
1217 | if (qg->tag == seq) | ||
1218 | continue; | ||
1219 | |||
1220 | if (qg->refcnt - seq == roots->nnodes) { | ||
1221 | qg->excl -= sgn * node->num_bytes; | ||
1222 | qg->excl_cmpr -= sgn * node->num_bytes; | ||
1223 | qgroup_dirty(fs_info, qg); | ||
1224 | } | ||
1225 | |||
1226 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1227 | ulist_add(tmp, glist->group->qgroupid, | ||
1228 | (unsigned long)glist->group, | ||
1229 | GFP_ATOMIC); | ||
1230 | } | ||
1231 | } | ||
1232 | } | ||
1233 | ret = 0; | ||
1234 | unlock: | ||
1235 | spin_unlock(&fs_info->qgroup_lock); | ||
1236 | out: | ||
1237 | ulist_free(roots); | ||
1238 | ulist_free(tmp); | ||
1239 | |||
1240 | return ret; | ||
1241 | } | ||
1242 | |||
1243 | /* | ||
1244 | * called from commit_transaction. Writes all changed qgroups to disk. | ||
1245 | */ | ||
1246 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | ||
1247 | struct btrfs_fs_info *fs_info) | ||
1248 | { | ||
1249 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
1250 | int ret = 0; | ||
1251 | |||
1252 | if (!quota_root) | ||
1253 | goto out; | ||
1254 | |||
1255 | fs_info->quota_enabled = fs_info->pending_quota_state; | ||
1256 | |||
1257 | spin_lock(&fs_info->qgroup_lock); | ||
1258 | while (!list_empty(&fs_info->dirty_qgroups)) { | ||
1259 | struct btrfs_qgroup *qgroup; | ||
1260 | qgroup = list_first_entry(&fs_info->dirty_qgroups, | ||
1261 | struct btrfs_qgroup, dirty); | ||
1262 | list_del_init(&qgroup->dirty); | ||
1263 | spin_unlock(&fs_info->qgroup_lock); | ||
1264 | ret = update_qgroup_info_item(trans, quota_root, qgroup); | ||
1265 | if (ret) | ||
1266 | fs_info->qgroup_flags |= | ||
1267 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
1268 | spin_lock(&fs_info->qgroup_lock); | ||
1269 | } | ||
1270 | if (fs_info->quota_enabled) | ||
1271 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; | ||
1272 | else | ||
1273 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; | ||
1274 | spin_unlock(&fs_info->qgroup_lock); | ||
1275 | |||
1276 | ret = update_qgroup_status_item(trans, fs_info, quota_root); | ||
1277 | if (ret) | ||
1278 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
1279 | |||
1280 | out: | ||
1281 | |||
1282 | return ret; | ||
1283 | } | ||
1284 | |||
1285 | /* | ||
1286 | * copy the acounting information between qgroups. This is necessary when a | ||
1287 | * snapshot or a subvolume is created | ||
1288 | */ | ||
1289 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
1290 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
1291 | struct btrfs_qgroup_inherit *inherit) | ||
1292 | { | ||
1293 | int ret = 0; | ||
1294 | int i; | ||
1295 | u64 *i_qgroups; | ||
1296 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
1297 | struct btrfs_qgroup *srcgroup; | ||
1298 | struct btrfs_qgroup *dstgroup; | ||
1299 | u32 level_size = 0; | ||
1300 | |||
1301 | if (!fs_info->quota_enabled) | ||
1302 | return 0; | ||
1303 | |||
1304 | if (!quota_root) | ||
1305 | return -EINVAL; | ||
1306 | |||
1307 | /* | ||
1308 | * create a tracking group for the subvol itself | ||
1309 | */ | ||
1310 | ret = add_qgroup_item(trans, quota_root, objectid); | ||
1311 | if (ret) | ||
1312 | goto out; | ||
1313 | |||
1314 | if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { | ||
1315 | ret = update_qgroup_limit_item(trans, quota_root, objectid, | ||
1316 | inherit->lim.flags, | ||
1317 | inherit->lim.max_rfer, | ||
1318 | inherit->lim.max_excl, | ||
1319 | inherit->lim.rsv_rfer, | ||
1320 | inherit->lim.rsv_excl); | ||
1321 | if (ret) | ||
1322 | goto out; | ||
1323 | } | ||
1324 | |||
1325 | if (srcid) { | ||
1326 | struct btrfs_root *srcroot; | ||
1327 | struct btrfs_key srckey; | ||
1328 | int srcroot_level; | ||
1329 | |||
1330 | srckey.objectid = srcid; | ||
1331 | srckey.type = BTRFS_ROOT_ITEM_KEY; | ||
1332 | srckey.offset = (u64)-1; | ||
1333 | srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); | ||
1334 | if (IS_ERR(srcroot)) { | ||
1335 | ret = PTR_ERR(srcroot); | ||
1336 | goto out; | ||
1337 | } | ||
1338 | |||
1339 | rcu_read_lock(); | ||
1340 | srcroot_level = btrfs_header_level(srcroot->node); | ||
1341 | level_size = btrfs_level_size(srcroot, srcroot_level); | ||
1342 | rcu_read_unlock(); | ||
1343 | } | ||
1344 | |||
1345 | /* | ||
1346 | * add qgroup to all inherited groups | ||
1347 | */ | ||
1348 | if (inherit) { | ||
1349 | i_qgroups = (u64 *)(inherit + 1); | ||
1350 | for (i = 0; i < inherit->num_qgroups; ++i) { | ||
1351 | ret = add_qgroup_relation_item(trans, quota_root, | ||
1352 | objectid, *i_qgroups); | ||
1353 | if (ret) | ||
1354 | goto out; | ||
1355 | ret = add_qgroup_relation_item(trans, quota_root, | ||
1356 | *i_qgroups, objectid); | ||
1357 | if (ret) | ||
1358 | goto out; | ||
1359 | ++i_qgroups; | ||
1360 | } | ||
1361 | } | ||
1362 | |||
1363 | |||
1364 | spin_lock(&fs_info->qgroup_lock); | ||
1365 | |||
1366 | dstgroup = add_qgroup_rb(fs_info, objectid); | ||
1367 | if (!dstgroup) | ||
1368 | goto unlock; | ||
1369 | |||
1370 | if (srcid) { | ||
1371 | srcgroup = find_qgroup_rb(fs_info, srcid); | ||
1372 | if (!srcgroup) | ||
1373 | goto unlock; | ||
1374 | dstgroup->rfer = srcgroup->rfer - level_size; | ||
1375 | dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; | ||
1376 | srcgroup->excl = level_size; | ||
1377 | srcgroup->excl_cmpr = level_size; | ||
1378 | qgroup_dirty(fs_info, dstgroup); | ||
1379 | qgroup_dirty(fs_info, srcgroup); | ||
1380 | } | ||
1381 | |||
1382 | if (!inherit) | ||
1383 | goto unlock; | ||
1384 | |||
1385 | i_qgroups = (u64 *)(inherit + 1); | ||
1386 | for (i = 0; i < inherit->num_qgroups; ++i) { | ||
1387 | ret = add_relation_rb(quota_root->fs_info, objectid, | ||
1388 | *i_qgroups); | ||
1389 | if (ret) | ||
1390 | goto unlock; | ||
1391 | ++i_qgroups; | ||
1392 | } | ||
1393 | |||
1394 | for (i = 0; i < inherit->num_ref_copies; ++i) { | ||
1395 | struct btrfs_qgroup *src; | ||
1396 | struct btrfs_qgroup *dst; | ||
1397 | |||
1398 | src = find_qgroup_rb(fs_info, i_qgroups[0]); | ||
1399 | dst = find_qgroup_rb(fs_info, i_qgroups[1]); | ||
1400 | |||
1401 | if (!src || !dst) { | ||
1402 | ret = -EINVAL; | ||
1403 | goto unlock; | ||
1404 | } | ||
1405 | |||
1406 | dst->rfer = src->rfer - level_size; | ||
1407 | dst->rfer_cmpr = src->rfer_cmpr - level_size; | ||
1408 | i_qgroups += 2; | ||
1409 | } | ||
1410 | for (i = 0; i < inherit->num_excl_copies; ++i) { | ||
1411 | struct btrfs_qgroup *src; | ||
1412 | struct btrfs_qgroup *dst; | ||
1413 | |||
1414 | src = find_qgroup_rb(fs_info, i_qgroups[0]); | ||
1415 | dst = find_qgroup_rb(fs_info, i_qgroups[1]); | ||
1416 | |||
1417 | if (!src || !dst) { | ||
1418 | ret = -EINVAL; | ||
1419 | goto unlock; | ||
1420 | } | ||
1421 | |||
1422 | dst->excl = src->excl + level_size; | ||
1423 | dst->excl_cmpr = src->excl_cmpr + level_size; | ||
1424 | i_qgroups += 2; | ||
1425 | } | ||
1426 | |||
1427 | unlock: | ||
1428 | spin_unlock(&fs_info->qgroup_lock); | ||
1429 | out: | ||
1430 | return ret; | ||
1431 | } | ||
1432 | |||
1433 | /* | ||
1434 | * reserve some space for a qgroup and all its parents. The reservation takes | ||
1435 | * place with start_transaction or dealloc_reserve, similar to ENOSPC | ||
1436 | * accounting. If not enough space is available, EDQUOT is returned. | ||
1437 | * We assume that the requested space is new for all qgroups. | ||
1438 | */ | ||
1439 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | ||
1440 | { | ||
1441 | struct btrfs_root *quota_root; | ||
1442 | struct btrfs_qgroup *qgroup; | ||
1443 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1444 | u64 ref_root = root->root_key.objectid; | ||
1445 | int ret = 0; | ||
1446 | struct ulist *ulist = NULL; | ||
1447 | struct ulist_node *unode; | ||
1448 | struct ulist_iterator uiter; | ||
1449 | |||
1450 | if (!is_fstree(ref_root)) | ||
1451 | return 0; | ||
1452 | |||
1453 | if (num_bytes == 0) | ||
1454 | return 0; | ||
1455 | |||
1456 | spin_lock(&fs_info->qgroup_lock); | ||
1457 | quota_root = fs_info->quota_root; | ||
1458 | if (!quota_root) | ||
1459 | goto out; | ||
1460 | |||
1461 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
1462 | if (!qgroup) | ||
1463 | goto out; | ||
1464 | |||
1465 | /* | ||
1466 | * in a first step, we check all affected qgroups if any limits would | ||
1467 | * be exceeded | ||
1468 | */ | ||
1469 | ulist = ulist_alloc(GFP_ATOMIC); | ||
1470 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | ||
1471 | ULIST_ITER_INIT(&uiter); | ||
1472 | while ((unode = ulist_next(ulist, &uiter))) { | ||
1473 | struct btrfs_qgroup *qg; | ||
1474 | struct btrfs_qgroup_list *glist; | ||
1475 | |||
1476 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1477 | |||
1478 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | ||
1479 | qg->reserved + qg->rfer + num_bytes > | ||
1480 | qg->max_rfer) | ||
1481 | ret = -EDQUOT; | ||
1482 | |||
1483 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && | ||
1484 | qg->reserved + qg->excl + num_bytes > | ||
1485 | qg->max_excl) | ||
1486 | ret = -EDQUOT; | ||
1487 | |||
1488 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1489 | ulist_add(ulist, glist->group->qgroupid, | ||
1490 | (unsigned long)glist->group, GFP_ATOMIC); | ||
1491 | } | ||
1492 | } | ||
1493 | if (ret) | ||
1494 | goto out; | ||
1495 | |||
1496 | /* | ||
1497 | * no limits exceeded, now record the reservation into all qgroups | ||
1498 | */ | ||
1499 | ULIST_ITER_INIT(&uiter); | ||
1500 | while ((unode = ulist_next(ulist, &uiter))) { | ||
1501 | struct btrfs_qgroup *qg; | ||
1502 | |||
1503 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1504 | |||
1505 | qg->reserved += num_bytes; | ||
1506 | } | ||
1507 | |||
1508 | out: | ||
1509 | spin_unlock(&fs_info->qgroup_lock); | ||
1510 | ulist_free(ulist); | ||
1511 | |||
1512 | return ret; | ||
1513 | } | ||
1514 | |||
1515 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | ||
1516 | { | ||
1517 | struct btrfs_root *quota_root; | ||
1518 | struct btrfs_qgroup *qgroup; | ||
1519 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1520 | struct ulist *ulist = NULL; | ||
1521 | struct ulist_node *unode; | ||
1522 | struct ulist_iterator uiter; | ||
1523 | u64 ref_root = root->root_key.objectid; | ||
1524 | |||
1525 | if (!is_fstree(ref_root)) | ||
1526 | return; | ||
1527 | |||
1528 | if (num_bytes == 0) | ||
1529 | return; | ||
1530 | |||
1531 | spin_lock(&fs_info->qgroup_lock); | ||
1532 | |||
1533 | quota_root = fs_info->quota_root; | ||
1534 | if (!quota_root) | ||
1535 | goto out; | ||
1536 | |||
1537 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
1538 | if (!qgroup) | ||
1539 | goto out; | ||
1540 | |||
1541 | ulist = ulist_alloc(GFP_ATOMIC); | ||
1542 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | ||
1543 | ULIST_ITER_INIT(&uiter); | ||
1544 | while ((unode = ulist_next(ulist, &uiter))) { | ||
1545 | struct btrfs_qgroup *qg; | ||
1546 | struct btrfs_qgroup_list *glist; | ||
1547 | |||
1548 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1549 | |||
1550 | qg->reserved -= num_bytes; | ||
1551 | |||
1552 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1553 | ulist_add(ulist, glist->group->qgroupid, | ||
1554 | (unsigned long)glist->group, GFP_ATOMIC); | ||
1555 | } | ||
1556 | } | ||
1557 | |||
1558 | out: | ||
1559 | spin_unlock(&fs_info->qgroup_lock); | ||
1560 | ulist_free(ulist); | ||
1561 | } | ||
1562 | |||
1563 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | ||
1564 | { | ||
1565 | if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) | ||
1566 | return; | ||
1567 | printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", | ||
1568 | trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
1569 | trans->delayed_ref_elem.seq); | ||
1570 | BUG(); | ||
1571 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 646ee21bb03..c5dbd914967 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1239,10 +1239,11 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) | |||
1239 | node->bytenr, &node->rb_node); | 1239 | node->bytenr, &node->rb_node); |
1240 | spin_unlock(&rc->reloc_root_tree.lock); | 1240 | spin_unlock(&rc->reloc_root_tree.lock); |
1241 | if (rb_node) { | 1241 | if (rb_node) { |
1242 | kfree(node); | ||
1243 | btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " | 1242 | btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " |
1244 | "for start=%llu while inserting into relocation " | 1243 | "for start=%llu while inserting into relocation " |
1245 | "tree\n"); | 1244 | "tree\n"); |
1245 | kfree(node); | ||
1246 | return -EEXIST; | ||
1246 | } | 1247 | } |
1247 | 1248 | ||
1248 | list_add_tail(&root->root_list, &rc->reloc_roots); | 1249 | list_add_tail(&root->root_list, &rc->reloc_roots); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 24fb8ce4e07..6bb465cca20 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -16,12 +16,55 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/uuid.h> | ||
19 | #include "ctree.h" | 20 | #include "ctree.h" |
20 | #include "transaction.h" | 21 | #include "transaction.h" |
21 | #include "disk-io.h" | 22 | #include "disk-io.h" |
22 | #include "print-tree.h" | 23 | #include "print-tree.h" |
23 | 24 | ||
24 | /* | 25 | /* |
26 | * Read a root item from the tree. In case we detect a root item smaller then | ||
27 | * sizeof(root_item), we know it's an old version of the root structure and | ||
28 | * initialize all new fields to zero. The same happens if we detect mismatching | ||
29 | * generation numbers as then we know the root was once mounted with an older | ||
30 | * kernel that was not aware of the root item structure change. | ||
31 | */ | ||
32 | void btrfs_read_root_item(struct btrfs_root *root, | ||
33 | struct extent_buffer *eb, int slot, | ||
34 | struct btrfs_root_item *item) | ||
35 | { | ||
36 | uuid_le uuid; | ||
37 | int len; | ||
38 | int need_reset = 0; | ||
39 | |||
40 | len = btrfs_item_size_nr(eb, slot); | ||
41 | read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot), | ||
42 | min_t(int, len, (int)sizeof(*item))); | ||
43 | if (len < sizeof(*item)) | ||
44 | need_reset = 1; | ||
45 | if (!need_reset && btrfs_root_generation(item) | ||
46 | != btrfs_root_generation_v2(item)) { | ||
47 | if (btrfs_root_generation_v2(item) != 0) { | ||
48 | printk(KERN_WARNING "btrfs: mismatching " | ||
49 | "generation and generation_v2 " | ||
50 | "found in root item. This root " | ||
51 | "was probably mounted with an " | ||
52 | "older kernel. Resetting all " | ||
53 | "new fields.\n"); | ||
54 | } | ||
55 | need_reset = 1; | ||
56 | } | ||
57 | if (need_reset) { | ||
58 | memset(&item->generation_v2, 0, | ||
59 | sizeof(*item) - offsetof(struct btrfs_root_item, | ||
60 | generation_v2)); | ||
61 | |||
62 | uuid_le_gen(&uuid); | ||
63 | memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE); | ||
64 | } | ||
65 | } | ||
66 | |||
67 | /* | ||
25 | * lookup the root with the highest offset for a given objectid. The key we do | 68 | * lookup the root with the highest offset for a given objectid. The key we do |
26 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 | 69 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 |
27 | * on error. | 70 | * on error. |
@@ -61,10 +104,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
61 | goto out; | 104 | goto out; |
62 | } | 105 | } |
63 | if (item) | 106 | if (item) |
64 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), | 107 | btrfs_read_root_item(root, l, slot, item); |
65 | sizeof(*item)); | ||
66 | if (key) | 108 | if (key) |
67 | memcpy(key, &found_key, sizeof(found_key)); | 109 | memcpy(key, &found_key, sizeof(found_key)); |
110 | |||
68 | ret = 0; | 111 | ret = 0; |
69 | out: | 112 | out: |
70 | btrfs_free_path(path); | 113 | btrfs_free_path(path); |
@@ -91,16 +134,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
91 | int ret; | 134 | int ret; |
92 | int slot; | 135 | int slot; |
93 | unsigned long ptr; | 136 | unsigned long ptr; |
137 | int old_len; | ||
94 | 138 | ||
95 | path = btrfs_alloc_path(); | 139 | path = btrfs_alloc_path(); |
96 | if (!path) | 140 | if (!path) |
97 | return -ENOMEM; | 141 | return -ENOMEM; |
98 | 142 | ||
99 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
100 | if (ret < 0) { | 144 | if (ret < 0) |
101 | btrfs_abort_transaction(trans, root, ret); | 145 | goto out_abort; |
102 | goto out; | ||
103 | } | ||
104 | 146 | ||
105 | if (ret != 0) { | 147 | if (ret != 0) { |
106 | btrfs_print_leaf(root, path->nodes[0]); | 148 | btrfs_print_leaf(root, path->nodes[0]); |
@@ -113,16 +155,56 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
113 | l = path->nodes[0]; | 155 | l = path->nodes[0]; |
114 | slot = path->slots[0]; | 156 | slot = path->slots[0]; |
115 | ptr = btrfs_item_ptr_offset(l, slot); | 157 | ptr = btrfs_item_ptr_offset(l, slot); |
158 | old_len = btrfs_item_size_nr(l, slot); | ||
159 | |||
160 | /* | ||
161 | * If this is the first time we update the root item which originated | ||
162 | * from an older kernel, we need to enlarge the item size to make room | ||
163 | * for the added fields. | ||
164 | */ | ||
165 | if (old_len < sizeof(*item)) { | ||
166 | btrfs_release_path(path); | ||
167 | ret = btrfs_search_slot(trans, root, key, path, | ||
168 | -1, 1); | ||
169 | if (ret < 0) | ||
170 | goto out_abort; | ||
171 | ret = btrfs_del_item(trans, root, path); | ||
172 | if (ret < 0) | ||
173 | goto out_abort; | ||
174 | btrfs_release_path(path); | ||
175 | ret = btrfs_insert_empty_item(trans, root, path, | ||
176 | key, sizeof(*item)); | ||
177 | if (ret < 0) | ||
178 | goto out_abort; | ||
179 | l = path->nodes[0]; | ||
180 | slot = path->slots[0]; | ||
181 | ptr = btrfs_item_ptr_offset(l, slot); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Update generation_v2 so at the next mount we know the new root | ||
186 | * fields are valid. | ||
187 | */ | ||
188 | btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); | ||
189 | |||
116 | write_extent_buffer(l, item, ptr, sizeof(*item)); | 190 | write_extent_buffer(l, item, ptr, sizeof(*item)); |
117 | btrfs_mark_buffer_dirty(path->nodes[0]); | 191 | btrfs_mark_buffer_dirty(path->nodes[0]); |
118 | out: | 192 | out: |
119 | btrfs_free_path(path); | 193 | btrfs_free_path(path); |
120 | return ret; | 194 | return ret; |
195 | |||
196 | out_abort: | ||
197 | btrfs_abort_transaction(trans, root, ret); | ||
198 | goto out; | ||
121 | } | 199 | } |
122 | 200 | ||
123 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 201 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
124 | struct btrfs_key *key, struct btrfs_root_item *item) | 202 | struct btrfs_key *key, struct btrfs_root_item *item) |
125 | { | 203 | { |
204 | /* | ||
205 | * Make sure generation v1 and v2 match. See update_root for details. | ||
206 | */ | ||
207 | btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); | ||
126 | return btrfs_insert_item(trans, root, key, item, sizeof(*item)); | 208 | return btrfs_insert_item(trans, root, key, item, sizeof(*item)); |
127 | } | 209 | } |
128 | 210 | ||
@@ -454,3 +536,16 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) | |||
454 | root_item->byte_limit = 0; | 536 | root_item->byte_limit = 0; |
455 | } | 537 | } |
456 | } | 538 | } |
539 | |||
540 | void btrfs_update_root_times(struct btrfs_trans_handle *trans, | ||
541 | struct btrfs_root *root) | ||
542 | { | ||
543 | struct btrfs_root_item *item = &root->root_item; | ||
544 | struct timespec ct = CURRENT_TIME; | ||
545 | |||
546 | spin_lock(&root->root_times_lock); | ||
547 | item->ctransid = trans->transid; | ||
548 | item->ctime.sec = cpu_to_le64(ct.tv_sec); | ||
549 | item->ctime.nsec = cpu_to_le64(ct.tv_nsec); | ||
550 | spin_unlock(&root->root_times_lock); | ||
551 | } | ||
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c new file mode 100644 index 00000000000..c8ca49b1bb4 --- /dev/null +++ b/fs/btrfs/send.c | |||
@@ -0,0 +1,4571 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 Alexander Block. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | |
17 | */ | ||
18 | |||
19 | #include <linux/bsearch.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/file.h> | ||
22 | #include <linux/sort.h> | ||
23 | #include <linux/mount.h> | ||
24 | #include <linux/xattr.h> | ||
25 | #include <linux/posix_acl_xattr.h> | ||
26 | #include <linux/radix-tree.h> | ||
27 | #include <linux/crc32c.h> | ||
28 | |||
29 | #include "send.h" | ||
30 | #include "backref.h" | ||
31 | #include "locking.h" | ||
32 | #include "disk-io.h" | ||
33 | #include "btrfs_inode.h" | ||
34 | #include "transaction.h" | ||
35 | |||
36 | static int g_verbose = 0; | ||
37 | |||
/*
 * printk() that is only executed while g_verbose is non-zero.
 *
 * Wrapped in do { } while (0) so that the macro always expands to exactly one
 * statement; a bare "if" would silently capture the "else" of an enclosing
 * unbraced if/else at the call site.
 */
#define verbose_printk(...) \
	do { \
		if (g_verbose) \
			printk(__VA_ARGS__); \
	} while (0)
39 | |||
40 | /* | ||
41 | * A fs_path is a helper to dynamically build path names with unknown size. | ||
42 | * It reallocates the internal buffer on demand. | ||
43 | * It allows fast adding of path elements on the right side (normal path) and | ||
44 | * fast adding to the left side (reversed path). A reversed path can also be | ||
45 | * unreversed if needed. | ||
46 | */ | ||
47 | struct fs_path { | ||
48 | union { | ||
49 | struct { | ||
50 | char *start; | ||
51 | char *end; | ||
52 | char *prepared; | ||
53 | |||
54 | char *buf; | ||
55 | int buf_len; | ||
56 | int reversed:1; | ||
57 | int virtual_mem:1; | ||
58 | char inline_buf[]; | ||
59 | }; | ||
60 | char pad[PAGE_SIZE]; | ||
61 | }; | ||
62 | }; | ||
63 | #define FS_PATH_INLINE_SIZE \ | ||
64 | (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf)) | ||
65 | |||
66 | |||
/*
 * One candidate root to clone extents from; reused for each extent.
 *
 * send_ctx.clone_roots is an array of these which __iterate_backrefs()
 * searches with bsearch() keyed on root->objectid.
 */
struct clone_root {
	struct btrfs_root *root;
	/* NOTE(review): presumably inode/offset of the reference found in
	 * this root for the current extent - confirm against the users. */
	u64 ino;
	u64 offset;

	/* NOTE(review): presumably the number of references found in this
	 * root for the current extent - confirm against the users. */
	u64 found_refs;
};
75 | |||
76 | #define SEND_CTX_MAX_NAME_CACHE_SIZE 128 | ||
77 | #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) | ||
78 | |||
/*
 * State of one send operation.
 */
struct send_ctx {
	/* output file and write position, used by write_buf() */
	struct file *send_filp;
	loff_t send_off;
	/*
	 * Buffer in which one command is assembled: begin_cmd() and tlv_put()
	 * append to it (send_size bytes used out of send_max_size), send_cmd()
	 * writes it out and resets send_size to 0.
	 */
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	/* byte counters maintained by send_cmd(): overall and per command */
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];

	struct vfsmount *mnt;

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	/* searched by root->objectid with bsearch() in __iterate_backrefs() */
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * infos of the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	int cur_inode_first_ref_orphan;
	u64 cur_inode_size;
	u64 cur_inode_mode;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file *cur_inode_filp;
	char *read_buf;
};
125 | |||
/*
 * NOTE(review): presumably one entry of send_ctx.name_cache, keyed by
 * ino/gen - the users are outside of this part of the file, confirm there.
 *
 * The name is stored inline after the fixed part (flexible array member
 * name[], name_len bytes).
 */
struct name_cache_entry {
	struct list_head list;
	struct list_head use_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};
138 | |||
139 | static void fs_path_reset(struct fs_path *p) | ||
140 | { | ||
141 | if (p->reversed) { | ||
142 | p->start = p->buf + p->buf_len - 1; | ||
143 | p->end = p->start; | ||
144 | *p->start = 0; | ||
145 | } else { | ||
146 | p->start = p->buf; | ||
147 | p->end = p->start; | ||
148 | *p->start = 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static struct fs_path *fs_path_alloc(struct send_ctx *sctx) | ||
153 | { | ||
154 | struct fs_path *p; | ||
155 | |||
156 | p = kmalloc(sizeof(*p), GFP_NOFS); | ||
157 | if (!p) | ||
158 | return NULL; | ||
159 | p->reversed = 0; | ||
160 | p->virtual_mem = 0; | ||
161 | p->buf = p->inline_buf; | ||
162 | p->buf_len = FS_PATH_INLINE_SIZE; | ||
163 | fs_path_reset(p); | ||
164 | return p; | ||
165 | } | ||
166 | |||
167 | static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) | ||
168 | { | ||
169 | struct fs_path *p; | ||
170 | |||
171 | p = fs_path_alloc(sctx); | ||
172 | if (!p) | ||
173 | return NULL; | ||
174 | p->reversed = 1; | ||
175 | fs_path_reset(p); | ||
176 | return p; | ||
177 | } | ||
178 | |||
179 | static void fs_path_free(struct send_ctx *sctx, struct fs_path *p) | ||
180 | { | ||
181 | if (!p) | ||
182 | return; | ||
183 | if (p->buf != p->inline_buf) { | ||
184 | if (p->virtual_mem) | ||
185 | vfree(p->buf); | ||
186 | else | ||
187 | kfree(p->buf); | ||
188 | } | ||
189 | kfree(p); | ||
190 | } | ||
191 | |||
192 | static int fs_path_len(struct fs_path *p) | ||
193 | { | ||
194 | return p->end - p->start; | ||
195 | } | ||
196 | |||
197 | static int fs_path_ensure_buf(struct fs_path *p, int len) | ||
198 | { | ||
199 | char *tmp_buf; | ||
200 | int path_len; | ||
201 | int old_buf_len; | ||
202 | |||
203 | len++; | ||
204 | |||
205 | if (p->buf_len >= len) | ||
206 | return 0; | ||
207 | |||
208 | path_len = p->end - p->start; | ||
209 | old_buf_len = p->buf_len; | ||
210 | len = PAGE_ALIGN(len); | ||
211 | |||
212 | if (p->buf == p->inline_buf) { | ||
213 | tmp_buf = kmalloc(len, GFP_NOFS); | ||
214 | if (!tmp_buf) { | ||
215 | tmp_buf = vmalloc(len); | ||
216 | if (!tmp_buf) | ||
217 | return -ENOMEM; | ||
218 | p->virtual_mem = 1; | ||
219 | } | ||
220 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
221 | p->buf = tmp_buf; | ||
222 | p->buf_len = len; | ||
223 | } else { | ||
224 | if (p->virtual_mem) { | ||
225 | tmp_buf = vmalloc(len); | ||
226 | if (!tmp_buf) | ||
227 | return -ENOMEM; | ||
228 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
229 | vfree(p->buf); | ||
230 | } else { | ||
231 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | ||
232 | if (!tmp_buf) { | ||
233 | tmp_buf = vmalloc(len); | ||
234 | if (!tmp_buf) | ||
235 | return -ENOMEM; | ||
236 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
237 | kfree(p->buf); | ||
238 | p->virtual_mem = 1; | ||
239 | } | ||
240 | } | ||
241 | p->buf = tmp_buf; | ||
242 | p->buf_len = len; | ||
243 | } | ||
244 | if (p->reversed) { | ||
245 | tmp_buf = p->buf + old_buf_len - path_len - 1; | ||
246 | p->end = p->buf + p->buf_len - 1; | ||
247 | p->start = p->end - path_len; | ||
248 | memmove(p->start, tmp_buf, path_len + 1); | ||
249 | } else { | ||
250 | p->start = p->buf; | ||
251 | p->end = p->start + path_len; | ||
252 | } | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | ||
257 | { | ||
258 | int ret; | ||
259 | int new_len; | ||
260 | |||
261 | new_len = p->end - p->start + name_len; | ||
262 | if (p->start != p->end) | ||
263 | new_len++; | ||
264 | ret = fs_path_ensure_buf(p, new_len); | ||
265 | if (ret < 0) | ||
266 | goto out; | ||
267 | |||
268 | if (p->reversed) { | ||
269 | if (p->start != p->end) | ||
270 | *--p->start = '/'; | ||
271 | p->start -= name_len; | ||
272 | p->prepared = p->start; | ||
273 | } else { | ||
274 | if (p->start != p->end) | ||
275 | *p->end++ = '/'; | ||
276 | p->prepared = p->end; | ||
277 | p->end += name_len; | ||
278 | *p->end = 0; | ||
279 | } | ||
280 | |||
281 | out: | ||
282 | return ret; | ||
283 | } | ||
284 | |||
285 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) | ||
286 | { | ||
287 | int ret; | ||
288 | |||
289 | ret = fs_path_prepare_for_add(p, name_len); | ||
290 | if (ret < 0) | ||
291 | goto out; | ||
292 | memcpy(p->prepared, name, name_len); | ||
293 | p->prepared = NULL; | ||
294 | |||
295 | out: | ||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) | ||
300 | { | ||
301 | int ret; | ||
302 | |||
303 | ret = fs_path_prepare_for_add(p, p2->end - p2->start); | ||
304 | if (ret < 0) | ||
305 | goto out; | ||
306 | memcpy(p->prepared, p2->start, p2->end - p2->start); | ||
307 | p->prepared = NULL; | ||
308 | |||
309 | out: | ||
310 | return ret; | ||
311 | } | ||
312 | |||
313 | static int fs_path_add_from_extent_buffer(struct fs_path *p, | ||
314 | struct extent_buffer *eb, | ||
315 | unsigned long off, int len) | ||
316 | { | ||
317 | int ret; | ||
318 | |||
319 | ret = fs_path_prepare_for_add(p, len); | ||
320 | if (ret < 0) | ||
321 | goto out; | ||
322 | |||
323 | read_extent_buffer(eb, p->prepared, off, len); | ||
324 | p->prepared = NULL; | ||
325 | |||
326 | out: | ||
327 | return ret; | ||
328 | } | ||
329 | |||
330 | static void fs_path_remove(struct fs_path *p) | ||
331 | { | ||
332 | BUG_ON(p->reversed); | ||
333 | while (p->start != p->end && *p->end != '/') | ||
334 | p->end--; | ||
335 | *p->end = 0; | ||
336 | } | ||
337 | |||
338 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) | ||
339 | { | ||
340 | int ret; | ||
341 | |||
342 | p->reversed = from->reversed; | ||
343 | fs_path_reset(p); | ||
344 | |||
345 | ret = fs_path_add_path(p, from); | ||
346 | |||
347 | return ret; | ||
348 | } | ||
349 | |||
350 | |||
351 | static void fs_path_unreverse(struct fs_path *p) | ||
352 | { | ||
353 | char *tmp; | ||
354 | int len; | ||
355 | |||
356 | if (!p->reversed) | ||
357 | return; | ||
358 | |||
359 | tmp = p->start; | ||
360 | len = p->end - p->start; | ||
361 | p->start = p->buf; | ||
362 | p->end = p->start + len; | ||
363 | memmove(p->start, tmp, len + 1); | ||
364 | p->reversed = 0; | ||
365 | } | ||
366 | |||
367 | static struct btrfs_path *alloc_path_for_send(void) | ||
368 | { | ||
369 | struct btrfs_path *path; | ||
370 | |||
371 | path = btrfs_alloc_path(); | ||
372 | if (!path) | ||
373 | return NULL; | ||
374 | path->search_commit_root = 1; | ||
375 | path->skip_locking = 1; | ||
376 | return path; | ||
377 | } | ||
378 | |||
379 | static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | ||
380 | { | ||
381 | int ret; | ||
382 | mm_segment_t old_fs; | ||
383 | u32 pos = 0; | ||
384 | |||
385 | old_fs = get_fs(); | ||
386 | set_fs(KERNEL_DS); | ||
387 | |||
388 | while (pos < len) { | ||
389 | ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos, | ||
390 | &sctx->send_off); | ||
391 | /* TODO handle that correctly */ | ||
392 | /*if (ret == -ERESTARTSYS) { | ||
393 | continue; | ||
394 | }*/ | ||
395 | if (ret < 0) | ||
396 | goto out; | ||
397 | if (ret == 0) { | ||
398 | ret = -EIO; | ||
399 | goto out; | ||
400 | } | ||
401 | pos += ret; | ||
402 | } | ||
403 | |||
404 | ret = 0; | ||
405 | |||
406 | out: | ||
407 | set_fs(old_fs); | ||
408 | return ret; | ||
409 | } | ||
410 | |||
411 | static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len) | ||
412 | { | ||
413 | struct btrfs_tlv_header *hdr; | ||
414 | int total_len = sizeof(*hdr) + len; | ||
415 | int left = sctx->send_max_size - sctx->send_size; | ||
416 | |||
417 | if (unlikely(left < total_len)) | ||
418 | return -EOVERFLOW; | ||
419 | |||
420 | hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size); | ||
421 | hdr->tlv_type = cpu_to_le16(attr); | ||
422 | hdr->tlv_len = cpu_to_le16(len); | ||
423 | memcpy(hdr + 1, data, len); | ||
424 | sctx->send_size += total_len; | ||
425 | |||
426 | return 0; | ||
427 | } | ||
428 | |||
/*
 * Compiled out: helpers that put an 8, 16 or 32 bit integer attribute
 * (converted to little-endian where wider than one byte) into the current
 * command. The TLV_PUT_U8/U16/U32 macros further down expand to calls of
 * these functions and therefore cannot be used while this block is disabled.
 */
#if 0
static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
{
	return tlv_put(sctx, attr, &value, sizeof(value));
}

static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
{
	__le16 tmp = cpu_to_le16(value);
	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
}

static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
{
	__le32 tmp = cpu_to_le32(value);
	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
}
#endif
447 | |||
448 | static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value) | ||
449 | { | ||
450 | __le64 tmp = cpu_to_le64(value); | ||
451 | return tlv_put(sctx, attr, &tmp, sizeof(tmp)); | ||
452 | } | ||
453 | |||
454 | static int tlv_put_string(struct send_ctx *sctx, u16 attr, | ||
455 | const char *str, int len) | ||
456 | { | ||
457 | if (len == -1) | ||
458 | len = strlen(str); | ||
459 | return tlv_put(sctx, attr, str, len); | ||
460 | } | ||
461 | |||
462 | static int tlv_put_uuid(struct send_ctx *sctx, u16 attr, | ||
463 | const u8 *uuid) | ||
464 | { | ||
465 | return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); | ||
466 | } | ||
467 | |||
/*
 * Compiled out: helper that converts a struct timespec into a little-endian
 * struct btrfs_timespec (64 bit sec, 32 bit nsec) and puts it into the
 * current command. The TLV_PUT_TIMESPEC macro further down expands to a call
 * of this function and therefore cannot be used while this block is
 * disabled.
 */
#if 0
static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
			    struct timespec *ts)
{
	struct btrfs_timespec bts;
	bts.sec = cpu_to_le64(ts->tv_sec);
	bts.nsec = cpu_to_le32(ts->tv_nsec);
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}
#endif
478 | |||
479 | static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, | ||
480 | struct extent_buffer *eb, | ||
481 | struct btrfs_timespec *ts) | ||
482 | { | ||
483 | struct btrfs_timespec bts; | ||
484 | read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts)); | ||
485 | return tlv_put(sctx, attr, &bts, sizeof(bts)); | ||
486 | } | ||
487 | |||
488 | |||
/*
 * Convenience wrappers around the tlv_put*() helpers.
 *
 * Each macro stores the result in a local variable named "ret" and jumps to
 * a label named "tlv_put_failure" if the attribute could not be added, so
 * both must exist in the function that uses them.
 *
 * TLV_PUT takes its arguments in the order of tlv_put(): data first, then
 * length. The parameter names used to claim the opposite order although the
 * arguments were always forwarded positionally.
 *
 * TLV_PUT_U8/U16/U32 and TLV_PUT_TIMESPEC expand to helpers that are
 * currently compiled out (#if 0) and can only be used once those are enabled.
 */
#define TLV_PUT(sctx, attrtype, data, attrlen) \
	do { \
		ret = tlv_put(sctx, attrtype, data, attrlen); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
/* p is a struct fs_path *; parenthesized so any pointer expression works */
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, (p)->start, \
			(p)->end - (p)->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
	do { \
		ret = tlv_put_timespec(sctx, attrtype, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
538 | |||
539 | static int send_header(struct send_ctx *sctx) | ||
540 | { | ||
541 | struct btrfs_stream_header hdr; | ||
542 | |||
543 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); | ||
544 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); | ||
545 | |||
546 | return write_buf(sctx, &hdr, sizeof(hdr)); | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * For each command/item we want to send to userspace, we call this function. | ||
551 | */ | ||
552 | static int begin_cmd(struct send_ctx *sctx, int cmd) | ||
553 | { | ||
554 | struct btrfs_cmd_header *hdr; | ||
555 | |||
556 | if (!sctx->send_buf) { | ||
557 | WARN_ON(1); | ||
558 | return -EINVAL; | ||
559 | } | ||
560 | |||
561 | BUG_ON(sctx->send_size); | ||
562 | |||
563 | sctx->send_size += sizeof(*hdr); | ||
564 | hdr = (struct btrfs_cmd_header *)sctx->send_buf; | ||
565 | hdr->cmd = cpu_to_le16(cmd); | ||
566 | |||
567 | return 0; | ||
568 | } | ||
569 | |||
570 | static int send_cmd(struct send_ctx *sctx) | ||
571 | { | ||
572 | int ret; | ||
573 | struct btrfs_cmd_header *hdr; | ||
574 | u32 crc; | ||
575 | |||
576 | hdr = (struct btrfs_cmd_header *)sctx->send_buf; | ||
577 | hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); | ||
578 | hdr->crc = 0; | ||
579 | |||
580 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); | ||
581 | hdr->crc = cpu_to_le32(crc); | ||
582 | |||
583 | ret = write_buf(sctx, sctx->send_buf, sctx->send_size); | ||
584 | |||
585 | sctx->total_send_size += sctx->send_size; | ||
586 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; | ||
587 | sctx->send_size = 0; | ||
588 | |||
589 | return ret; | ||
590 | } | ||
591 | |||
592 | /* | ||
593 | * Sends a move instruction to user space | ||
594 | */ | ||
595 | static int send_rename(struct send_ctx *sctx, | ||
596 | struct fs_path *from, struct fs_path *to) | ||
597 | { | ||
598 | int ret; | ||
599 | |||
600 | verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start); | ||
601 | |||
602 | ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME); | ||
603 | if (ret < 0) | ||
604 | goto out; | ||
605 | |||
606 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from); | ||
607 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to); | ||
608 | |||
609 | ret = send_cmd(sctx); | ||
610 | |||
611 | tlv_put_failure: | ||
612 | out: | ||
613 | return ret; | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * Sends a link instruction to user space | ||
618 | */ | ||
619 | static int send_link(struct send_ctx *sctx, | ||
620 | struct fs_path *path, struct fs_path *lnk) | ||
621 | { | ||
622 | int ret; | ||
623 | |||
624 | verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start); | ||
625 | |||
626 | ret = begin_cmd(sctx, BTRFS_SEND_C_LINK); | ||
627 | if (ret < 0) | ||
628 | goto out; | ||
629 | |||
630 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); | ||
631 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk); | ||
632 | |||
633 | ret = send_cmd(sctx); | ||
634 | |||
635 | tlv_put_failure: | ||
636 | out: | ||
637 | return ret; | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * Sends an unlink instruction to user space | ||
642 | */ | ||
643 | static int send_unlink(struct send_ctx *sctx, struct fs_path *path) | ||
644 | { | ||
645 | int ret; | ||
646 | |||
647 | verbose_printk("btrfs: send_unlink %s\n", path->start); | ||
648 | |||
649 | ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK); | ||
650 | if (ret < 0) | ||
651 | goto out; | ||
652 | |||
653 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); | ||
654 | |||
655 | ret = send_cmd(sctx); | ||
656 | |||
657 | tlv_put_failure: | ||
658 | out: | ||
659 | return ret; | ||
660 | } | ||
661 | |||
662 | /* | ||
663 | * Sends a rmdir instruction to user space | ||
664 | */ | ||
665 | static int send_rmdir(struct send_ctx *sctx, struct fs_path *path) | ||
666 | { | ||
667 | int ret; | ||
668 | |||
669 | verbose_printk("btrfs: send_rmdir %s\n", path->start); | ||
670 | |||
671 | ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR); | ||
672 | if (ret < 0) | ||
673 | goto out; | ||
674 | |||
675 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); | ||
676 | |||
677 | ret = send_cmd(sctx); | ||
678 | |||
679 | tlv_put_failure: | ||
680 | out: | ||
681 | return ret; | ||
682 | } | ||
683 | |||
684 | /* | ||
685 | * Helper function to retrieve some fields from an inode item. | ||
686 | */ | ||
687 | static int get_inode_info(struct btrfs_root *root, | ||
688 | u64 ino, u64 *size, u64 *gen, | ||
689 | u64 *mode, u64 *uid, u64 *gid) | ||
690 | { | ||
691 | int ret; | ||
692 | struct btrfs_inode_item *ii; | ||
693 | struct btrfs_key key; | ||
694 | struct btrfs_path *path; | ||
695 | |||
696 | path = alloc_path_for_send(); | ||
697 | if (!path) | ||
698 | return -ENOMEM; | ||
699 | |||
700 | key.objectid = ino; | ||
701 | key.type = BTRFS_INODE_ITEM_KEY; | ||
702 | key.offset = 0; | ||
703 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
704 | if (ret < 0) | ||
705 | goto out; | ||
706 | if (ret) { | ||
707 | ret = -ENOENT; | ||
708 | goto out; | ||
709 | } | ||
710 | |||
711 | ii = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
712 | struct btrfs_inode_item); | ||
713 | if (size) | ||
714 | *size = btrfs_inode_size(path->nodes[0], ii); | ||
715 | if (gen) | ||
716 | *gen = btrfs_inode_generation(path->nodes[0], ii); | ||
717 | if (mode) | ||
718 | *mode = btrfs_inode_mode(path->nodes[0], ii); | ||
719 | if (uid) | ||
720 | *uid = btrfs_inode_uid(path->nodes[0], ii); | ||
721 | if (gid) | ||
722 | *gid = btrfs_inode_gid(path->nodes[0], ii); | ||
723 | |||
724 | out: | ||
725 | btrfs_free_path(path); | ||
726 | return ret; | ||
727 | } | ||
728 | |||
729 | typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, | ||
730 | struct fs_path *p, | ||
731 | void *ctx); | ||
732 | |||
733 | /* | ||
734 | * Helper function to iterate the entries in ONE btrfs_inode_ref. | ||
735 | * The iterate callback may return a non zero value to stop iteration. This can | ||
736 | * be a negative value for error codes or 1 to simply stop it. | ||
737 | * | ||
738 | * path must point to the INODE_REF when called. | ||
739 | */ | ||
740 | static int iterate_inode_ref(struct send_ctx *sctx, | ||
741 | struct btrfs_root *root, struct btrfs_path *path, | ||
742 | struct btrfs_key *found_key, int resolve, | ||
743 | iterate_inode_ref_t iterate, void *ctx) | ||
744 | { | ||
745 | struct extent_buffer *eb; | ||
746 | struct btrfs_item *item; | ||
747 | struct btrfs_inode_ref *iref; | ||
748 | struct btrfs_path *tmp_path; | ||
749 | struct fs_path *p; | ||
750 | u32 cur; | ||
751 | u32 len; | ||
752 | u32 total; | ||
753 | int slot; | ||
754 | u32 name_len; | ||
755 | char *start; | ||
756 | int ret = 0; | ||
757 | int num; | ||
758 | int index; | ||
759 | |||
760 | p = fs_path_alloc_reversed(sctx); | ||
761 | if (!p) | ||
762 | return -ENOMEM; | ||
763 | |||
764 | tmp_path = alloc_path_for_send(); | ||
765 | if (!tmp_path) { | ||
766 | fs_path_free(sctx, p); | ||
767 | return -ENOMEM; | ||
768 | } | ||
769 | |||
770 | eb = path->nodes[0]; | ||
771 | slot = path->slots[0]; | ||
772 | item = btrfs_item_nr(eb, slot); | ||
773 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | ||
774 | cur = 0; | ||
775 | len = 0; | ||
776 | total = btrfs_item_size(eb, item); | ||
777 | |||
778 | num = 0; | ||
779 | while (cur < total) { | ||
780 | fs_path_reset(p); | ||
781 | |||
782 | name_len = btrfs_inode_ref_name_len(eb, iref); | ||
783 | index = btrfs_inode_ref_index(eb, iref); | ||
784 | if (resolve) { | ||
785 | start = btrfs_iref_to_path(root, tmp_path, iref, eb, | ||
786 | found_key->offset, p->buf, | ||
787 | p->buf_len); | ||
788 | if (IS_ERR(start)) { | ||
789 | ret = PTR_ERR(start); | ||
790 | goto out; | ||
791 | } | ||
792 | if (start < p->buf) { | ||
793 | /* overflow , try again with larger buffer */ | ||
794 | ret = fs_path_ensure_buf(p, | ||
795 | p->buf_len + p->buf - start); | ||
796 | if (ret < 0) | ||
797 | goto out; | ||
798 | start = btrfs_iref_to_path(root, tmp_path, iref, | ||
799 | eb, found_key->offset, p->buf, | ||
800 | p->buf_len); | ||
801 | if (IS_ERR(start)) { | ||
802 | ret = PTR_ERR(start); | ||
803 | goto out; | ||
804 | } | ||
805 | BUG_ON(start < p->buf); | ||
806 | } | ||
807 | p->start = start; | ||
808 | } else { | ||
809 | ret = fs_path_add_from_extent_buffer(p, eb, | ||
810 | (unsigned long)(iref + 1), name_len); | ||
811 | if (ret < 0) | ||
812 | goto out; | ||
813 | } | ||
814 | |||
815 | |||
816 | len = sizeof(*iref) + name_len; | ||
817 | iref = (struct btrfs_inode_ref *)((char *)iref + len); | ||
818 | cur += len; | ||
819 | |||
820 | ret = iterate(num, found_key->offset, index, p, ctx); | ||
821 | if (ret) | ||
822 | goto out; | ||
823 | |||
824 | num++; | ||
825 | } | ||
826 | |||
827 | out: | ||
828 | btrfs_free_path(tmp_path); | ||
829 | fs_path_free(sctx, p); | ||
830 | return ret; | ||
831 | } | ||
832 | |||
833 | typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key, | ||
834 | const char *name, int name_len, | ||
835 | const char *data, int data_len, | ||
836 | u8 type, void *ctx); | ||
837 | |||
838 | /* | ||
839 | * Helper function to iterate the entries in ONE btrfs_dir_item. | ||
840 | * The iterate callback may return a non zero value to stop iteration. This can | ||
841 | * be a negative value for error codes or 1 to simply stop it. | ||
842 | * | ||
843 | * path must point to the dir item when called. | ||
844 | */ | ||
845 | static int iterate_dir_item(struct send_ctx *sctx, | ||
846 | struct btrfs_root *root, struct btrfs_path *path, | ||
847 | struct btrfs_key *found_key, | ||
848 | iterate_dir_item_t iterate, void *ctx) | ||
849 | { | ||
850 | int ret = 0; | ||
851 | struct extent_buffer *eb; | ||
852 | struct btrfs_item *item; | ||
853 | struct btrfs_dir_item *di; | ||
854 | struct btrfs_path *tmp_path = NULL; | ||
855 | struct btrfs_key di_key; | ||
856 | char *buf = NULL; | ||
857 | char *buf2 = NULL; | ||
858 | int buf_len; | ||
859 | int buf_virtual = 0; | ||
860 | u32 name_len; | ||
861 | u32 data_len; | ||
862 | u32 cur; | ||
863 | u32 len; | ||
864 | u32 total; | ||
865 | int slot; | ||
866 | int num; | ||
867 | u8 type; | ||
868 | |||
869 | buf_len = PAGE_SIZE; | ||
870 | buf = kmalloc(buf_len, GFP_NOFS); | ||
871 | if (!buf) { | ||
872 | ret = -ENOMEM; | ||
873 | goto out; | ||
874 | } | ||
875 | |||
876 | tmp_path = alloc_path_for_send(); | ||
877 | if (!tmp_path) { | ||
878 | ret = -ENOMEM; | ||
879 | goto out; | ||
880 | } | ||
881 | |||
882 | eb = path->nodes[0]; | ||
883 | slot = path->slots[0]; | ||
884 | item = btrfs_item_nr(eb, slot); | ||
885 | di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); | ||
886 | cur = 0; | ||
887 | len = 0; | ||
888 | total = btrfs_item_size(eb, item); | ||
889 | |||
890 | num = 0; | ||
891 | while (cur < total) { | ||
892 | name_len = btrfs_dir_name_len(eb, di); | ||
893 | data_len = btrfs_dir_data_len(eb, di); | ||
894 | type = btrfs_dir_type(eb, di); | ||
895 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | ||
896 | |||
897 | if (name_len + data_len > buf_len) { | ||
898 | buf_len = PAGE_ALIGN(name_len + data_len); | ||
899 | if (buf_virtual) { | ||
900 | buf2 = vmalloc(buf_len); | ||
901 | if (!buf2) { | ||
902 | ret = -ENOMEM; | ||
903 | goto out; | ||
904 | } | ||
905 | vfree(buf); | ||
906 | } else { | ||
907 | buf2 = krealloc(buf, buf_len, GFP_NOFS); | ||
908 | if (!buf2) { | ||
909 | buf2 = vmalloc(buf_len); | ||
910 | if (!buf2) { | ||
911 | ret = -ENOMEM; | ||
912 | goto out; | ||
913 | } | ||
914 | kfree(buf); | ||
915 | buf_virtual = 1; | ||
916 | } | ||
917 | } | ||
918 | |||
919 | buf = buf2; | ||
920 | buf2 = NULL; | ||
921 | } | ||
922 | |||
923 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | ||
924 | name_len + data_len); | ||
925 | |||
926 | len = sizeof(*di) + name_len + data_len; | ||
927 | di = (struct btrfs_dir_item *)((char *)di + len); | ||
928 | cur += len; | ||
929 | |||
930 | ret = iterate(num, &di_key, buf, name_len, buf + name_len, | ||
931 | data_len, type, ctx); | ||
932 | if (ret < 0) | ||
933 | goto out; | ||
934 | if (ret) { | ||
935 | ret = 0; | ||
936 | goto out; | ||
937 | } | ||
938 | |||
939 | num++; | ||
940 | } | ||
941 | |||
942 | out: | ||
943 | btrfs_free_path(tmp_path); | ||
944 | if (buf_virtual) | ||
945 | vfree(buf); | ||
946 | else | ||
947 | kfree(buf); | ||
948 | return ret; | ||
949 | } | ||
950 | |||
951 | static int __copy_first_ref(int num, u64 dir, int index, | ||
952 | struct fs_path *p, void *ctx) | ||
953 | { | ||
954 | int ret; | ||
955 | struct fs_path *pt = ctx; | ||
956 | |||
957 | ret = fs_path_copy(pt, p); | ||
958 | if (ret < 0) | ||
959 | return ret; | ||
960 | |||
961 | /* we want the first only */ | ||
962 | return 1; | ||
963 | } | ||
964 | |||
965 | /* | ||
966 | * Retrieve the first path of an inode. If an inode has more then one | ||
967 | * ref/hardlink, this is ignored. | ||
968 | */ | ||
969 | static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, | ||
970 | u64 ino, struct fs_path *path) | ||
971 | { | ||
972 | int ret; | ||
973 | struct btrfs_key key, found_key; | ||
974 | struct btrfs_path *p; | ||
975 | |||
976 | p = alloc_path_for_send(); | ||
977 | if (!p) | ||
978 | return -ENOMEM; | ||
979 | |||
980 | fs_path_reset(path); | ||
981 | |||
982 | key.objectid = ino; | ||
983 | key.type = BTRFS_INODE_REF_KEY; | ||
984 | key.offset = 0; | ||
985 | |||
986 | ret = btrfs_search_slot_for_read(root, &key, p, 1, 0); | ||
987 | if (ret < 0) | ||
988 | goto out; | ||
989 | if (ret) { | ||
990 | ret = 1; | ||
991 | goto out; | ||
992 | } | ||
993 | btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); | ||
994 | if (found_key.objectid != ino || | ||
995 | found_key.type != BTRFS_INODE_REF_KEY) { | ||
996 | ret = -ENOENT; | ||
997 | goto out; | ||
998 | } | ||
999 | |||
1000 | ret = iterate_inode_ref(sctx, root, p, &found_key, 1, | ||
1001 | __copy_first_ref, path); | ||
1002 | if (ret < 0) | ||
1003 | goto out; | ||
1004 | ret = 0; | ||
1005 | |||
1006 | out: | ||
1007 | btrfs_free_path(p); | ||
1008 | return ret; | ||
1009 | } | ||
1010 | |||
/*
 * Context that is handed to __iterate_backrefs() for every backref found for
 * the current extent.
 */
struct backref_ctx {
	struct send_ctx *sctx;

	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/*
	 * Just to check for bugs in backref resolving. Set by
	 * __iterate_backrefs() when a backref in send_root matches
	 * cur_objectid and cur_offset.
	 */
	int found_in_send_root;
};
1030 | |||
1031 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) | ||
1032 | { | ||
1033 | u64 root = (u64)key; | ||
1034 | struct clone_root *cr = (struct clone_root *)elt; | ||
1035 | |||
1036 | if (root < cr->root->objectid) | ||
1037 | return -1; | ||
1038 | if (root > cr->root->objectid) | ||
1039 | return 1; | ||
1040 | return 0; | ||
1041 | } | ||
1042 | |||
1043 | static int __clone_root_cmp_sort(const void *e1, const void *e2) | ||
1044 | { | ||
1045 | struct clone_root *cr1 = (struct clone_root *)e1; | ||
1046 | struct clone_root *cr2 = (struct clone_root *)e2; | ||
1047 | |||
1048 | if (cr1->root->objectid < cr2->root->objectid) | ||
1049 | return -1; | ||
1050 | if (cr1->root->objectid > cr2->root->objectid) | ||
1051 | return 1; | ||
1052 | return 0; | ||
1053 | } | ||
1054 | |||
1055 | /* | ||
1056 | * Called for every backref that is found for the current extent. | ||
1057 | */ | ||
1058 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | ||
1059 | { | ||
1060 | struct backref_ctx *bctx = ctx_; | ||
1061 | struct clone_root *found; | ||
1062 | int ret; | ||
1063 | u64 i_size; | ||
1064 | |||
1065 | /* First check if the root is in the list of accepted clone sources */ | ||
1066 | found = bsearch((void *)root, bctx->sctx->clone_roots, | ||
1067 | bctx->sctx->clone_roots_cnt, | ||
1068 | sizeof(struct clone_root), | ||
1069 | __clone_root_cmp_bsearch); | ||
1070 | if (!found) | ||
1071 | return 0; | ||
1072 | |||
1073 | if (found->root == bctx->sctx->send_root && | ||
1074 | ino == bctx->cur_objectid && | ||
1075 | offset == bctx->cur_offset) { | ||
1076 | bctx->found_in_send_root = 1; | ||
1077 | } | ||
1078 | |||
1079 | /* | ||
1080 | * There are inodes that have extents that lie behind it's i_size. Don't | ||
1081 | * accept clones from these extents. | ||
1082 | */ | ||
1083 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL); | ||
1084 | if (ret < 0) | ||
1085 | return ret; | ||
1086 | |||
1087 | if (offset + bctx->extent_len > i_size) | ||
1088 | return 0; | ||
1089 | |||
1090 | /* | ||
1091 | * Make sure we don't consider clones from send_root that are | ||
1092 | * behind the current inode/offset. | ||
1093 | */ | ||
1094 | if (found->root == bctx->sctx->send_root) { | ||
1095 | /* | ||
1096 | * TODO for the moment we don't accept clones from the inode | ||
1097 | * that is currently send. We may change this when | ||
1098 | * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same | ||
1099 | * file. | ||
1100 | */ | ||
1101 | if (ino >= bctx->cur_objectid) | ||
1102 | return 0; | ||
1103 | /*if (ino > ctx->cur_objectid) | ||
1104 | return 0; | ||
1105 | if (offset + ctx->extent_len > ctx->cur_offset) | ||
1106 | return 0;*/ | ||
1107 | |||
1108 | bctx->found++; | ||
1109 | found->found_refs++; | ||
1110 | found->ino = ino; | ||
1111 | found->offset = offset; | ||
1112 | return 0; | ||
1113 | } | ||
1114 | |||
1115 | bctx->found++; | ||
1116 | found->found_refs++; | ||
1117 | if (ino < found->ino) { | ||
1118 | found->ino = ino; | ||
1119 | found->offset = offset; | ||
1120 | } else if (found->ino == ino) { | ||
1121 | /* | ||
1122 | * same extent found more then once in the same file. | ||
1123 | */ | ||
1124 | if (found->offset > offset + bctx->extent_len) | ||
1125 | found->offset = offset; | ||
1126 | } | ||
1127 | |||
1128 | return 0; | ||
1129 | } | ||
1130 | |||
1131 | /* | ||
1132 | * path must point to the extent item when called. | ||
1133 | */ | ||
1134 | static int find_extent_clone(struct send_ctx *sctx, | ||
1135 | struct btrfs_path *path, | ||
1136 | u64 ino, u64 data_offset, | ||
1137 | u64 ino_size, | ||
1138 | struct clone_root **found) | ||
1139 | { | ||
1140 | int ret; | ||
1141 | int extent_type; | ||
1142 | u64 logical; | ||
1143 | u64 num_bytes; | ||
1144 | u64 extent_item_pos; | ||
1145 | struct btrfs_file_extent_item *fi; | ||
1146 | struct extent_buffer *eb = path->nodes[0]; | ||
1147 | struct backref_ctx backref_ctx; | ||
1148 | struct clone_root *cur_clone_root; | ||
1149 | struct btrfs_key found_key; | ||
1150 | struct btrfs_path *tmp_path; | ||
1151 | u32 i; | ||
1152 | |||
1153 | tmp_path = alloc_path_for_send(); | ||
1154 | if (!tmp_path) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | if (data_offset >= ino_size) { | ||
1158 | /* | ||
1159 | * There may be extents that lie behind the file's size. | ||
1160 | * I at least had this in combination with snapshotting while | ||
1161 | * writing large files. | ||
1162 | */ | ||
1163 | ret = 0; | ||
1164 | goto out; | ||
1165 | } | ||
1166 | |||
1167 | fi = btrfs_item_ptr(eb, path->slots[0], | ||
1168 | struct btrfs_file_extent_item); | ||
1169 | extent_type = btrfs_file_extent_type(eb, fi); | ||
1170 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | ||
1171 | ret = -ENOENT; | ||
1172 | goto out; | ||
1173 | } | ||
1174 | |||
1175 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); | ||
1176 | logical = btrfs_file_extent_disk_bytenr(eb, fi); | ||
1177 | if (logical == 0) { | ||
1178 | ret = -ENOENT; | ||
1179 | goto out; | ||
1180 | } | ||
1181 | logical += btrfs_file_extent_offset(eb, fi); | ||
1182 | |||
1183 | ret = extent_from_logical(sctx->send_root->fs_info, | ||
1184 | logical, tmp_path, &found_key); | ||
1185 | btrfs_release_path(tmp_path); | ||
1186 | |||
1187 | if (ret < 0) | ||
1188 | goto out; | ||
1189 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
1190 | ret = -EIO; | ||
1191 | goto out; | ||
1192 | } | ||
1193 | |||
1194 | /* | ||
1195 | * Setup the clone roots. | ||
1196 | */ | ||
1197 | for (i = 0; i < sctx->clone_roots_cnt; i++) { | ||
1198 | cur_clone_root = sctx->clone_roots + i; | ||
1199 | cur_clone_root->ino = (u64)-1; | ||
1200 | cur_clone_root->offset = 0; | ||
1201 | cur_clone_root->found_refs = 0; | ||
1202 | } | ||
1203 | |||
1204 | backref_ctx.sctx = sctx; | ||
1205 | backref_ctx.found = 0; | ||
1206 | backref_ctx.cur_objectid = ino; | ||
1207 | backref_ctx.cur_offset = data_offset; | ||
1208 | backref_ctx.found_in_send_root = 0; | ||
1209 | backref_ctx.extent_len = num_bytes; | ||
1210 | |||
1211 | /* | ||
1212 | * The last extent of a file may be too large due to page alignment. | ||
1213 | * We need to adjust extent_len in this case so that the checks in | ||
1214 | * __iterate_backrefs work. | ||
1215 | */ | ||
1216 | if (data_offset + num_bytes >= ino_size) | ||
1217 | backref_ctx.extent_len = ino_size - data_offset; | ||
1218 | |||
1219 | /* | ||
1220 | * Now collect all backrefs. | ||
1221 | */ | ||
1222 | extent_item_pos = logical - found_key.objectid; | ||
1223 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | ||
1224 | found_key.objectid, extent_item_pos, 1, | ||
1225 | __iterate_backrefs, &backref_ctx); | ||
1226 | if (ret < 0) | ||
1227 | goto out; | ||
1228 | |||
1229 | if (!backref_ctx.found_in_send_root) { | ||
1230 | /* found a bug in backref code? */ | ||
1231 | ret = -EIO; | ||
1232 | printk(KERN_ERR "btrfs: ERROR did not find backref in " | ||
1233 | "send_root. inode=%llu, offset=%llu, " | ||
1234 | "logical=%llu\n", | ||
1235 | ino, data_offset, logical); | ||
1236 | goto out; | ||
1237 | } | ||
1238 | |||
1239 | verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | ||
1240 | "ino=%llu, " | ||
1241 | "num_bytes=%llu, logical=%llu\n", | ||
1242 | data_offset, ino, num_bytes, logical); | ||
1243 | |||
1244 | if (!backref_ctx.found) | ||
1245 | verbose_printk("btrfs: no clones found\n"); | ||
1246 | |||
1247 | cur_clone_root = NULL; | ||
1248 | for (i = 0; i < sctx->clone_roots_cnt; i++) { | ||
1249 | if (sctx->clone_roots[i].found_refs) { | ||
1250 | if (!cur_clone_root) | ||
1251 | cur_clone_root = sctx->clone_roots + i; | ||
1252 | else if (sctx->clone_roots[i].root == sctx->send_root) | ||
1253 | /* prefer clones from send_root over others */ | ||
1254 | cur_clone_root = sctx->clone_roots + i; | ||
1255 | break; | ||
1256 | } | ||
1257 | |||
1258 | } | ||
1259 | |||
1260 | if (cur_clone_root) { | ||
1261 | *found = cur_clone_root; | ||
1262 | ret = 0; | ||
1263 | } else { | ||
1264 | ret = -ENOENT; | ||
1265 | } | ||
1266 | |||
1267 | out: | ||
1268 | btrfs_free_path(tmp_path); | ||
1269 | return ret; | ||
1270 | } | ||
1271 | |||
1272 | static int read_symlink(struct send_ctx *sctx, | ||
1273 | struct btrfs_root *root, | ||
1274 | u64 ino, | ||
1275 | struct fs_path *dest) | ||
1276 | { | ||
1277 | int ret; | ||
1278 | struct btrfs_path *path; | ||
1279 | struct btrfs_key key; | ||
1280 | struct btrfs_file_extent_item *ei; | ||
1281 | u8 type; | ||
1282 | u8 compression; | ||
1283 | unsigned long off; | ||
1284 | int len; | ||
1285 | |||
1286 | path = alloc_path_for_send(); | ||
1287 | if (!path) | ||
1288 | return -ENOMEM; | ||
1289 | |||
1290 | key.objectid = ino; | ||
1291 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
1292 | key.offset = 0; | ||
1293 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1294 | if (ret < 0) | ||
1295 | goto out; | ||
1296 | BUG_ON(ret); | ||
1297 | |||
1298 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
1299 | struct btrfs_file_extent_item); | ||
1300 | type = btrfs_file_extent_type(path->nodes[0], ei); | ||
1301 | compression = btrfs_file_extent_compression(path->nodes[0], ei); | ||
1302 | BUG_ON(type != BTRFS_FILE_EXTENT_INLINE); | ||
1303 | BUG_ON(compression); | ||
1304 | |||
1305 | off = btrfs_file_extent_inline_start(ei); | ||
1306 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | ||
1307 | |||
1308 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); | ||
1309 | if (ret < 0) | ||
1310 | goto out; | ||
1311 | |||
1312 | out: | ||
1313 | btrfs_free_path(path); | ||
1314 | return ret; | ||
1315 | } | ||
1316 | |||
1317 | /* | ||
1318 | * Helper function to generate a file name that is unique in the root of | ||
1319 | * send_root and parent_root. This is used to generate names for orphan inodes. | ||
1320 | */ | ||
1321 | static int gen_unique_name(struct send_ctx *sctx, | ||
1322 | u64 ino, u64 gen, | ||
1323 | struct fs_path *dest) | ||
1324 | { | ||
1325 | int ret = 0; | ||
1326 | struct btrfs_path *path; | ||
1327 | struct btrfs_dir_item *di; | ||
1328 | char tmp[64]; | ||
1329 | int len; | ||
1330 | u64 idx = 0; | ||
1331 | |||
1332 | path = alloc_path_for_send(); | ||
1333 | if (!path) | ||
1334 | return -ENOMEM; | ||
1335 | |||
1336 | while (1) { | ||
1337 | len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu", | ||
1338 | ino, gen, idx); | ||
1339 | if (len >= sizeof(tmp)) { | ||
1340 | /* should really not happen */ | ||
1341 | ret = -EOVERFLOW; | ||
1342 | goto out; | ||
1343 | } | ||
1344 | |||
1345 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, | ||
1346 | path, BTRFS_FIRST_FREE_OBJECTID, | ||
1347 | tmp, strlen(tmp), 0); | ||
1348 | btrfs_release_path(path); | ||
1349 | if (IS_ERR(di)) { | ||
1350 | ret = PTR_ERR(di); | ||
1351 | goto out; | ||
1352 | } | ||
1353 | if (di) { | ||
1354 | /* not unique, try again */ | ||
1355 | idx++; | ||
1356 | continue; | ||
1357 | } | ||
1358 | |||
1359 | if (!sctx->parent_root) { | ||
1360 | /* unique */ | ||
1361 | ret = 0; | ||
1362 | break; | ||
1363 | } | ||
1364 | |||
1365 | di = btrfs_lookup_dir_item(NULL, sctx->parent_root, | ||
1366 | path, BTRFS_FIRST_FREE_OBJECTID, | ||
1367 | tmp, strlen(tmp), 0); | ||
1368 | btrfs_release_path(path); | ||
1369 | if (IS_ERR(di)) { | ||
1370 | ret = PTR_ERR(di); | ||
1371 | goto out; | ||
1372 | } | ||
1373 | if (di) { | ||
1374 | /* not unique, try again */ | ||
1375 | idx++; | ||
1376 | continue; | ||
1377 | } | ||
1378 | /* unique */ | ||
1379 | break; | ||
1380 | } | ||
1381 | |||
1382 | ret = fs_path_add(dest, tmp, strlen(tmp)); | ||
1383 | |||
1384 | out: | ||
1385 | btrfs_free_path(path); | ||
1386 | return ret; | ||
1387 | } | ||
1388 | |||
/*
 * State of an inode relative to the current send progress, as computed by
 * get_cur_inode_state().
 */
enum inode_state {
	/* generation matches in both send_root and parent_root */
	inode_state_no_change,
	/* matches in send_root only, inode number not below send_progress */
	inode_state_will_create,
	/* matches in send_root only, inode number below send_progress */
	inode_state_did_create,
	/* matches in parent_root only, inode number not below send_progress */
	inode_state_will_delete,
	/* matches in parent_root only, inode number below send_progress */
	inode_state_did_delete,
};
1396 | |||
1397 | static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | ||
1398 | { | ||
1399 | int ret; | ||
1400 | int left_ret; | ||
1401 | int right_ret; | ||
1402 | u64 left_gen; | ||
1403 | u64 right_gen; | ||
1404 | |||
1405 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, | ||
1406 | NULL); | ||
1407 | if (ret < 0 && ret != -ENOENT) | ||
1408 | goto out; | ||
1409 | left_ret = ret; | ||
1410 | |||
1411 | if (!sctx->parent_root) { | ||
1412 | right_ret = -ENOENT; | ||
1413 | } else { | ||
1414 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, | ||
1415 | NULL, NULL, NULL); | ||
1416 | if (ret < 0 && ret != -ENOENT) | ||
1417 | goto out; | ||
1418 | right_ret = ret; | ||
1419 | } | ||
1420 | |||
1421 | if (!left_ret && !right_ret) { | ||
1422 | if (left_gen == gen && right_gen == gen) | ||
1423 | ret = inode_state_no_change; | ||
1424 | else if (left_gen == gen) { | ||
1425 | if (ino < sctx->send_progress) | ||
1426 | ret = inode_state_did_create; | ||
1427 | else | ||
1428 | ret = inode_state_will_create; | ||
1429 | } else if (right_gen == gen) { | ||
1430 | if (ino < sctx->send_progress) | ||
1431 | ret = inode_state_did_delete; | ||
1432 | else | ||
1433 | ret = inode_state_will_delete; | ||
1434 | } else { | ||
1435 | ret = -ENOENT; | ||
1436 | } | ||
1437 | } else if (!left_ret) { | ||
1438 | if (left_gen == gen) { | ||
1439 | if (ino < sctx->send_progress) | ||
1440 | ret = inode_state_did_create; | ||
1441 | else | ||
1442 | ret = inode_state_will_create; | ||
1443 | } else { | ||
1444 | ret = -ENOENT; | ||
1445 | } | ||
1446 | } else if (!right_ret) { | ||
1447 | if (right_gen == gen) { | ||
1448 | if (ino < sctx->send_progress) | ||
1449 | ret = inode_state_did_delete; | ||
1450 | else | ||
1451 | ret = inode_state_will_delete; | ||
1452 | } else { | ||
1453 | ret = -ENOENT; | ||
1454 | } | ||
1455 | } else { | ||
1456 | ret = -ENOENT; | ||
1457 | } | ||
1458 | |||
1459 | out: | ||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) | ||
1464 | { | ||
1465 | int ret; | ||
1466 | |||
1467 | ret = get_cur_inode_state(sctx, ino, gen); | ||
1468 | if (ret < 0) | ||
1469 | goto out; | ||
1470 | |||
1471 | if (ret == inode_state_no_change || | ||
1472 | ret == inode_state_did_create || | ||
1473 | ret == inode_state_will_delete) | ||
1474 | ret = 1; | ||
1475 | else | ||
1476 | ret = 0; | ||
1477 | |||
1478 | out: | ||
1479 | return ret; | ||
1480 | } | ||
1481 | |||
1482 | /* | ||
1483 | * Helper function to lookup a dir item in a dir. | ||
1484 | */ | ||
1485 | static int lookup_dir_item_inode(struct btrfs_root *root, | ||
1486 | u64 dir, const char *name, int name_len, | ||
1487 | u64 *found_inode, | ||
1488 | u8 *found_type) | ||
1489 | { | ||
1490 | int ret = 0; | ||
1491 | struct btrfs_dir_item *di; | ||
1492 | struct btrfs_key key; | ||
1493 | struct btrfs_path *path; | ||
1494 | |||
1495 | path = alloc_path_for_send(); | ||
1496 | if (!path) | ||
1497 | return -ENOMEM; | ||
1498 | |||
1499 | di = btrfs_lookup_dir_item(NULL, root, path, | ||
1500 | dir, name, name_len, 0); | ||
1501 | if (!di) { | ||
1502 | ret = -ENOENT; | ||
1503 | goto out; | ||
1504 | } | ||
1505 | if (IS_ERR(di)) { | ||
1506 | ret = PTR_ERR(di); | ||
1507 | goto out; | ||
1508 | } | ||
1509 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | ||
1510 | *found_inode = key.objectid; | ||
1511 | *found_type = btrfs_dir_type(path->nodes[0], di); | ||
1512 | |||
1513 | out: | ||
1514 | btrfs_free_path(path); | ||
1515 | return ret; | ||
1516 | } | ||
1517 | |||
1518 | static int get_first_ref(struct send_ctx *sctx, | ||
1519 | struct btrfs_root *root, u64 ino, | ||
1520 | u64 *dir, u64 *dir_gen, struct fs_path *name) | ||
1521 | { | ||
1522 | int ret; | ||
1523 | struct btrfs_key key; | ||
1524 | struct btrfs_key found_key; | ||
1525 | struct btrfs_path *path; | ||
1526 | struct btrfs_inode_ref *iref; | ||
1527 | int len; | ||
1528 | |||
1529 | path = alloc_path_for_send(); | ||
1530 | if (!path) | ||
1531 | return -ENOMEM; | ||
1532 | |||
1533 | key.objectid = ino; | ||
1534 | key.type = BTRFS_INODE_REF_KEY; | ||
1535 | key.offset = 0; | ||
1536 | |||
1537 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | ||
1538 | if (ret < 0) | ||
1539 | goto out; | ||
1540 | if (!ret) | ||
1541 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
1542 | path->slots[0]); | ||
1543 | if (ret || found_key.objectid != key.objectid || | ||
1544 | found_key.type != key.type) { | ||
1545 | ret = -ENOENT; | ||
1546 | goto out; | ||
1547 | } | ||
1548 | |||
1549 | iref = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
1550 | struct btrfs_inode_ref); | ||
1551 | len = btrfs_inode_ref_name_len(path->nodes[0], iref); | ||
1552 | ret = fs_path_add_from_extent_buffer(name, path->nodes[0], | ||
1553 | (unsigned long)(iref + 1), len); | ||
1554 | if (ret < 0) | ||
1555 | goto out; | ||
1556 | btrfs_release_path(path); | ||
1557 | |||
1558 | ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, | ||
1559 | NULL); | ||
1560 | if (ret < 0) | ||
1561 | goto out; | ||
1562 | |||
1563 | *dir = found_key.offset; | ||
1564 | |||
1565 | out: | ||
1566 | btrfs_free_path(path); | ||
1567 | return ret; | ||
1568 | } | ||
1569 | |||
1570 | static int is_first_ref(struct send_ctx *sctx, | ||
1571 | struct btrfs_root *root, | ||
1572 | u64 ino, u64 dir, | ||
1573 | const char *name, int name_len) | ||
1574 | { | ||
1575 | int ret; | ||
1576 | struct fs_path *tmp_name; | ||
1577 | u64 tmp_dir; | ||
1578 | u64 tmp_dir_gen; | ||
1579 | |||
1580 | tmp_name = fs_path_alloc(sctx); | ||
1581 | if (!tmp_name) | ||
1582 | return -ENOMEM; | ||
1583 | |||
1584 | ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); | ||
1585 | if (ret < 0) | ||
1586 | goto out; | ||
1587 | |||
1588 | if (name_len != fs_path_len(tmp_name)) { | ||
1589 | ret = 0; | ||
1590 | goto out; | ||
1591 | } | ||
1592 | |||
1593 | ret = memcmp(tmp_name->start, name, name_len); | ||
1594 | if (ret) | ||
1595 | ret = 0; | ||
1596 | else | ||
1597 | ret = 1; | ||
1598 | |||
1599 | out: | ||
1600 | fs_path_free(sctx, tmp_name); | ||
1601 | return ret; | ||
1602 | } | ||
1603 | |||
1604 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | ||
1605 | const char *name, int name_len, | ||
1606 | u64 *who_ino, u64 *who_gen) | ||
1607 | { | ||
1608 | int ret = 0; | ||
1609 | u64 other_inode = 0; | ||
1610 | u8 other_type = 0; | ||
1611 | |||
1612 | if (!sctx->parent_root) | ||
1613 | goto out; | ||
1614 | |||
1615 | ret = is_inode_existent(sctx, dir, dir_gen); | ||
1616 | if (ret <= 0) | ||
1617 | goto out; | ||
1618 | |||
1619 | ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, | ||
1620 | &other_inode, &other_type); | ||
1621 | if (ret < 0 && ret != -ENOENT) | ||
1622 | goto out; | ||
1623 | if (ret) { | ||
1624 | ret = 0; | ||
1625 | goto out; | ||
1626 | } | ||
1627 | |||
1628 | if (other_inode > sctx->send_progress) { | ||
1629 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, | ||
1630 | who_gen, NULL, NULL, NULL); | ||
1631 | if (ret < 0) | ||
1632 | goto out; | ||
1633 | |||
1634 | ret = 1; | ||
1635 | *who_ino = other_inode; | ||
1636 | } else { | ||
1637 | ret = 0; | ||
1638 | } | ||
1639 | |||
1640 | out: | ||
1641 | return ret; | ||
1642 | } | ||
1643 | |||
1644 | static int did_overwrite_ref(struct send_ctx *sctx, | ||
1645 | u64 dir, u64 dir_gen, | ||
1646 | u64 ino, u64 ino_gen, | ||
1647 | const char *name, int name_len) | ||
1648 | { | ||
1649 | int ret = 0; | ||
1650 | u64 gen; | ||
1651 | u64 ow_inode; | ||
1652 | u8 other_type; | ||
1653 | |||
1654 | if (!sctx->parent_root) | ||
1655 | goto out; | ||
1656 | |||
1657 | ret = is_inode_existent(sctx, dir, dir_gen); | ||
1658 | if (ret <= 0) | ||
1659 | goto out; | ||
1660 | |||
1661 | /* check if the ref was overwritten by another ref */ | ||
1662 | ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, | ||
1663 | &ow_inode, &other_type); | ||
1664 | if (ret < 0 && ret != -ENOENT) | ||
1665 | goto out; | ||
1666 | if (ret) { | ||
1667 | /* was never and will never be overwritten */ | ||
1668 | ret = 0; | ||
1669 | goto out; | ||
1670 | } | ||
1671 | |||
1672 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, | ||
1673 | NULL); | ||
1674 | if (ret < 0) | ||
1675 | goto out; | ||
1676 | |||
1677 | if (ow_inode == ino && gen == ino_gen) { | ||
1678 | ret = 0; | ||
1679 | goto out; | ||
1680 | } | ||
1681 | |||
1682 | /* we know that it is or will be overwritten. check this now */ | ||
1683 | if (ow_inode < sctx->send_progress) | ||
1684 | ret = 1; | ||
1685 | else | ||
1686 | ret = 0; | ||
1687 | |||
1688 | out: | ||
1689 | return ret; | ||
1690 | } | ||
1691 | |||
1692 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | ||
1693 | { | ||
1694 | int ret = 0; | ||
1695 | struct fs_path *name = NULL; | ||
1696 | u64 dir; | ||
1697 | u64 dir_gen; | ||
1698 | |||
1699 | if (!sctx->parent_root) | ||
1700 | goto out; | ||
1701 | |||
1702 | name = fs_path_alloc(sctx); | ||
1703 | if (!name) | ||
1704 | return -ENOMEM; | ||
1705 | |||
1706 | ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name); | ||
1707 | if (ret < 0) | ||
1708 | goto out; | ||
1709 | |||
1710 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, | ||
1711 | name->start, fs_path_len(name)); | ||
1712 | if (ret < 0) | ||
1713 | goto out; | ||
1714 | |||
1715 | out: | ||
1716 | fs_path_free(sctx, name); | ||
1717 | return ret; | ||
1718 | } | ||
1719 | |||
1720 | static int name_cache_insert(struct send_ctx *sctx, | ||
1721 | struct name_cache_entry *nce) | ||
1722 | { | ||
1723 | int ret = 0; | ||
1724 | struct name_cache_entry **ncea; | ||
1725 | |||
1726 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | ||
1727 | if (ncea) { | ||
1728 | if (!ncea[0]) | ||
1729 | ncea[0] = nce; | ||
1730 | else if (!ncea[1]) | ||
1731 | ncea[1] = nce; | ||
1732 | else | ||
1733 | BUG(); | ||
1734 | } else { | ||
1735 | ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS); | ||
1736 | if (!ncea) | ||
1737 | return -ENOMEM; | ||
1738 | |||
1739 | ncea[0] = nce; | ||
1740 | ncea[1] = NULL; | ||
1741 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea); | ||
1742 | if (ret < 0) | ||
1743 | return ret; | ||
1744 | } | ||
1745 | list_add_tail(&nce->list, &sctx->name_cache_list); | ||
1746 | sctx->name_cache_size++; | ||
1747 | |||
1748 | return ret; | ||
1749 | } | ||
1750 | |||
1751 | static void name_cache_delete(struct send_ctx *sctx, | ||
1752 | struct name_cache_entry *nce) | ||
1753 | { | ||
1754 | struct name_cache_entry **ncea; | ||
1755 | |||
1756 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | ||
1757 | BUG_ON(!ncea); | ||
1758 | |||
1759 | if (ncea[0] == nce) | ||
1760 | ncea[0] = NULL; | ||
1761 | else if (ncea[1] == nce) | ||
1762 | ncea[1] = NULL; | ||
1763 | else | ||
1764 | BUG(); | ||
1765 | |||
1766 | if (!ncea[0] && !ncea[1]) { | ||
1767 | radix_tree_delete(&sctx->name_cache, nce->ino); | ||
1768 | kfree(ncea); | ||
1769 | } | ||
1770 | |||
1771 | list_del(&nce->list); | ||
1772 | |||
1773 | sctx->name_cache_size--; | ||
1774 | } | ||
1775 | |||
1776 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, | ||
1777 | u64 ino, u64 gen) | ||
1778 | { | ||
1779 | struct name_cache_entry **ncea; | ||
1780 | |||
1781 | ncea = radix_tree_lookup(&sctx->name_cache, ino); | ||
1782 | if (!ncea) | ||
1783 | return NULL; | ||
1784 | |||
1785 | if (ncea[0] && ncea[0]->gen == gen) | ||
1786 | return ncea[0]; | ||
1787 | else if (ncea[1] && ncea[1]->gen == gen) | ||
1788 | return ncea[1]; | ||
1789 | return NULL; | ||
1790 | } | ||
1791 | |||
1792 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) | ||
1793 | { | ||
1794 | list_del(&nce->list); | ||
1795 | list_add_tail(&nce->list, &sctx->name_cache_list); | ||
1796 | } | ||
1797 | |||
1798 | static void name_cache_clean_unused(struct send_ctx *sctx) | ||
1799 | { | ||
1800 | struct name_cache_entry *nce; | ||
1801 | |||
1802 | if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE) | ||
1803 | return; | ||
1804 | |||
1805 | while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) { | ||
1806 | nce = list_entry(sctx->name_cache_list.next, | ||
1807 | struct name_cache_entry, list); | ||
1808 | name_cache_delete(sctx, nce); | ||
1809 | kfree(nce); | ||
1810 | } | ||
1811 | } | ||
1812 | |||
1813 | static void name_cache_free(struct send_ctx *sctx) | ||
1814 | { | ||
1815 | struct name_cache_entry *nce; | ||
1816 | struct name_cache_entry *tmp; | ||
1817 | |||
1818 | list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) { | ||
1819 | name_cache_delete(sctx, nce); | ||
1820 | } | ||
1821 | } | ||
1822 | |||
1823 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | ||
1824 | u64 ino, u64 gen, | ||
1825 | u64 *parent_ino, | ||
1826 | u64 *parent_gen, | ||
1827 | struct fs_path *dest) | ||
1828 | { | ||
1829 | int ret; | ||
1830 | int nce_ret; | ||
1831 | struct btrfs_path *path = NULL; | ||
1832 | struct name_cache_entry *nce = NULL; | ||
1833 | |||
1834 | nce = name_cache_search(sctx, ino, gen); | ||
1835 | if (nce) { | ||
1836 | if (ino < sctx->send_progress && nce->need_later_update) { | ||
1837 | name_cache_delete(sctx, nce); | ||
1838 | kfree(nce); | ||
1839 | nce = NULL; | ||
1840 | } else { | ||
1841 | name_cache_used(sctx, nce); | ||
1842 | *parent_ino = nce->parent_ino; | ||
1843 | *parent_gen = nce->parent_gen; | ||
1844 | ret = fs_path_add(dest, nce->name, nce->name_len); | ||
1845 | if (ret < 0) | ||
1846 | goto out; | ||
1847 | ret = nce->ret; | ||
1848 | goto out; | ||
1849 | } | ||
1850 | } | ||
1851 | |||
1852 | path = alloc_path_for_send(); | ||
1853 | if (!path) | ||
1854 | return -ENOMEM; | ||
1855 | |||
1856 | ret = is_inode_existent(sctx, ino, gen); | ||
1857 | if (ret < 0) | ||
1858 | goto out; | ||
1859 | |||
1860 | if (!ret) { | ||
1861 | ret = gen_unique_name(sctx, ino, gen, dest); | ||
1862 | if (ret < 0) | ||
1863 | goto out; | ||
1864 | ret = 1; | ||
1865 | goto out_cache; | ||
1866 | } | ||
1867 | |||
1868 | if (ino < sctx->send_progress) | ||
1869 | ret = get_first_ref(sctx, sctx->send_root, ino, | ||
1870 | parent_ino, parent_gen, dest); | ||
1871 | else | ||
1872 | ret = get_first_ref(sctx, sctx->parent_root, ino, | ||
1873 | parent_ino, parent_gen, dest); | ||
1874 | if (ret < 0) | ||
1875 | goto out; | ||
1876 | |||
1877 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, | ||
1878 | dest->start, dest->end - dest->start); | ||
1879 | if (ret < 0) | ||
1880 | goto out; | ||
1881 | if (ret) { | ||
1882 | fs_path_reset(dest); | ||
1883 | ret = gen_unique_name(sctx, ino, gen, dest); | ||
1884 | if (ret < 0) | ||
1885 | goto out; | ||
1886 | ret = 1; | ||
1887 | } | ||
1888 | |||
1889 | out_cache: | ||
1890 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); | ||
1891 | if (!nce) { | ||
1892 | ret = -ENOMEM; | ||
1893 | goto out; | ||
1894 | } | ||
1895 | |||
1896 | nce->ino = ino; | ||
1897 | nce->gen = gen; | ||
1898 | nce->parent_ino = *parent_ino; | ||
1899 | nce->parent_gen = *parent_gen; | ||
1900 | nce->name_len = fs_path_len(dest); | ||
1901 | nce->ret = ret; | ||
1902 | strcpy(nce->name, dest->start); | ||
1903 | memset(&nce->use_list, 0, sizeof(nce->use_list)); | ||
1904 | |||
1905 | if (ino < sctx->send_progress) | ||
1906 | nce->need_later_update = 0; | ||
1907 | else | ||
1908 | nce->need_later_update = 1; | ||
1909 | |||
1910 | nce_ret = name_cache_insert(sctx, nce); | ||
1911 | if (nce_ret < 0) | ||
1912 | ret = nce_ret; | ||
1913 | name_cache_clean_unused(sctx); | ||
1914 | |||
1915 | out: | ||
1916 | btrfs_free_path(path); | ||
1917 | return ret; | ||
1918 | } | ||
1919 | |||
1920 | /* | ||
1921 | * Magic happens here. This function returns the first ref to an inode as it | ||
1922 | * would look like while receiving the stream at this point in time. | ||
1923 | * We walk the path up to the root. For every inode in between, we check if it | ||
1924 | * was already processed/sent. If yes, we continue with the parent as found | ||
1925 | * in send_root. If not, we continue with the parent as found in parent_root. | ||
1926 | * If we encounter an inode that was deleted at this point in time, we use the | ||
1927 | * inodes "orphan" name instead of the real name and stop. Same with new inodes | ||
1928 | * that were not created yet and overwritten inodes/refs. | ||
1929 | * | ||
1930 | * When do we have have orphan inodes: | ||
1931 | * 1. When an inode is freshly created and thus no valid refs are available yet | ||
1932 | * 2. When a directory lost all it's refs (deleted) but still has dir items | ||
1933 | * inside which were not processed yet (pending for move/delete). If anyone | ||
1934 | * tried to get the path to the dir items, it would get a path inside that | ||
1935 | * orphan directory. | ||
1936 | * 3. When an inode is moved around or gets new links, it may overwrite the ref | ||
1937 | * of an unprocessed inode. If in that case the first ref would be | ||
1938 | * overwritten, the overwritten inode gets "orphanized". Later when we | ||
1939 | * process this overwritten inode, it is restored at a new place by moving | ||
1940 | * the orphan inode. | ||
1941 | * | ||
1942 | * sctx->send_progress tells this function at which point in time receiving | ||
1943 | * would be. | ||
1944 | */ | ||
1945 | static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | ||
1946 | struct fs_path *dest) | ||
1947 | { | ||
1948 | int ret = 0; | ||
1949 | struct fs_path *name = NULL; | ||
1950 | u64 parent_inode = 0; | ||
1951 | u64 parent_gen = 0; | ||
1952 | int stop = 0; | ||
1953 | |||
1954 | name = fs_path_alloc(sctx); | ||
1955 | if (!name) { | ||
1956 | ret = -ENOMEM; | ||
1957 | goto out; | ||
1958 | } | ||
1959 | |||
1960 | dest->reversed = 1; | ||
1961 | fs_path_reset(dest); | ||
1962 | |||
1963 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
1964 | fs_path_reset(name); | ||
1965 | |||
1966 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
1967 | &parent_inode, &parent_gen, name); | ||
1968 | if (ret < 0) | ||
1969 | goto out; | ||
1970 | if (ret) | ||
1971 | stop = 1; | ||
1972 | |||
1973 | ret = fs_path_add_path(dest, name); | ||
1974 | if (ret < 0) | ||
1975 | goto out; | ||
1976 | |||
1977 | ino = parent_inode; | ||
1978 | gen = parent_gen; | ||
1979 | } | ||
1980 | |||
1981 | out: | ||
1982 | fs_path_free(sctx, name); | ||
1983 | if (!ret) | ||
1984 | fs_path_unreverse(dest); | ||
1985 | return ret; | ||
1986 | } | ||
1987 | |||
1988 | /* | ||
1989 | * Called for regular files when sending extents data. Opens a struct file | ||
1990 | * to read from the file. | ||
1991 | */ | ||
1992 | static int open_cur_inode_file(struct send_ctx *sctx) | ||
1993 | { | ||
1994 | int ret = 0; | ||
1995 | struct btrfs_key key; | ||
1996 | struct path path; | ||
1997 | struct inode *inode; | ||
1998 | struct dentry *dentry; | ||
1999 | struct file *filp; | ||
2000 | int new = 0; | ||
2001 | |||
2002 | if (sctx->cur_inode_filp) | ||
2003 | goto out; | ||
2004 | |||
2005 | key.objectid = sctx->cur_ino; | ||
2006 | key.type = BTRFS_INODE_ITEM_KEY; | ||
2007 | key.offset = 0; | ||
2008 | |||
2009 | inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root, | ||
2010 | &new); | ||
2011 | if (IS_ERR(inode)) { | ||
2012 | ret = PTR_ERR(inode); | ||
2013 | goto out; | ||
2014 | } | ||
2015 | |||
2016 | dentry = d_obtain_alias(inode); | ||
2017 | inode = NULL; | ||
2018 | if (IS_ERR(dentry)) { | ||
2019 | ret = PTR_ERR(dentry); | ||
2020 | goto out; | ||
2021 | } | ||
2022 | |||
2023 | path.mnt = sctx->mnt; | ||
2024 | path.dentry = dentry; | ||
2025 | filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred()); | ||
2026 | dput(dentry); | ||
2027 | dentry = NULL; | ||
2028 | if (IS_ERR(filp)) { | ||
2029 | ret = PTR_ERR(filp); | ||
2030 | goto out; | ||
2031 | } | ||
2032 | sctx->cur_inode_filp = filp; | ||
2033 | |||
2034 | out: | ||
2035 | /* | ||
2036 | * no xxxput required here as every vfs op | ||
2037 | * does it by itself on failure | ||
2038 | */ | ||
2039 | return ret; | ||
2040 | } | ||
2041 | |||
2042 | /* | ||
2043 | * Closes the struct file that was created in open_cur_inode_file | ||
2044 | */ | ||
2045 | static int close_cur_inode_file(struct send_ctx *sctx) | ||
2046 | { | ||
2047 | int ret = 0; | ||
2048 | |||
2049 | if (!sctx->cur_inode_filp) | ||
2050 | goto out; | ||
2051 | |||
2052 | ret = filp_close(sctx->cur_inode_filp, NULL); | ||
2053 | sctx->cur_inode_filp = NULL; | ||
2054 | |||
2055 | out: | ||
2056 | return ret; | ||
2057 | } | ||
2058 | |||
2059 | /* | ||
2060 | * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace | ||
2061 | */ | ||
2062 | static int send_subvol_begin(struct send_ctx *sctx) | ||
2063 | { | ||
2064 | int ret; | ||
2065 | struct btrfs_root *send_root = sctx->send_root; | ||
2066 | struct btrfs_root *parent_root = sctx->parent_root; | ||
2067 | struct btrfs_path *path; | ||
2068 | struct btrfs_key key; | ||
2069 | struct btrfs_root_ref *ref; | ||
2070 | struct extent_buffer *leaf; | ||
2071 | char *name = NULL; | ||
2072 | int namelen; | ||
2073 | |||
2074 | path = alloc_path_for_send(); | ||
2075 | if (!path) | ||
2076 | return -ENOMEM; | ||
2077 | |||
2078 | name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); | ||
2079 | if (!name) { | ||
2080 | btrfs_free_path(path); | ||
2081 | return -ENOMEM; | ||
2082 | } | ||
2083 | |||
2084 | key.objectid = send_root->objectid; | ||
2085 | key.type = BTRFS_ROOT_BACKREF_KEY; | ||
2086 | key.offset = 0; | ||
2087 | |||
2088 | ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, | ||
2089 | &key, path, 1, 0); | ||
2090 | if (ret < 0) | ||
2091 | goto out; | ||
2092 | if (ret) { | ||
2093 | ret = -ENOENT; | ||
2094 | goto out; | ||
2095 | } | ||
2096 | |||
2097 | leaf = path->nodes[0]; | ||
2098 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
2099 | if (key.type != BTRFS_ROOT_BACKREF_KEY || | ||
2100 | key.objectid != send_root->objectid) { | ||
2101 | ret = -ENOENT; | ||
2102 | goto out; | ||
2103 | } | ||
2104 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); | ||
2105 | namelen = btrfs_root_ref_name_len(leaf, ref); | ||
2106 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); | ||
2107 | btrfs_release_path(path); | ||
2108 | |||
2109 | if (ret < 0) | ||
2110 | goto out; | ||
2111 | |||
2112 | if (parent_root) { | ||
2113 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); | ||
2114 | if (ret < 0) | ||
2115 | goto out; | ||
2116 | } else { | ||
2117 | ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); | ||
2118 | if (ret < 0) | ||
2119 | goto out; | ||
2120 | } | ||
2121 | |||
2122 | TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); | ||
2123 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, | ||
2124 | sctx->send_root->root_item.uuid); | ||
2125 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, | ||
2126 | sctx->send_root->root_item.ctransid); | ||
2127 | if (parent_root) { | ||
2128 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, | ||
2129 | sctx->parent_root->root_item.uuid); | ||
2130 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, | ||
2131 | sctx->parent_root->root_item.ctransid); | ||
2132 | } | ||
2133 | |||
2134 | ret = send_cmd(sctx); | ||
2135 | |||
2136 | tlv_put_failure: | ||
2137 | out: | ||
2138 | btrfs_free_path(path); | ||
2139 | kfree(name); | ||
2140 | return ret; | ||
2141 | } | ||
2142 | |||
2143 | static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) | ||
2144 | { | ||
2145 | int ret = 0; | ||
2146 | struct fs_path *p; | ||
2147 | |||
2148 | verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); | ||
2149 | |||
2150 | p = fs_path_alloc(sctx); | ||
2151 | if (!p) | ||
2152 | return -ENOMEM; | ||
2153 | |||
2154 | ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); | ||
2155 | if (ret < 0) | ||
2156 | goto out; | ||
2157 | |||
2158 | ret = get_cur_path(sctx, ino, gen, p); | ||
2159 | if (ret < 0) | ||
2160 | goto out; | ||
2161 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
2162 | TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); | ||
2163 | |||
2164 | ret = send_cmd(sctx); | ||
2165 | |||
2166 | tlv_put_failure: | ||
2167 | out: | ||
2168 | fs_path_free(sctx, p); | ||
2169 | return ret; | ||
2170 | } | ||
2171 | |||
2172 | static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) | ||
2173 | { | ||
2174 | int ret = 0; | ||
2175 | struct fs_path *p; | ||
2176 | |||
2177 | verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); | ||
2178 | |||
2179 | p = fs_path_alloc(sctx); | ||
2180 | if (!p) | ||
2181 | return -ENOMEM; | ||
2182 | |||
2183 | ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); | ||
2184 | if (ret < 0) | ||
2185 | goto out; | ||
2186 | |||
2187 | ret = get_cur_path(sctx, ino, gen, p); | ||
2188 | if (ret < 0) | ||
2189 | goto out; | ||
2190 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
2191 | TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); | ||
2192 | |||
2193 | ret = send_cmd(sctx); | ||
2194 | |||
2195 | tlv_put_failure: | ||
2196 | out: | ||
2197 | fs_path_free(sctx, p); | ||
2198 | return ret; | ||
2199 | } | ||
2200 | |||
2201 | static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) | ||
2202 | { | ||
2203 | int ret = 0; | ||
2204 | struct fs_path *p; | ||
2205 | |||
2206 | verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); | ||
2207 | |||
2208 | p = fs_path_alloc(sctx); | ||
2209 | if (!p) | ||
2210 | return -ENOMEM; | ||
2211 | |||
2212 | ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); | ||
2213 | if (ret < 0) | ||
2214 | goto out; | ||
2215 | |||
2216 | ret = get_cur_path(sctx, ino, gen, p); | ||
2217 | if (ret < 0) | ||
2218 | goto out; | ||
2219 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
2220 | TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); | ||
2221 | TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); | ||
2222 | |||
2223 | ret = send_cmd(sctx); | ||
2224 | |||
2225 | tlv_put_failure: | ||
2226 | out: | ||
2227 | fs_path_free(sctx, p); | ||
2228 | return ret; | ||
2229 | } | ||
2230 | |||
2231 | static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) | ||
2232 | { | ||
2233 | int ret = 0; | ||
2234 | struct fs_path *p = NULL; | ||
2235 | struct btrfs_inode_item *ii; | ||
2236 | struct btrfs_path *path = NULL; | ||
2237 | struct extent_buffer *eb; | ||
2238 | struct btrfs_key key; | ||
2239 | int slot; | ||
2240 | |||
2241 | verbose_printk("btrfs: send_utimes %llu\n", ino); | ||
2242 | |||
2243 | p = fs_path_alloc(sctx); | ||
2244 | if (!p) | ||
2245 | return -ENOMEM; | ||
2246 | |||
2247 | path = alloc_path_for_send(); | ||
2248 | if (!path) { | ||
2249 | ret = -ENOMEM; | ||
2250 | goto out; | ||
2251 | } | ||
2252 | |||
2253 | key.objectid = ino; | ||
2254 | key.type = BTRFS_INODE_ITEM_KEY; | ||
2255 | key.offset = 0; | ||
2256 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | ||
2257 | if (ret < 0) | ||
2258 | goto out; | ||
2259 | |||
2260 | eb = path->nodes[0]; | ||
2261 | slot = path->slots[0]; | ||
2262 | ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); | ||
2263 | |||
2264 | ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); | ||
2265 | if (ret < 0) | ||
2266 | goto out; | ||
2267 | |||
2268 | ret = get_cur_path(sctx, ino, gen, p); | ||
2269 | if (ret < 0) | ||
2270 | goto out; | ||
2271 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
2272 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, | ||
2273 | btrfs_inode_atime(ii)); | ||
2274 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, | ||
2275 | btrfs_inode_mtime(ii)); | ||
2276 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, | ||
2277 | btrfs_inode_ctime(ii)); | ||
2278 | /* TODO otime? */ | ||
2279 | |||
2280 | ret = send_cmd(sctx); | ||
2281 | |||
2282 | tlv_put_failure: | ||
2283 | out: | ||
2284 | fs_path_free(sctx, p); | ||
2285 | btrfs_free_path(path); | ||
2286 | return ret; | ||
2287 | } | ||
2288 | |||
2289 | /* | ||
2290 | * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have | ||
2291 | * a valid path yet because we did not process the refs yet. So, the inode | ||
2292 | * is created as orphan. | ||
2293 | */ | ||
2294 | static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path, | ||
2295 | struct btrfs_key *key) | ||
2296 | { | ||
2297 | int ret = 0; | ||
2298 | struct extent_buffer *eb = path->nodes[0]; | ||
2299 | struct btrfs_inode_item *ii; | ||
2300 | struct fs_path *p; | ||
2301 | int slot = path->slots[0]; | ||
2302 | int cmd; | ||
2303 | u64 mode; | ||
2304 | |||
2305 | verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | ||
2306 | |||
2307 | p = fs_path_alloc(sctx); | ||
2308 | if (!p) | ||
2309 | return -ENOMEM; | ||
2310 | |||
2311 | ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); | ||
2312 | mode = btrfs_inode_mode(eb, ii); | ||
2313 | |||
2314 | if (S_ISREG(mode)) | ||
2315 | cmd = BTRFS_SEND_C_MKFILE; | ||
2316 | else if (S_ISDIR(mode)) | ||
2317 | cmd = BTRFS_SEND_C_MKDIR; | ||
2318 | else if (S_ISLNK(mode)) | ||
2319 | cmd = BTRFS_SEND_C_SYMLINK; | ||
2320 | else if (S_ISCHR(mode) || S_ISBLK(mode)) | ||
2321 | cmd = BTRFS_SEND_C_MKNOD; | ||
2322 | else if (S_ISFIFO(mode)) | ||
2323 | cmd = BTRFS_SEND_C_MKFIFO; | ||
2324 | else if (S_ISSOCK(mode)) | ||
2325 | cmd = BTRFS_SEND_C_MKSOCK; | ||
2326 | else { | ||
2327 | printk(KERN_WARNING "btrfs: unexpected inode type %o", | ||
2328 | (int)(mode & S_IFMT)); | ||
2329 | ret = -ENOTSUPP; | ||
2330 | goto out; | ||
2331 | } | ||
2332 | |||
2333 | ret = begin_cmd(sctx, cmd); | ||
2334 | if (ret < 0) | ||
2335 | goto out; | ||
2336 | |||
2337 | ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
2338 | if (ret < 0) | ||
2339 | goto out; | ||
2340 | |||
2341 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
2342 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino); | ||
2343 | |||
2344 | if (S_ISLNK(mode)) { | ||
2345 | fs_path_reset(p); | ||
2346 | ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p); | ||
2347 | if (ret < 0) | ||
2348 | goto out; | ||
2349 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); | ||
2350 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || | ||
2351 | S_ISFIFO(mode) || S_ISSOCK(mode)) { | ||
2352 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii)); | ||
2353 | } | ||
2354 | |||
2355 | ret = send_cmd(sctx); | ||
2356 | if (ret < 0) | ||
2357 | goto out; | ||
2358 | |||
2359 | |||
2360 | tlv_put_failure: | ||
2361 | out: | ||
2362 | fs_path_free(sctx, p); | ||
2363 | return ret; | ||
2364 | } | ||
2365 | |||
/*
 * One recorded ref of the inode that is currently processed. Instances are
 * created by record_ref and linked into a list of new or deleted refs.
 */
struct recorded_ref {
	struct list_head list;		/* link in the list passed to record_ref */
	char *dir_path;			/* points at full_path->start */
	char *name;			/* last path component, points into full_path */
	struct fs_path *full_path;	/* freed together with the ref */
	u64 dir;			/* dir value passed to record_ref */
	u64 dir_gen;			/* dir_gen value passed to record_ref */
	int dir_path_len;		/* length of the part before the last '/' */
	int name_len;			/* length of name */
};
2376 | |||
2377 | /* | ||
2378 | * We need to process new refs before deleted refs, but compare_tree gives us | ||
2379 | * everything mixed. So we first record all refs and later process them. | ||
2380 | * This function is a helper to record one ref. | ||
2381 | */ | ||
2382 | static int record_ref(struct list_head *head, u64 dir, | ||
2383 | u64 dir_gen, struct fs_path *path) | ||
2384 | { | ||
2385 | struct recorded_ref *ref; | ||
2386 | char *tmp; | ||
2387 | |||
2388 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
2389 | if (!ref) | ||
2390 | return -ENOMEM; | ||
2391 | |||
2392 | ref->dir = dir; | ||
2393 | ref->dir_gen = dir_gen; | ||
2394 | ref->full_path = path; | ||
2395 | |||
2396 | tmp = strrchr(ref->full_path->start, '/'); | ||
2397 | if (!tmp) { | ||
2398 | ref->name_len = ref->full_path->end - ref->full_path->start; | ||
2399 | ref->name = ref->full_path->start; | ||
2400 | ref->dir_path_len = 0; | ||
2401 | ref->dir_path = ref->full_path->start; | ||
2402 | } else { | ||
2403 | tmp++; | ||
2404 | ref->name_len = ref->full_path->end - tmp; | ||
2405 | ref->name = tmp; | ||
2406 | ref->dir_path = ref->full_path->start; | ||
2407 | ref->dir_path_len = ref->full_path->end - | ||
2408 | ref->full_path->start - 1 - ref->name_len; | ||
2409 | } | ||
2410 | |||
2411 | list_add_tail(&ref->list, head); | ||
2412 | return 0; | ||
2413 | } | ||
2414 | |||
2415 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | ||
2416 | { | ||
2417 | struct recorded_ref *cur; | ||
2418 | struct recorded_ref *tmp; | ||
2419 | |||
2420 | list_for_each_entry_safe(cur, tmp, head, list) { | ||
2421 | fs_path_free(sctx, cur->full_path); | ||
2422 | kfree(cur); | ||
2423 | } | ||
2424 | INIT_LIST_HEAD(head); | ||
2425 | } | ||
2426 | |||
2427 | static void free_recorded_refs(struct send_ctx *sctx) | ||
2428 | { | ||
2429 | __free_recorded_refs(sctx, &sctx->new_refs); | ||
2430 | __free_recorded_refs(sctx, &sctx->deleted_refs); | ||
2431 | } | ||
2432 | |||
2433 | /* | ||
2434 | * Renames/moves a file/dir to it's orphan name. Used when the first | ||
2435 | * ref of an unprocessed inode gets overwritten and for all non empty | ||
2436 | * directories. | ||
2437 | */ | ||
2438 | static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, | ||
2439 | struct fs_path *path) | ||
2440 | { | ||
2441 | int ret; | ||
2442 | struct fs_path *orphan; | ||
2443 | |||
2444 | orphan = fs_path_alloc(sctx); | ||
2445 | if (!orphan) | ||
2446 | return -ENOMEM; | ||
2447 | |||
2448 | ret = gen_unique_name(sctx, ino, gen, orphan); | ||
2449 | if (ret < 0) | ||
2450 | goto out; | ||
2451 | |||
2452 | ret = send_rename(sctx, path, orphan); | ||
2453 | |||
2454 | out: | ||
2455 | fs_path_free(sctx, orphan); | ||
2456 | return ret; | ||
2457 | } | ||
2458 | |||
2459 | /* | ||
2460 | * Returns 1 if a directory can be removed at this point in time. | ||
2461 | * We check this by iterating all dir items and checking if the inode behind | ||
2462 | * the dir item was already processed. | ||
2463 | */ | ||
2464 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | ||
2465 | { | ||
2466 | int ret = 0; | ||
2467 | struct btrfs_root *root = sctx->parent_root; | ||
2468 | struct btrfs_path *path; | ||
2469 | struct btrfs_key key; | ||
2470 | struct btrfs_key found_key; | ||
2471 | struct btrfs_key loc; | ||
2472 | struct btrfs_dir_item *di; | ||
2473 | |||
2474 | path = alloc_path_for_send(); | ||
2475 | if (!path) | ||
2476 | return -ENOMEM; | ||
2477 | |||
2478 | key.objectid = dir; | ||
2479 | key.type = BTRFS_DIR_INDEX_KEY; | ||
2480 | key.offset = 0; | ||
2481 | |||
2482 | while (1) { | ||
2483 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | ||
2484 | if (ret < 0) | ||
2485 | goto out; | ||
2486 | if (!ret) { | ||
2487 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
2488 | path->slots[0]); | ||
2489 | } | ||
2490 | if (ret || found_key.objectid != key.objectid || | ||
2491 | found_key.type != key.type) { | ||
2492 | break; | ||
2493 | } | ||
2494 | |||
2495 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2496 | struct btrfs_dir_item); | ||
2497 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); | ||
2498 | |||
2499 | if (loc.objectid > send_progress) { | ||
2500 | ret = 0; | ||
2501 | goto out; | ||
2502 | } | ||
2503 | |||
2504 | btrfs_release_path(path); | ||
2505 | key.offset = found_key.offset + 1; | ||
2506 | } | ||
2507 | |||
2508 | ret = 1; | ||
2509 | |||
2510 | out: | ||
2511 | btrfs_free_path(path); | ||
2512 | return ret; | ||
2513 | } | ||
2514 | |||
/*
 * Context handed from finish_outoforder_dir to the per dir item callback
 * __finish_unordered_dir.
 */
struct finish_unordered_dir_ctx {
	struct send_ctx *sctx;
	struct fs_path *cur_path;	/* scratch: current name of the dir entry's inode */
	struct fs_path *dir_path;	/* path of the dir, entry name is added temporarily */
	u64 dir_ino;			/* inode number of the dir being finished */
	int need_delete;		/* set when an orphan was linked in, asks for the delete pass */
	int delete_pass;		/* 0 = link/rename pass, 1 = unlink pass */
};
2523 | |||
2524 | int __finish_unordered_dir(int num, struct btrfs_key *di_key, | ||
2525 | const char *name, int name_len, | ||
2526 | const char *data, int data_len, | ||
2527 | u8 type, void *ctx) | ||
2528 | { | ||
2529 | int ret = 0; | ||
2530 | struct finish_unordered_dir_ctx *fctx = ctx; | ||
2531 | struct send_ctx *sctx = fctx->sctx; | ||
2532 | u64 di_gen; | ||
2533 | u64 di_mode; | ||
2534 | int is_orphan = 0; | ||
2535 | |||
2536 | if (di_key->objectid >= fctx->dir_ino) | ||
2537 | goto out; | ||
2538 | |||
2539 | fs_path_reset(fctx->cur_path); | ||
2540 | |||
2541 | ret = get_inode_info(sctx->send_root, di_key->objectid, | ||
2542 | NULL, &di_gen, &di_mode, NULL, NULL); | ||
2543 | if (ret < 0) | ||
2544 | goto out; | ||
2545 | |||
2546 | ret = is_first_ref(sctx, sctx->send_root, di_key->objectid, | ||
2547 | fctx->dir_ino, name, name_len); | ||
2548 | if (ret < 0) | ||
2549 | goto out; | ||
2550 | if (ret) { | ||
2551 | is_orphan = 1; | ||
2552 | ret = gen_unique_name(sctx, di_key->objectid, di_gen, | ||
2553 | fctx->cur_path); | ||
2554 | } else { | ||
2555 | ret = get_cur_path(sctx, di_key->objectid, di_gen, | ||
2556 | fctx->cur_path); | ||
2557 | } | ||
2558 | if (ret < 0) | ||
2559 | goto out; | ||
2560 | |||
2561 | ret = fs_path_add(fctx->dir_path, name, name_len); | ||
2562 | if (ret < 0) | ||
2563 | goto out; | ||
2564 | |||
2565 | if (!fctx->delete_pass) { | ||
2566 | if (S_ISDIR(di_mode)) { | ||
2567 | ret = send_rename(sctx, fctx->cur_path, | ||
2568 | fctx->dir_path); | ||
2569 | } else { | ||
2570 | ret = send_link(sctx, fctx->dir_path, | ||
2571 | fctx->cur_path); | ||
2572 | if (is_orphan) | ||
2573 | fctx->need_delete = 1; | ||
2574 | } | ||
2575 | } else if (!S_ISDIR(di_mode)) { | ||
2576 | ret = send_unlink(sctx, fctx->cur_path); | ||
2577 | } else { | ||
2578 | ret = 0; | ||
2579 | } | ||
2580 | |||
2581 | fs_path_remove(fctx->dir_path); | ||
2582 | |||
2583 | out: | ||
2584 | return ret; | ||
2585 | } | ||
2586 | |||
2587 | /* | ||
2588 | * Go through all dir items and see if we find refs which could not be created | ||
2589 | * in the past because the dir did not exist at that time. | ||
2590 | */ | ||
2591 | static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen) | ||
2592 | { | ||
2593 | int ret = 0; | ||
2594 | struct btrfs_path *path = NULL; | ||
2595 | struct btrfs_key key; | ||
2596 | struct btrfs_key found_key; | ||
2597 | struct extent_buffer *eb; | ||
2598 | struct finish_unordered_dir_ctx fctx; | ||
2599 | int slot; | ||
2600 | |||
2601 | path = alloc_path_for_send(); | ||
2602 | if (!path) { | ||
2603 | ret = -ENOMEM; | ||
2604 | goto out; | ||
2605 | } | ||
2606 | |||
2607 | memset(&fctx, 0, sizeof(fctx)); | ||
2608 | fctx.sctx = sctx; | ||
2609 | fctx.cur_path = fs_path_alloc(sctx); | ||
2610 | fctx.dir_path = fs_path_alloc(sctx); | ||
2611 | if (!fctx.cur_path || !fctx.dir_path) { | ||
2612 | ret = -ENOMEM; | ||
2613 | goto out; | ||
2614 | } | ||
2615 | fctx.dir_ino = dir; | ||
2616 | |||
2617 | ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path); | ||
2618 | if (ret < 0) | ||
2619 | goto out; | ||
2620 | |||
2621 | /* | ||
2622 | * We do two passes. The first links in the new refs and the second | ||
2623 | * deletes orphans if required. Deletion of orphans is not required for | ||
2624 | * directory inodes, as we always have only one ref and use rename | ||
2625 | * instead of link for those. | ||
2626 | */ | ||
2627 | |||
2628 | again: | ||
2629 | key.objectid = dir; | ||
2630 | key.type = BTRFS_DIR_ITEM_KEY; | ||
2631 | key.offset = 0; | ||
2632 | while (1) { | ||
2633 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
2634 | 1, 0); | ||
2635 | if (ret < 0) | ||
2636 | goto out; | ||
2637 | eb = path->nodes[0]; | ||
2638 | slot = path->slots[0]; | ||
2639 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
2640 | |||
2641 | if (found_key.objectid != key.objectid || | ||
2642 | found_key.type != key.type) { | ||
2643 | btrfs_release_path(path); | ||
2644 | break; | ||
2645 | } | ||
2646 | |||
2647 | ret = iterate_dir_item(sctx, sctx->send_root, path, | ||
2648 | &found_key, __finish_unordered_dir, | ||
2649 | &fctx); | ||
2650 | if (ret < 0) | ||
2651 | goto out; | ||
2652 | |||
2653 | key.offset = found_key.offset + 1; | ||
2654 | btrfs_release_path(path); | ||
2655 | } | ||
2656 | |||
2657 | if (!fctx.delete_pass && fctx.need_delete) { | ||
2658 | fctx.delete_pass = 1; | ||
2659 | goto again; | ||
2660 | } | ||
2661 | |||
2662 | out: | ||
2663 | btrfs_free_path(path); | ||
2664 | fs_path_free(sctx, fctx.cur_path); | ||
2665 | fs_path_free(sctx, fctx.dir_path); | ||
2666 | return ret; | ||
2667 | } | ||
2668 | |||
/*
 * This does all the move/link/unlink/rmdir magic.
 *
 * Works on the refs recorded in sctx->new_refs and sctx->deleted_refs for
 * the current inode (sctx->cur_ino). In order, it:
 *  - determines the name under which the inode is reachable right now
 *    (valid_path), which is the orphan name for new or overwritten inodes,
 *  - makes room for every new ref and then renames/links the inode there,
 *  - handles deleted refs (rmdir or orphanize for directories, unlink for
 *    everything else),
 *  - revisits all parent dirs that were touched (utimes, delayed rmdir),
 *  - advances sctx->send_progress and finishes directories with pending
 *    refs.
 *
 * The recorded refs are always freed before returning. Returns 0 on success
 * or a negative error.
 */
static int process_recorded_refs(struct send_ctx *sctx)
{
	int ret = 0;
	struct recorded_ref *cur;
	struct ulist *check_dirs = NULL;
	struct ulist_iterator uit;
	struct ulist_node *un;
	struct fs_path *valid_path = NULL;
	u64 ow_inode = 0;
	u64 ow_gen;
	int did_overwrite = 0;
	int is_orphan = 0;

	verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);

	valid_path = fs_path_alloc(sctx);
	if (!valid_path) {
		ret = -ENOMEM;
		goto out;
	}

	/* collects the parent dirs (val = ino, aux = gen) to revisit below */
	check_dirs = ulist_alloc(GFP_NOFS);
	if (!check_dirs) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * First, check if the first ref of the current inode was overwritten
	 * before. If yes, we know that the current inode was already orphanized
	 * and thus use the orphan name. If not, we can use get_cur_path to
	 * get the path of the first ref as it would look like while receiving
	 * at this point in time.
	 * New inodes are always orphan at the beginning, so force to use the
	 * orphan name in this case.
	 * The first ref is stored in valid_path and will be updated if it
	 * gets moved around.
	 */
	if (!sctx->cur_inode_new) {
		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
				sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
		if (ret)
			did_overwrite = 1;
	}
	if (sctx->cur_inode_new || did_overwrite) {
		ret = gen_unique_name(sctx, sctx->cur_ino,
				sctx->cur_inode_gen, valid_path);
		if (ret < 0)
			goto out;
		is_orphan = 1;
	} else {
		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				valid_path);
		if (ret < 0)
			goto out;
	}

	list_for_each_entry(cur, &sctx->new_refs, list) {
		/*
		 * Check if this new ref would overwrite the first ref of
		 * another unprocessed inode. If yes, orphanize the
		 * overwritten inode. If we find an overwritten ref that is
		 * not the first ref, simply unlink it.
		 */
		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
				cur->name, cur->name_len,
				&ow_inode, &ow_gen);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = is_first_ref(sctx, sctx->parent_root,
					ow_inode, cur->dir, cur->name,
					cur->name_len);
			if (ret < 0)
				goto out;
			if (ret) {
				ret = orphanize_inode(sctx, ow_inode, ow_gen,
						cur->full_path);
				if (ret < 0)
					goto out;
			} else {
				ret = send_unlink(sctx, cur->full_path);
				if (ret < 0)
					goto out;
			}
		}

		/*
		 * link/move the ref to the new place. If we have an orphan
		 * inode, move it and update valid_path. If not, link or move
		 * it depending on the inode mode.
		 */
		if (is_orphan && !sctx->cur_inode_first_ref_orphan) {
			ret = send_rename(sctx, valid_path, cur->full_path);
			if (ret < 0)
				goto out;
			is_orphan = 0;
			ret = fs_path_copy(valid_path, cur->full_path);
			if (ret < 0)
				goto out;
		} else {
			if (S_ISDIR(sctx->cur_inode_mode)) {
				/*
				 * Dirs can't be linked, so move it. For moved
				 * dirs, we always have one new and one deleted
				 * ref. The deleted ref is ignored later.
				 */
				ret = send_rename(sctx, valid_path,
						cur->full_path);
				if (ret < 0)
					goto out;
				ret = fs_path_copy(valid_path, cur->full_path);
				if (ret < 0)
					goto out;
			} else {
				ret = send_link(sctx, cur->full_path,
						valid_path);
				if (ret < 0)
					goto out;
			}
		}
		ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
				GFP_NOFS);
		if (ret < 0)
			goto out;
	}

	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
		/*
		 * Check if we can already rmdir the directory. If not,
		 * orphanize it. For every dir item inside that gets deleted
		 * later, we do this check again and rmdir it then if possible.
		 * See the use of check_dirs for more details.
		 */
		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = send_rmdir(sctx, valid_path);
			if (ret < 0)
				goto out;
		} else if (!is_orphan) {
			ret = orphanize_inode(sctx, sctx->cur_ino,
					sctx->cur_inode_gen, valid_path);
			if (ret < 0)
				goto out;
			is_orphan = 1;
		}

		/* the former parent dirs have to be revisited as well */
		list_for_each_entry(cur, &sctx->deleted_refs, list) {
			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
					GFP_NOFS);
			if (ret < 0)
				goto out;
		}
	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
		/*
		 * We have a non dir inode. Go through all deleted refs and
		 * unlink them if they were not already overwritten by other
		 * inodes.
		 */
		list_for_each_entry(cur, &sctx->deleted_refs, list) {
			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
					sctx->cur_ino, sctx->cur_inode_gen,
					cur->name, cur->name_len);
			if (ret < 0)
				goto out;
			if (!ret) {
				/*
				 * In case the inode was moved to a directory
				 * that was not created yet (see
				 * __record_new_ref), we can not unlink the ref
				 * as it will be needed later when the parent
				 * directory is created, so that we can move in
				 * the inode to the new dir.
				 */
				if (!is_orphan &&
				    sctx->cur_inode_first_ref_orphan) {
					ret = orphanize_inode(sctx,
							sctx->cur_ino,
							sctx->cur_inode_gen,
							cur->full_path);
					if (ret < 0)
						goto out;
					/* valid_path now is the orphan name */
					ret = gen_unique_name(sctx,
							sctx->cur_ino,
							sctx->cur_inode_gen,
							valid_path);
					if (ret < 0)
						goto out;
					is_orphan = 1;

				} else {
					ret = send_unlink(sctx, cur->full_path);
					if (ret < 0)
						goto out;
				}
			}
			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
					GFP_NOFS);
			if (ret < 0)
				goto out;
		}

		/*
		 * If the inode is still orphan, unlink the orphan. This may
		 * happen when a previous inode did overwrite the first ref
		 * of this inode and no new refs were added for the current
		 * inode.
		 * We can however not delete the orphan in case the inode
		 * resides in a directory that was not created yet (see
		 * __record_new_ref)
		 */
		if (is_orphan && !sctx->cur_inode_first_ref_orphan) {
			ret = send_unlink(sctx, valid_path);
			if (ret < 0)
				goto out;
		}
	}

	/*
	 * We did collect all parent dirs where cur_inode was once located. We
	 * now go through all these dirs and check if they are pending for
	 * deletion and if it's finally possible to perform the rmdir now.
	 * We also update the inode stats of the parent dirs here.
	 */
	ULIST_ITER_INIT(&uit);
	while ((un = ulist_next(check_dirs, &uit))) {
		/* dirs with a higher inode number than ours are skipped here */
		if (un->val > sctx->cur_ino)
			continue;

		ret = get_cur_inode_state(sctx, un->val, un->aux);
		if (ret < 0)
			goto out;

		if (ret == inode_state_did_create ||
		    ret == inode_state_no_change) {
			/* TODO delayed utimes */
			ret = send_utimes(sctx, un->val, un->aux);
			if (ret < 0)
				goto out;
		} else if (ret == inode_state_did_delete) {
			ret = can_rmdir(sctx, un->val, sctx->cur_ino);
			if (ret < 0)
				goto out;
			if (ret) {
				/* valid_path is reused as scratch space here */
				ret = get_cur_path(sctx, un->val, un->aux,
						valid_path);
				if (ret < 0)
					goto out;
				ret = send_rmdir(sctx, valid_path);
				if (ret < 0)
					goto out;
			}
		}
	}

	/*
	 * Current inode is now at its new position, so we must increase
	 * send_progress
	 */
	sctx->send_progress = sctx->cur_ino + 1;

	/*
	 * We may have a directory here that has pending refs which could not
	 * be created before (because the dir did not exist before, see
	 * __record_new_ref). finish_outoforder_dir will link/move the pending
	 * refs.
	 */
	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) {
		ret = finish_outoforder_dir(sctx, sctx->cur_ino,
				sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
	}

	ret = 0;

out:
	free_recorded_refs(sctx);
	ulist_free(check_dirs);
	fs_path_free(sctx, valid_path);
	return ret;
}
2958 | |||
2959 | static int __record_new_ref(int num, u64 dir, int index, | ||
2960 | struct fs_path *name, | ||
2961 | void *ctx) | ||
2962 | { | ||
2963 | int ret = 0; | ||
2964 | struct send_ctx *sctx = ctx; | ||
2965 | struct fs_path *p; | ||
2966 | u64 gen; | ||
2967 | |||
2968 | p = fs_path_alloc(sctx); | ||
2969 | if (!p) | ||
2970 | return -ENOMEM; | ||
2971 | |||
2972 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | ||
2973 | NULL); | ||
2974 | if (ret < 0) | ||
2975 | goto out; | ||
2976 | |||
2977 | /* | ||
2978 | * The parent may be non-existent at this point in time. This happens | ||
2979 | * if the ino of the parent dir is higher then the current ino. In this | ||
2980 | * case, we can not process this ref until the parent dir is finally | ||
2981 | * created. If we reach the parent dir later, process_recorded_refs | ||
2982 | * will go through all dir items and process the refs that could not be | ||
2983 | * processed before. In case this is the first ref, we set | ||
2984 | * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to | ||
2985 | * keep an orphan of the inode so that it later can be used for | ||
2986 | * link/move | ||
2987 | */ | ||
2988 | ret = is_inode_existent(sctx, dir, gen); | ||
2989 | if (ret < 0) | ||
2990 | goto out; | ||
2991 | if (!ret) { | ||
2992 | ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir, | ||
2993 | name->start, fs_path_len(name)); | ||
2994 | if (ret < 0) | ||
2995 | goto out; | ||
2996 | if (ret) | ||
2997 | sctx->cur_inode_first_ref_orphan = 1; | ||
2998 | ret = 0; | ||
2999 | goto out; | ||
3000 | } | ||
3001 | |||
3002 | ret = get_cur_path(sctx, dir, gen, p); | ||
3003 | if (ret < 0) | ||
3004 | goto out; | ||
3005 | ret = fs_path_add_path(p, name); | ||
3006 | if (ret < 0) | ||
3007 | goto out; | ||
3008 | |||
3009 | ret = record_ref(&sctx->new_refs, dir, gen, p); | ||
3010 | |||
3011 | out: | ||
3012 | if (ret) | ||
3013 | fs_path_free(sctx, p); | ||
3014 | return ret; | ||
3015 | } | ||
3016 | |||
3017 | static int __record_deleted_ref(int num, u64 dir, int index, | ||
3018 | struct fs_path *name, | ||
3019 | void *ctx) | ||
3020 | { | ||
3021 | int ret = 0; | ||
3022 | struct send_ctx *sctx = ctx; | ||
3023 | struct fs_path *p; | ||
3024 | u64 gen; | ||
3025 | |||
3026 | p = fs_path_alloc(sctx); | ||
3027 | if (!p) | ||
3028 | return -ENOMEM; | ||
3029 | |||
3030 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | ||
3031 | NULL); | ||
3032 | if (ret < 0) | ||
3033 | goto out; | ||
3034 | |||
3035 | ret = get_cur_path(sctx, dir, gen, p); | ||
3036 | if (ret < 0) | ||
3037 | goto out; | ||
3038 | ret = fs_path_add_path(p, name); | ||
3039 | if (ret < 0) | ||
3040 | goto out; | ||
3041 | |||
3042 | ret = record_ref(&sctx->deleted_refs, dir, gen, p); | ||
3043 | |||
3044 | out: | ||
3045 | if (ret) | ||
3046 | fs_path_free(sctx, p); | ||
3047 | return ret; | ||
3048 | } | ||
3049 | |||
3050 | static int record_new_ref(struct send_ctx *sctx) | ||
3051 | { | ||
3052 | int ret; | ||
3053 | |||
3054 | ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, | ||
3055 | sctx->cmp_key, 0, __record_new_ref, sctx); | ||
3056 | if (ret < 0) | ||
3057 | goto out; | ||
3058 | ret = 0; | ||
3059 | |||
3060 | out: | ||
3061 | return ret; | ||
3062 | } | ||
3063 | |||
3064 | static int record_deleted_ref(struct send_ctx *sctx) | ||
3065 | { | ||
3066 | int ret; | ||
3067 | |||
3068 | ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, | ||
3069 | sctx->cmp_key, 0, __record_deleted_ref, sctx); | ||
3070 | if (ret < 0) | ||
3071 | goto out; | ||
3072 | ret = 0; | ||
3073 | |||
3074 | out: | ||
3075 | return ret; | ||
3076 | } | ||
3077 | |||
3078 | struct find_ref_ctx { | ||
3079 | u64 dir; | ||
3080 | struct fs_path *name; | ||
3081 | int found_idx; | ||
3082 | }; | ||
3083 | |||
3084 | static int __find_iref(int num, u64 dir, int index, | ||
3085 | struct fs_path *name, | ||
3086 | void *ctx_) | ||
3087 | { | ||
3088 | struct find_ref_ctx *ctx = ctx_; | ||
3089 | |||
3090 | if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && | ||
3091 | strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { | ||
3092 | ctx->found_idx = num; | ||
3093 | return 1; | ||
3094 | } | ||
3095 | return 0; | ||
3096 | } | ||
3097 | |||
3098 | static int find_iref(struct send_ctx *sctx, | ||
3099 | struct btrfs_root *root, | ||
3100 | struct btrfs_path *path, | ||
3101 | struct btrfs_key *key, | ||
3102 | u64 dir, struct fs_path *name) | ||
3103 | { | ||
3104 | int ret; | ||
3105 | struct find_ref_ctx ctx; | ||
3106 | |||
3107 | ctx.dir = dir; | ||
3108 | ctx.name = name; | ||
3109 | ctx.found_idx = -1; | ||
3110 | |||
3111 | ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx); | ||
3112 | if (ret < 0) | ||
3113 | return ret; | ||
3114 | |||
3115 | if (ctx.found_idx == -1) | ||
3116 | return -ENOENT; | ||
3117 | |||
3118 | return ctx.found_idx; | ||
3119 | } | ||
3120 | |||
3121 | static int __record_changed_new_ref(int num, u64 dir, int index, | ||
3122 | struct fs_path *name, | ||
3123 | void *ctx) | ||
3124 | { | ||
3125 | int ret; | ||
3126 | struct send_ctx *sctx = ctx; | ||
3127 | |||
3128 | ret = find_iref(sctx, sctx->parent_root, sctx->right_path, | ||
3129 | sctx->cmp_key, dir, name); | ||
3130 | if (ret == -ENOENT) | ||
3131 | ret = __record_new_ref(num, dir, index, name, sctx); | ||
3132 | else if (ret > 0) | ||
3133 | ret = 0; | ||
3134 | |||
3135 | return ret; | ||
3136 | } | ||
3137 | |||
3138 | static int __record_changed_deleted_ref(int num, u64 dir, int index, | ||
3139 | struct fs_path *name, | ||
3140 | void *ctx) | ||
3141 | { | ||
3142 | int ret; | ||
3143 | struct send_ctx *sctx = ctx; | ||
3144 | |||
3145 | ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, | ||
3146 | dir, name); | ||
3147 | if (ret == -ENOENT) | ||
3148 | ret = __record_deleted_ref(num, dir, index, name, sctx); | ||
3149 | else if (ret > 0) | ||
3150 | ret = 0; | ||
3151 | |||
3152 | return ret; | ||
3153 | } | ||
3154 | |||
3155 | static int record_changed_ref(struct send_ctx *sctx) | ||
3156 | { | ||
3157 | int ret = 0; | ||
3158 | |||
3159 | ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, | ||
3160 | sctx->cmp_key, 0, __record_changed_new_ref, sctx); | ||
3161 | if (ret < 0) | ||
3162 | goto out; | ||
3163 | ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, | ||
3164 | sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); | ||
3165 | if (ret < 0) | ||
3166 | goto out; | ||
3167 | ret = 0; | ||
3168 | |||
3169 | out: | ||
3170 | return ret; | ||
3171 | } | ||
3172 | |||
3173 | /* | ||
3174 | * Record and process all refs at once. Needed when an inode changes the | ||
3175 | * generation number, which means that it was deleted and recreated. | ||
3176 | */ | ||
3177 | static int process_all_refs(struct send_ctx *sctx, | ||
3178 | enum btrfs_compare_tree_result cmd) | ||
3179 | { | ||
3180 | int ret; | ||
3181 | struct btrfs_root *root; | ||
3182 | struct btrfs_path *path; | ||
3183 | struct btrfs_key key; | ||
3184 | struct btrfs_key found_key; | ||
3185 | struct extent_buffer *eb; | ||
3186 | int slot; | ||
3187 | iterate_inode_ref_t cb; | ||
3188 | |||
3189 | path = alloc_path_for_send(); | ||
3190 | if (!path) | ||
3191 | return -ENOMEM; | ||
3192 | |||
3193 | if (cmd == BTRFS_COMPARE_TREE_NEW) { | ||
3194 | root = sctx->send_root; | ||
3195 | cb = __record_new_ref; | ||
3196 | } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { | ||
3197 | root = sctx->parent_root; | ||
3198 | cb = __record_deleted_ref; | ||
3199 | } else { | ||
3200 | BUG(); | ||
3201 | } | ||
3202 | |||
3203 | key.objectid = sctx->cmp_key->objectid; | ||
3204 | key.type = BTRFS_INODE_REF_KEY; | ||
3205 | key.offset = 0; | ||
3206 | while (1) { | ||
3207 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | ||
3208 | if (ret < 0) { | ||
3209 | btrfs_release_path(path); | ||
3210 | goto out; | ||
3211 | } | ||
3212 | if (ret) { | ||
3213 | btrfs_release_path(path); | ||
3214 | break; | ||
3215 | } | ||
3216 | |||
3217 | eb = path->nodes[0]; | ||
3218 | slot = path->slots[0]; | ||
3219 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
3220 | |||
3221 | if (found_key.objectid != key.objectid || | ||
3222 | found_key.type != key.type) { | ||
3223 | btrfs_release_path(path); | ||
3224 | break; | ||
3225 | } | ||
3226 | |||
3227 | ret = iterate_inode_ref(sctx, sctx->parent_root, path, | ||
3228 | &found_key, 0, cb, sctx); | ||
3229 | btrfs_release_path(path); | ||
3230 | if (ret < 0) | ||
3231 | goto out; | ||
3232 | |||
3233 | key.offset = found_key.offset + 1; | ||
3234 | } | ||
3235 | |||
3236 | ret = process_recorded_refs(sctx); | ||
3237 | |||
3238 | out: | ||
3239 | btrfs_free_path(path); | ||
3240 | return ret; | ||
3241 | } | ||
3242 | |||
3243 | static int send_set_xattr(struct send_ctx *sctx, | ||
3244 | struct fs_path *path, | ||
3245 | const char *name, int name_len, | ||
3246 | const char *data, int data_len) | ||
3247 | { | ||
3248 | int ret = 0; | ||
3249 | |||
3250 | ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); | ||
3251 | if (ret < 0) | ||
3252 | goto out; | ||
3253 | |||
3254 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); | ||
3255 | TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); | ||
3256 | TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); | ||
3257 | |||
3258 | ret = send_cmd(sctx); | ||
3259 | |||
3260 | tlv_put_failure: | ||
3261 | out: | ||
3262 | return ret; | ||
3263 | } | ||
3264 | |||
3265 | static int send_remove_xattr(struct send_ctx *sctx, | ||
3266 | struct fs_path *path, | ||
3267 | const char *name, int name_len) | ||
3268 | { | ||
3269 | int ret = 0; | ||
3270 | |||
3271 | ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); | ||
3272 | if (ret < 0) | ||
3273 | goto out; | ||
3274 | |||
3275 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); | ||
3276 | TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); | ||
3277 | |||
3278 | ret = send_cmd(sctx); | ||
3279 | |||
3280 | tlv_put_failure: | ||
3281 | out: | ||
3282 | return ret; | ||
3283 | } | ||
3284 | |||
3285 | static int __process_new_xattr(int num, struct btrfs_key *di_key, | ||
3286 | const char *name, int name_len, | ||
3287 | const char *data, int data_len, | ||
3288 | u8 type, void *ctx) | ||
3289 | { | ||
3290 | int ret; | ||
3291 | struct send_ctx *sctx = ctx; | ||
3292 | struct fs_path *p; | ||
3293 | posix_acl_xattr_header dummy_acl; | ||
3294 | |||
3295 | p = fs_path_alloc(sctx); | ||
3296 | if (!p) | ||
3297 | return -ENOMEM; | ||
3298 | |||
3299 | /* | ||
3300 | * This hack is needed because empty acl's are stored as zero byte | ||
3301 | * data in xattrs. Problem with that is, that receiving these zero byte | ||
3302 | * acl's will fail later. To fix this, we send a dummy acl list that | ||
3303 | * only contains the version number and no entries. | ||
3304 | */ | ||
3305 | if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || | ||
3306 | !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { | ||
3307 | if (data_len == 0) { | ||
3308 | dummy_acl.a_version = | ||
3309 | cpu_to_le32(POSIX_ACL_XATTR_VERSION); | ||
3310 | data = (char *)&dummy_acl; | ||
3311 | data_len = sizeof(dummy_acl); | ||
3312 | } | ||
3313 | } | ||
3314 | |||
3315 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
3316 | if (ret < 0) | ||
3317 | goto out; | ||
3318 | |||
3319 | ret = send_set_xattr(sctx, p, name, name_len, data, data_len); | ||
3320 | |||
3321 | out: | ||
3322 | fs_path_free(sctx, p); | ||
3323 | return ret; | ||
3324 | } | ||
3325 | |||
3326 | static int __process_deleted_xattr(int num, struct btrfs_key *di_key, | ||
3327 | const char *name, int name_len, | ||
3328 | const char *data, int data_len, | ||
3329 | u8 type, void *ctx) | ||
3330 | { | ||
3331 | int ret; | ||
3332 | struct send_ctx *sctx = ctx; | ||
3333 | struct fs_path *p; | ||
3334 | |||
3335 | p = fs_path_alloc(sctx); | ||
3336 | if (!p) | ||
3337 | return -ENOMEM; | ||
3338 | |||
3339 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
3340 | if (ret < 0) | ||
3341 | goto out; | ||
3342 | |||
3343 | ret = send_remove_xattr(sctx, p, name, name_len); | ||
3344 | |||
3345 | out: | ||
3346 | fs_path_free(sctx, p); | ||
3347 | return ret; | ||
3348 | } | ||
3349 | |||
3350 | static int process_new_xattr(struct send_ctx *sctx) | ||
3351 | { | ||
3352 | int ret = 0; | ||
3353 | |||
3354 | ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, | ||
3355 | sctx->cmp_key, __process_new_xattr, sctx); | ||
3356 | |||
3357 | return ret; | ||
3358 | } | ||
3359 | |||
3360 | static int process_deleted_xattr(struct send_ctx *sctx) | ||
3361 | { | ||
3362 | int ret; | ||
3363 | |||
3364 | ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, | ||
3365 | sctx->cmp_key, __process_deleted_xattr, sctx); | ||
3366 | |||
3367 | return ret; | ||
3368 | } | ||
3369 | |||
/* Search state shared between find_xattr() and its callback __find_xattr(). */
struct find_xattr_ctx {
	const char *name;	/* xattr name to look for */
	int name_len;		/* length of name in bytes */
	int found_idx;		/* iterator index of the match, -1 if none */
	char *found_data;	/* kmalloc'ed copy of the matching value */
	int found_data_len;	/* length of found_data in bytes */
};
3377 | |||
3378 | static int __find_xattr(int num, struct btrfs_key *di_key, | ||
3379 | const char *name, int name_len, | ||
3380 | const char *data, int data_len, | ||
3381 | u8 type, void *vctx) | ||
3382 | { | ||
3383 | struct find_xattr_ctx *ctx = vctx; | ||
3384 | |||
3385 | if (name_len == ctx->name_len && | ||
3386 | strncmp(name, ctx->name, name_len) == 0) { | ||
3387 | ctx->found_idx = num; | ||
3388 | ctx->found_data_len = data_len; | ||
3389 | ctx->found_data = kmalloc(data_len, GFP_NOFS); | ||
3390 | if (!ctx->found_data) | ||
3391 | return -ENOMEM; | ||
3392 | memcpy(ctx->found_data, data, data_len); | ||
3393 | return 1; | ||
3394 | } | ||
3395 | return 0; | ||
3396 | } | ||
3397 | |||
3398 | static int find_xattr(struct send_ctx *sctx, | ||
3399 | struct btrfs_root *root, | ||
3400 | struct btrfs_path *path, | ||
3401 | struct btrfs_key *key, | ||
3402 | const char *name, int name_len, | ||
3403 | char **data, int *data_len) | ||
3404 | { | ||
3405 | int ret; | ||
3406 | struct find_xattr_ctx ctx; | ||
3407 | |||
3408 | ctx.name = name; | ||
3409 | ctx.name_len = name_len; | ||
3410 | ctx.found_idx = -1; | ||
3411 | ctx.found_data = NULL; | ||
3412 | ctx.found_data_len = 0; | ||
3413 | |||
3414 | ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx); | ||
3415 | if (ret < 0) | ||
3416 | return ret; | ||
3417 | |||
3418 | if (ctx.found_idx == -1) | ||
3419 | return -ENOENT; | ||
3420 | if (data) { | ||
3421 | *data = ctx.found_data; | ||
3422 | *data_len = ctx.found_data_len; | ||
3423 | } else { | ||
3424 | kfree(ctx.found_data); | ||
3425 | } | ||
3426 | return ctx.found_idx; | ||
3427 | } | ||
3428 | |||
3429 | |||
3430 | static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, | ||
3431 | const char *name, int name_len, | ||
3432 | const char *data, int data_len, | ||
3433 | u8 type, void *ctx) | ||
3434 | { | ||
3435 | int ret; | ||
3436 | struct send_ctx *sctx = ctx; | ||
3437 | char *found_data = NULL; | ||
3438 | int found_data_len = 0; | ||
3439 | struct fs_path *p = NULL; | ||
3440 | |||
3441 | ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, | ||
3442 | sctx->cmp_key, name, name_len, &found_data, | ||
3443 | &found_data_len); | ||
3444 | if (ret == -ENOENT) { | ||
3445 | ret = __process_new_xattr(num, di_key, name, name_len, data, | ||
3446 | data_len, type, ctx); | ||
3447 | } else if (ret >= 0) { | ||
3448 | if (data_len != found_data_len || | ||
3449 | memcmp(data, found_data, data_len)) { | ||
3450 | ret = __process_new_xattr(num, di_key, name, name_len, | ||
3451 | data, data_len, type, ctx); | ||
3452 | } else { | ||
3453 | ret = 0; | ||
3454 | } | ||
3455 | } | ||
3456 | |||
3457 | kfree(found_data); | ||
3458 | fs_path_free(sctx, p); | ||
3459 | return ret; | ||
3460 | } | ||
3461 | |||
3462 | static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, | ||
3463 | const char *name, int name_len, | ||
3464 | const char *data, int data_len, | ||
3465 | u8 type, void *ctx) | ||
3466 | { | ||
3467 | int ret; | ||
3468 | struct send_ctx *sctx = ctx; | ||
3469 | |||
3470 | ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, | ||
3471 | name, name_len, NULL, NULL); | ||
3472 | if (ret == -ENOENT) | ||
3473 | ret = __process_deleted_xattr(num, di_key, name, name_len, data, | ||
3474 | data_len, type, ctx); | ||
3475 | else if (ret >= 0) | ||
3476 | ret = 0; | ||
3477 | |||
3478 | return ret; | ||
3479 | } | ||
3480 | |||
3481 | static int process_changed_xattr(struct send_ctx *sctx) | ||
3482 | { | ||
3483 | int ret = 0; | ||
3484 | |||
3485 | ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, | ||
3486 | sctx->cmp_key, __process_changed_new_xattr, sctx); | ||
3487 | if (ret < 0) | ||
3488 | goto out; | ||
3489 | ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, | ||
3490 | sctx->cmp_key, __process_changed_deleted_xattr, sctx); | ||
3491 | |||
3492 | out: | ||
3493 | return ret; | ||
3494 | } | ||
3495 | |||
3496 | static int process_all_new_xattrs(struct send_ctx *sctx) | ||
3497 | { | ||
3498 | int ret; | ||
3499 | struct btrfs_root *root; | ||
3500 | struct btrfs_path *path; | ||
3501 | struct btrfs_key key; | ||
3502 | struct btrfs_key found_key; | ||
3503 | struct extent_buffer *eb; | ||
3504 | int slot; | ||
3505 | |||
3506 | path = alloc_path_for_send(); | ||
3507 | if (!path) | ||
3508 | return -ENOMEM; | ||
3509 | |||
3510 | root = sctx->send_root; | ||
3511 | |||
3512 | key.objectid = sctx->cmp_key->objectid; | ||
3513 | key.type = BTRFS_XATTR_ITEM_KEY; | ||
3514 | key.offset = 0; | ||
3515 | while (1) { | ||
3516 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | ||
3517 | if (ret < 0) | ||
3518 | goto out; | ||
3519 | if (ret) { | ||
3520 | ret = 0; | ||
3521 | goto out; | ||
3522 | } | ||
3523 | |||
3524 | eb = path->nodes[0]; | ||
3525 | slot = path->slots[0]; | ||
3526 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
3527 | |||
3528 | if (found_key.objectid != key.objectid || | ||
3529 | found_key.type != key.type) { | ||
3530 | ret = 0; | ||
3531 | goto out; | ||
3532 | } | ||
3533 | |||
3534 | ret = iterate_dir_item(sctx, root, path, &found_key, | ||
3535 | __process_new_xattr, sctx); | ||
3536 | if (ret < 0) | ||
3537 | goto out; | ||
3538 | |||
3539 | btrfs_release_path(path); | ||
3540 | key.offset = found_key.offset + 1; | ||
3541 | } | ||
3542 | |||
3543 | out: | ||
3544 | btrfs_free_path(path); | ||
3545 | return ret; | ||
3546 | } | ||
3547 | |||
3548 | /* | ||
3549 | * Read some bytes from the current inode/file and send a write command to | ||
3550 | * user space. | ||
3551 | */ | ||
3552 | static int send_write(struct send_ctx *sctx, u64 offset, u32 len) | ||
3553 | { | ||
3554 | int ret = 0; | ||
3555 | struct fs_path *p; | ||
3556 | loff_t pos = offset; | ||
3557 | int readed = 0; | ||
3558 | mm_segment_t old_fs; | ||
3559 | |||
3560 | p = fs_path_alloc(sctx); | ||
3561 | if (!p) | ||
3562 | return -ENOMEM; | ||
3563 | |||
3564 | /* | ||
3565 | * vfs normally only accepts user space buffers for security reasons. | ||
3566 | * we only read from the file and also only provide the read_buf buffer | ||
3567 | * to vfs. As this buffer does not come from a user space call, it's | ||
3568 | * ok to temporary allow kernel space buffers. | ||
3569 | */ | ||
3570 | old_fs = get_fs(); | ||
3571 | set_fs(KERNEL_DS); | ||
3572 | |||
3573 | verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | ||
3574 | |||
3575 | ret = open_cur_inode_file(sctx); | ||
3576 | if (ret < 0) | ||
3577 | goto out; | ||
3578 | |||
3579 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); | ||
3580 | if (ret < 0) | ||
3581 | goto out; | ||
3582 | readed = ret; | ||
3583 | if (!readed) | ||
3584 | goto out; | ||
3585 | |||
3586 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); | ||
3587 | if (ret < 0) | ||
3588 | goto out; | ||
3589 | |||
3590 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
3591 | if (ret < 0) | ||
3592 | goto out; | ||
3593 | |||
3594 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
3595 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | ||
3596 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed); | ||
3597 | |||
3598 | ret = send_cmd(sctx); | ||
3599 | |||
3600 | tlv_put_failure: | ||
3601 | out: | ||
3602 | fs_path_free(sctx, p); | ||
3603 | set_fs(old_fs); | ||
3604 | if (ret < 0) | ||
3605 | return ret; | ||
3606 | return readed; | ||
3607 | } | ||
3608 | |||
3609 | /* | ||
3610 | * Send a clone command to user space. | ||
3611 | */ | ||
3612 | static int send_clone(struct send_ctx *sctx, | ||
3613 | u64 offset, u32 len, | ||
3614 | struct clone_root *clone_root) | ||
3615 | { | ||
3616 | int ret = 0; | ||
3617 | struct btrfs_root *clone_root2 = clone_root->root; | ||
3618 | struct fs_path *p; | ||
3619 | u64 gen; | ||
3620 | |||
3621 | verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | ||
3622 | "clone_inode=%llu, clone_offset=%llu\n", offset, len, | ||
3623 | clone_root->root->objectid, clone_root->ino, | ||
3624 | clone_root->offset); | ||
3625 | |||
3626 | p = fs_path_alloc(sctx); | ||
3627 | if (!p) | ||
3628 | return -ENOMEM; | ||
3629 | |||
3630 | ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); | ||
3631 | if (ret < 0) | ||
3632 | goto out; | ||
3633 | |||
3634 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
3635 | if (ret < 0) | ||
3636 | goto out; | ||
3637 | |||
3638 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | ||
3639 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); | ||
3640 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
3641 | |||
3642 | if (clone_root2 == sctx->send_root) { | ||
3643 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, | ||
3644 | &gen, NULL, NULL, NULL); | ||
3645 | if (ret < 0) | ||
3646 | goto out; | ||
3647 | ret = get_cur_path(sctx, clone_root->ino, gen, p); | ||
3648 | } else { | ||
3649 | ret = get_inode_path(sctx, clone_root2, clone_root->ino, p); | ||
3650 | } | ||
3651 | if (ret < 0) | ||
3652 | goto out; | ||
3653 | |||
3654 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, | ||
3655 | clone_root2->root_item.uuid); | ||
3656 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, | ||
3657 | clone_root2->root_item.ctransid); | ||
3658 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); | ||
3659 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, | ||
3660 | clone_root->offset); | ||
3661 | |||
3662 | ret = send_cmd(sctx); | ||
3663 | |||
3664 | tlv_put_failure: | ||
3665 | out: | ||
3666 | fs_path_free(sctx, p); | ||
3667 | return ret; | ||
3668 | } | ||
3669 | |||
3670 | static int send_write_or_clone(struct send_ctx *sctx, | ||
3671 | struct btrfs_path *path, | ||
3672 | struct btrfs_key *key, | ||
3673 | struct clone_root *clone_root) | ||
3674 | { | ||
3675 | int ret = 0; | ||
3676 | struct btrfs_file_extent_item *ei; | ||
3677 | u64 offset = key->offset; | ||
3678 | u64 pos = 0; | ||
3679 | u64 len; | ||
3680 | u32 l; | ||
3681 | u8 type; | ||
3682 | |||
3683 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3684 | struct btrfs_file_extent_item); | ||
3685 | type = btrfs_file_extent_type(path->nodes[0], ei); | ||
3686 | if (type == BTRFS_FILE_EXTENT_INLINE) | ||
3687 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | ||
3688 | else | ||
3689 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); | ||
3690 | |||
3691 | if (offset + len > sctx->cur_inode_size) | ||
3692 | len = sctx->cur_inode_size - offset; | ||
3693 | if (len == 0) { | ||
3694 | ret = 0; | ||
3695 | goto out; | ||
3696 | } | ||
3697 | |||
3698 | if (!clone_root) { | ||
3699 | while (pos < len) { | ||
3700 | l = len - pos; | ||
3701 | if (l > BTRFS_SEND_READ_SIZE) | ||
3702 | l = BTRFS_SEND_READ_SIZE; | ||
3703 | ret = send_write(sctx, pos + offset, l); | ||
3704 | if (ret < 0) | ||
3705 | goto out; | ||
3706 | if (!ret) | ||
3707 | break; | ||
3708 | pos += ret; | ||
3709 | } | ||
3710 | ret = 0; | ||
3711 | } else { | ||
3712 | ret = send_clone(sctx, offset, len, clone_root); | ||
3713 | } | ||
3714 | |||
3715 | out: | ||
3716 | return ret; | ||
3717 | } | ||
3718 | |||
/*
 * Check whether the file extent item at @left_path (send root), whose key is
 * @ekey, is fully covered by extents in the parent root that reference the
 * same disk location.
 *
 * Returns 1 if the extent is considered unchanged, 0 if it is considered
 * changed, and a negative errno on failure (-ENOMEM if no path can be
 * allocated, -EIO if the extents found in the parent root are not
 * contiguous, or an error from the tree search).
 */
static int is_extent_unchanged(struct send_ctx *sctx,
			       struct btrfs_path *left_path,
			       struct btrfs_key *ekey)
{
	int ret = 0;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
	int slot;
	struct btrfs_key found_key;
	struct btrfs_file_extent_item *ei;
	u64 left_disknr;
	u64 right_disknr;
	u64 left_offset;
	u64 right_offset;
	u64 left_offset_fixed;
	u64 left_len;
	u64 right_len;
	u8 left_type;
	u8 right_type;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	eb = left_path->nodes[0];
	slot = left_path->slots[0];

	/* Read the description of the left (send root) extent. */
	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	left_type = btrfs_file_extent_type(eb, ei);
	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
	left_len = btrfs_file_extent_num_bytes(eb, ei);
	left_offset = btrfs_file_extent_offset(eb, ei);

	/* Only regular extents are compared; anything else counts as changed. */
	if (left_type != BTRFS_FILE_EXTENT_REG) {
		ret = 0;
		goto out;
	}

	/*
	 * Following comments will refer to these graphics. L is the left
	 * extents which we are checking at the moment. 1-8 are the right
	 * extents that we iterate.
	 *
	 * |-----L-----|
	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
	 *
	 * |-----L-----|
	 * |--1--|-2b-|...(same as above)
	 *
	 * Alternative situation. Happens on files where extents got split.
	 * |-----L-----|
	 * |-----------7-----------|-6-|
	 *
	 * Alternative situation. Happens on files which got larger.
	 * |-----L-----|
	 * |-8-|
	 * Nothing follows after 8.
	 */

	key.objectid = ekey->objectid;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = ekey->offset;
	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/* The search found no usable item: treat as changed. */
		ret = 0;
		goto out;
	}

	/*
	 * Handle special case where the right side has no extents at all.
	 */
	eb = path->nodes[0];
	slot = path->slots[0];
	btrfs_item_key_to_cpu(eb, &found_key, slot);
	if (found_key.objectid != key.objectid ||
	    found_key.type != key.type) {
		ret = 0;
		goto out;
	}

	/*
	 * We're now on 2a, 2b or 7.
	 */
	key = found_key;
	/* Walk the right extents until the end of the left extent is reached. */
	while (key.offset < ekey->offset + left_len) {
		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		right_type = btrfs_file_extent_type(eb, ei);
		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
		right_len = btrfs_file_extent_num_bytes(eb, ei);
		right_offset = btrfs_file_extent_offset(eb, ei);

		if (right_type != BTRFS_FILE_EXTENT_REG) {
			ret = 0;
			goto out;
		}

		/*
		 * Are we at extent 8? If yes, we know the extent is changed.
		 * This may only happen on the first iteration.
		 */
		if (found_key.offset + right_len < ekey->offset) {
			ret = 0;
			goto out;
		}

		/*
		 * Bring both sides to the same file position before comparing
		 * the disk locations.
		 */
		left_offset_fixed = left_offset;
		if (key.offset < ekey->offset) {
			/* Fix the right offset for 2a and 7. */
			right_offset += ekey->offset - key.offset;
		} else {
			/* Fix the left offset for all behind 2a and 2b */
			left_offset_fixed += key.offset - ekey->offset;
		}

		/*
		 * Check if we have the same extent.
		 */
		if (left_disknr + left_offset_fixed !=
		    right_disknr + right_offset) {
			ret = 0;
			goto out;
		}

		/*
		 * Go to the next extent.
		 */
		ret = btrfs_next_item(sctx->parent_root, path);
		if (ret < 0)
			goto out;
		if (!ret) {
			eb = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(eb, &found_key, slot);
		}
		if (ret || found_key.objectid != key.objectid ||
		    found_key.type != key.type) {
			/*
			 * No further extent of this inode on the right side:
			 * advance to the end of the last one and stop.
			 */
			key.offset += right_len;
			break;
		} else {
			if (found_key.offset != key.offset + right_len) {
				/* Should really not happen */
				ret = -EIO;
				goto out;
			}
		}
		key = found_key;
	}

	/*
	 * We're now behind the left extent (treat as unchanged) or at the end
	 * of the right side (treat as changed).
	 */
	if (key.offset >= ekey->offset + left_len)
		ret = 1;
	else
		ret = 0;


out:
	btrfs_free_path(path);
	return ret;
}
3884 | |||
3885 | static int process_extent(struct send_ctx *sctx, | ||
3886 | struct btrfs_path *path, | ||
3887 | struct btrfs_key *key) | ||
3888 | { | ||
3889 | int ret = 0; | ||
3890 | struct clone_root *found_clone = NULL; | ||
3891 | |||
3892 | if (S_ISLNK(sctx->cur_inode_mode)) | ||
3893 | return 0; | ||
3894 | |||
3895 | if (sctx->parent_root && !sctx->cur_inode_new) { | ||
3896 | ret = is_extent_unchanged(sctx, path, key); | ||
3897 | if (ret < 0) | ||
3898 | goto out; | ||
3899 | if (ret) { | ||
3900 | ret = 0; | ||
3901 | goto out; | ||
3902 | } | ||
3903 | } | ||
3904 | |||
3905 | ret = find_extent_clone(sctx, path, key->objectid, key->offset, | ||
3906 | sctx->cur_inode_size, &found_clone); | ||
3907 | if (ret != -ENOENT && ret < 0) | ||
3908 | goto out; | ||
3909 | |||
3910 | ret = send_write_or_clone(sctx, path, key, found_clone); | ||
3911 | |||
3912 | out: | ||
3913 | return ret; | ||
3914 | } | ||
3915 | |||
3916 | static int process_all_extents(struct send_ctx *sctx) | ||
3917 | { | ||
3918 | int ret; | ||
3919 | struct btrfs_root *root; | ||
3920 | struct btrfs_path *path; | ||
3921 | struct btrfs_key key; | ||
3922 | struct btrfs_key found_key; | ||
3923 | struct extent_buffer *eb; | ||
3924 | int slot; | ||
3925 | |||
3926 | root = sctx->send_root; | ||
3927 | path = alloc_path_for_send(); | ||
3928 | if (!path) | ||
3929 | return -ENOMEM; | ||
3930 | |||
3931 | key.objectid = sctx->cmp_key->objectid; | ||
3932 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3933 | key.offset = 0; | ||
3934 | while (1) { | ||
3935 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | ||
3936 | if (ret < 0) | ||
3937 | goto out; | ||
3938 | if (ret) { | ||
3939 | ret = 0; | ||
3940 | goto out; | ||
3941 | } | ||
3942 | |||
3943 | eb = path->nodes[0]; | ||
3944 | slot = path->slots[0]; | ||
3945 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
3946 | |||
3947 | if (found_key.objectid != key.objectid || | ||
3948 | found_key.type != key.type) { | ||
3949 | ret = 0; | ||
3950 | goto out; | ||
3951 | } | ||
3952 | |||
3953 | ret = process_extent(sctx, path, &found_key); | ||
3954 | if (ret < 0) | ||
3955 | goto out; | ||
3956 | |||
3957 | btrfs_release_path(path); | ||
3958 | key.offset = found_key.offset + 1; | ||
3959 | } | ||
3960 | |||
3961 | out: | ||
3962 | btrfs_free_path(path); | ||
3963 | return ret; | ||
3964 | } | ||
3965 | |||
3966 | static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) | ||
3967 | { | ||
3968 | int ret = 0; | ||
3969 | |||
3970 | if (sctx->cur_ino == 0) | ||
3971 | goto out; | ||
3972 | if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && | ||
3973 | sctx->cmp_key->type <= BTRFS_INODE_REF_KEY) | ||
3974 | goto out; | ||
3975 | if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) | ||
3976 | goto out; | ||
3977 | |||
3978 | ret = process_recorded_refs(sctx); | ||
3979 | |||
3980 | out: | ||
3981 | return ret; | ||
3982 | } | ||
3983 | |||
3984 | static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | ||
3985 | { | ||
3986 | int ret = 0; | ||
3987 | u64 left_mode; | ||
3988 | u64 left_uid; | ||
3989 | u64 left_gid; | ||
3990 | u64 right_mode; | ||
3991 | u64 right_uid; | ||
3992 | u64 right_gid; | ||
3993 | int need_chmod = 0; | ||
3994 | int need_chown = 0; | ||
3995 | |||
3996 | ret = process_recorded_refs_if_needed(sctx, at_end); | ||
3997 | if (ret < 0) | ||
3998 | goto out; | ||
3999 | |||
4000 | if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) | ||
4001 | goto out; | ||
4002 | if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) | ||
4003 | goto out; | ||
4004 | |||
4005 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, | ||
4006 | &left_mode, &left_uid, &left_gid); | ||
4007 | if (ret < 0) | ||
4008 | goto out; | ||
4009 | |||
4010 | if (!S_ISLNK(sctx->cur_inode_mode)) { | ||
4011 | if (!sctx->parent_root || sctx->cur_inode_new) { | ||
4012 | need_chmod = 1; | ||
4013 | need_chown = 1; | ||
4014 | } else { | ||
4015 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, | ||
4016 | NULL, NULL, &right_mode, &right_uid, | ||
4017 | &right_gid); | ||
4018 | if (ret < 0) | ||
4019 | goto out; | ||
4020 | |||
4021 | if (left_uid != right_uid || left_gid != right_gid) | ||
4022 | need_chown = 1; | ||
4023 | if (left_mode != right_mode) | ||
4024 | need_chmod = 1; | ||
4025 | } | ||
4026 | } | ||
4027 | |||
4028 | if (S_ISREG(sctx->cur_inode_mode)) { | ||
4029 | ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
4030 | sctx->cur_inode_size); | ||
4031 | if (ret < 0) | ||
4032 | goto out; | ||
4033 | } | ||
4034 | |||
4035 | if (need_chown) { | ||
4036 | ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
4037 | left_uid, left_gid); | ||
4038 | if (ret < 0) | ||
4039 | goto out; | ||
4040 | } | ||
4041 | if (need_chmod) { | ||
4042 | ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
4043 | left_mode); | ||
4044 | if (ret < 0) | ||
4045 | goto out; | ||
4046 | } | ||
4047 | |||
4048 | /* | ||
4049 | * Need to send that every time, no matter if it actually changed | ||
4050 | * between the two trees as we have done changes to the inode before. | ||
4051 | */ | ||
4052 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
4053 | if (ret < 0) | ||
4054 | goto out; | ||
4055 | |||
4056 | out: | ||
4057 | return ret; | ||
4058 | } | ||
4059 | |||
4060 | static int changed_inode(struct send_ctx *sctx, | ||
4061 | enum btrfs_compare_tree_result result) | ||
4062 | { | ||
4063 | int ret = 0; | ||
4064 | struct btrfs_key *key = sctx->cmp_key; | ||
4065 | struct btrfs_inode_item *left_ii = NULL; | ||
4066 | struct btrfs_inode_item *right_ii = NULL; | ||
4067 | u64 left_gen = 0; | ||
4068 | u64 right_gen = 0; | ||
4069 | |||
4070 | ret = close_cur_inode_file(sctx); | ||
4071 | if (ret < 0) | ||
4072 | goto out; | ||
4073 | |||
4074 | sctx->cur_ino = key->objectid; | ||
4075 | sctx->cur_inode_new_gen = 0; | ||
4076 | sctx->cur_inode_first_ref_orphan = 0; | ||
4077 | sctx->send_progress = sctx->cur_ino; | ||
4078 | |||
4079 | if (result == BTRFS_COMPARE_TREE_NEW || | ||
4080 | result == BTRFS_COMPARE_TREE_CHANGED) { | ||
4081 | left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], | ||
4082 | sctx->left_path->slots[0], | ||
4083 | struct btrfs_inode_item); | ||
4084 | left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], | ||
4085 | left_ii); | ||
4086 | } else { | ||
4087 | right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], | ||
4088 | sctx->right_path->slots[0], | ||
4089 | struct btrfs_inode_item); | ||
4090 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], | ||
4091 | right_ii); | ||
4092 | } | ||
4093 | if (result == BTRFS_COMPARE_TREE_CHANGED) { | ||
4094 | right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], | ||
4095 | sctx->right_path->slots[0], | ||
4096 | struct btrfs_inode_item); | ||
4097 | |||
4098 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], | ||
4099 | right_ii); | ||
4100 | if (left_gen != right_gen) | ||
4101 | sctx->cur_inode_new_gen = 1; | ||
4102 | } | ||
4103 | |||
4104 | if (result == BTRFS_COMPARE_TREE_NEW) { | ||
4105 | sctx->cur_inode_gen = left_gen; | ||
4106 | sctx->cur_inode_new = 1; | ||
4107 | sctx->cur_inode_deleted = 0; | ||
4108 | sctx->cur_inode_size = btrfs_inode_size( | ||
4109 | sctx->left_path->nodes[0], left_ii); | ||
4110 | sctx->cur_inode_mode = btrfs_inode_mode( | ||
4111 | sctx->left_path->nodes[0], left_ii); | ||
4112 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
4113 | ret = send_create_inode(sctx, sctx->left_path, | ||
4114 | sctx->cmp_key); | ||
4115 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | ||
4116 | sctx->cur_inode_gen = right_gen; | ||
4117 | sctx->cur_inode_new = 0; | ||
4118 | sctx->cur_inode_deleted = 1; | ||
4119 | sctx->cur_inode_size = btrfs_inode_size( | ||
4120 | sctx->right_path->nodes[0], right_ii); | ||
4121 | sctx->cur_inode_mode = btrfs_inode_mode( | ||
4122 | sctx->right_path->nodes[0], right_ii); | ||
4123 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { | ||
4124 | if (sctx->cur_inode_new_gen) { | ||
4125 | sctx->cur_inode_gen = right_gen; | ||
4126 | sctx->cur_inode_new = 0; | ||
4127 | sctx->cur_inode_deleted = 1; | ||
4128 | sctx->cur_inode_size = btrfs_inode_size( | ||
4129 | sctx->right_path->nodes[0], right_ii); | ||
4130 | sctx->cur_inode_mode = btrfs_inode_mode( | ||
4131 | sctx->right_path->nodes[0], right_ii); | ||
4132 | ret = process_all_refs(sctx, | ||
4133 | BTRFS_COMPARE_TREE_DELETED); | ||
4134 | if (ret < 0) | ||
4135 | goto out; | ||
4136 | |||
4137 | sctx->cur_inode_gen = left_gen; | ||
4138 | sctx->cur_inode_new = 1; | ||
4139 | sctx->cur_inode_deleted = 0; | ||
4140 | sctx->cur_inode_size = btrfs_inode_size( | ||
4141 | sctx->left_path->nodes[0], left_ii); | ||
4142 | sctx->cur_inode_mode = btrfs_inode_mode( | ||
4143 | sctx->left_path->nodes[0], left_ii); | ||
4144 | ret = send_create_inode(sctx, sctx->left_path, | ||
4145 | sctx->cmp_key); | ||
4146 | if (ret < 0) | ||
4147 | goto out; | ||
4148 | |||
4149 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); | ||
4150 | if (ret < 0) | ||
4151 | goto out; | ||
4152 | ret = process_all_extents(sctx); | ||
4153 | if (ret < 0) | ||
4154 | goto out; | ||
4155 | ret = process_all_new_xattrs(sctx); | ||
4156 | if (ret < 0) | ||
4157 | goto out; | ||
4158 | } else { | ||
4159 | sctx->cur_inode_gen = left_gen; | ||
4160 | sctx->cur_inode_new = 0; | ||
4161 | sctx->cur_inode_new_gen = 0; | ||
4162 | sctx->cur_inode_deleted = 0; | ||
4163 | sctx->cur_inode_size = btrfs_inode_size( | ||
4164 | sctx->left_path->nodes[0], left_ii); | ||
4165 | sctx->cur_inode_mode = btrfs_inode_mode( | ||
4166 | sctx->left_path->nodes[0], left_ii); | ||
4167 | } | ||
4168 | } | ||
4169 | |||
4170 | out: | ||
4171 | return ret; | ||
4172 | } | ||
4173 | |||
4174 | static int changed_ref(struct send_ctx *sctx, | ||
4175 | enum btrfs_compare_tree_result result) | ||
4176 | { | ||
4177 | int ret = 0; | ||
4178 | |||
4179 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | ||
4180 | |||
4181 | if (!sctx->cur_inode_new_gen && | ||
4182 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
4183 | if (result == BTRFS_COMPARE_TREE_NEW) | ||
4184 | ret = record_new_ref(sctx); | ||
4185 | else if (result == BTRFS_COMPARE_TREE_DELETED) | ||
4186 | ret = record_deleted_ref(sctx); | ||
4187 | else if (result == BTRFS_COMPARE_TREE_CHANGED) | ||
4188 | ret = record_changed_ref(sctx); | ||
4189 | } | ||
4190 | |||
4191 | return ret; | ||
4192 | } | ||
4193 | |||
4194 | static int changed_xattr(struct send_ctx *sctx, | ||
4195 | enum btrfs_compare_tree_result result) | ||
4196 | { | ||
4197 | int ret = 0; | ||
4198 | |||
4199 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | ||
4200 | |||
4201 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { | ||
4202 | if (result == BTRFS_COMPARE_TREE_NEW) | ||
4203 | ret = process_new_xattr(sctx); | ||
4204 | else if (result == BTRFS_COMPARE_TREE_DELETED) | ||
4205 | ret = process_deleted_xattr(sctx); | ||
4206 | else if (result == BTRFS_COMPARE_TREE_CHANGED) | ||
4207 | ret = process_changed_xattr(sctx); | ||
4208 | } | ||
4209 | |||
4210 | return ret; | ||
4211 | } | ||
4212 | |||
4213 | static int changed_extent(struct send_ctx *sctx, | ||
4214 | enum btrfs_compare_tree_result result) | ||
4215 | { | ||
4216 | int ret = 0; | ||
4217 | |||
4218 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | ||
4219 | |||
4220 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { | ||
4221 | if (result != BTRFS_COMPARE_TREE_DELETED) | ||
4222 | ret = process_extent(sctx, sctx->left_path, | ||
4223 | sctx->cmp_key); | ||
4224 | } | ||
4225 | |||
4226 | return ret; | ||
4227 | } | ||
4228 | |||
4229 | |||
4230 | static int changed_cb(struct btrfs_root *left_root, | ||
4231 | struct btrfs_root *right_root, | ||
4232 | struct btrfs_path *left_path, | ||
4233 | struct btrfs_path *right_path, | ||
4234 | struct btrfs_key *key, | ||
4235 | enum btrfs_compare_tree_result result, | ||
4236 | void *ctx) | ||
4237 | { | ||
4238 | int ret = 0; | ||
4239 | struct send_ctx *sctx = ctx; | ||
4240 | |||
4241 | sctx->left_path = left_path; | ||
4242 | sctx->right_path = right_path; | ||
4243 | sctx->cmp_key = key; | ||
4244 | |||
4245 | ret = finish_inode_if_needed(sctx, 0); | ||
4246 | if (ret < 0) | ||
4247 | goto out; | ||
4248 | |||
4249 | if (key->type == BTRFS_INODE_ITEM_KEY) | ||
4250 | ret = changed_inode(sctx, result); | ||
4251 | else if (key->type == BTRFS_INODE_REF_KEY) | ||
4252 | ret = changed_ref(sctx, result); | ||
4253 | else if (key->type == BTRFS_XATTR_ITEM_KEY) | ||
4254 | ret = changed_xattr(sctx, result); | ||
4255 | else if (key->type == BTRFS_EXTENT_DATA_KEY) | ||
4256 | ret = changed_extent(sctx, result); | ||
4257 | |||
4258 | out: | ||
4259 | return ret; | ||
4260 | } | ||
4261 | |||
4262 | static int full_send_tree(struct send_ctx *sctx) | ||
4263 | { | ||
4264 | int ret; | ||
4265 | struct btrfs_trans_handle *trans = NULL; | ||
4266 | struct btrfs_root *send_root = sctx->send_root; | ||
4267 | struct btrfs_key key; | ||
4268 | struct btrfs_key found_key; | ||
4269 | struct btrfs_path *path; | ||
4270 | struct extent_buffer *eb; | ||
4271 | int slot; | ||
4272 | u64 start_ctransid; | ||
4273 | u64 ctransid; | ||
4274 | |||
4275 | path = alloc_path_for_send(); | ||
4276 | if (!path) | ||
4277 | return -ENOMEM; | ||
4278 | |||
4279 | spin_lock(&send_root->root_times_lock); | ||
4280 | start_ctransid = btrfs_root_ctransid(&send_root->root_item); | ||
4281 | spin_unlock(&send_root->root_times_lock); | ||
4282 | |||
4283 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
4284 | key.type = BTRFS_INODE_ITEM_KEY; | ||
4285 | key.offset = 0; | ||
4286 | |||
4287 | join_trans: | ||
4288 | /* | ||
4289 | * We need to make sure the transaction does not get committed | ||
4290 | * while we do anything on commit roots. Join a transaction to prevent | ||
4291 | * this. | ||
4292 | */ | ||
4293 | trans = btrfs_join_transaction(send_root); | ||
4294 | if (IS_ERR(trans)) { | ||
4295 | ret = PTR_ERR(trans); | ||
4296 | trans = NULL; | ||
4297 | goto out; | ||
4298 | } | ||
4299 | |||
4300 | /* | ||
4301 | * Make sure the tree has not changed | ||
4302 | */ | ||
4303 | spin_lock(&send_root->root_times_lock); | ||
4304 | ctransid = btrfs_root_ctransid(&send_root->root_item); | ||
4305 | spin_unlock(&send_root->root_times_lock); | ||
4306 | |||
4307 | if (ctransid != start_ctransid) { | ||
4308 | WARN(1, KERN_WARNING "btrfs: the root that you're trying to " | ||
4309 | "send was modified in between. This is " | ||
4310 | "probably a bug.\n"); | ||
4311 | ret = -EIO; | ||
4312 | goto out; | ||
4313 | } | ||
4314 | |||
4315 | ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); | ||
4316 | if (ret < 0) | ||
4317 | goto out; | ||
4318 | if (ret) | ||
4319 | goto out_finish; | ||
4320 | |||
4321 | while (1) { | ||
4322 | /* | ||
4323 | * When someone want to commit while we iterate, end the | ||
4324 | * joined transaction and rejoin. | ||
4325 | */ | ||
4326 | if (btrfs_should_end_transaction(trans, send_root)) { | ||
4327 | ret = btrfs_end_transaction(trans, send_root); | ||
4328 | trans = NULL; | ||
4329 | if (ret < 0) | ||
4330 | goto out; | ||
4331 | btrfs_release_path(path); | ||
4332 | goto join_trans; | ||
4333 | } | ||
4334 | |||
4335 | eb = path->nodes[0]; | ||
4336 | slot = path->slots[0]; | ||
4337 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
4338 | |||
4339 | ret = changed_cb(send_root, NULL, path, NULL, | ||
4340 | &found_key, BTRFS_COMPARE_TREE_NEW, sctx); | ||
4341 | if (ret < 0) | ||
4342 | goto out; | ||
4343 | |||
4344 | key.objectid = found_key.objectid; | ||
4345 | key.type = found_key.type; | ||
4346 | key.offset = found_key.offset + 1; | ||
4347 | |||
4348 | ret = btrfs_next_item(send_root, path); | ||
4349 | if (ret < 0) | ||
4350 | goto out; | ||
4351 | if (ret) { | ||
4352 | ret = 0; | ||
4353 | break; | ||
4354 | } | ||
4355 | } | ||
4356 | |||
4357 | out_finish: | ||
4358 | ret = finish_inode_if_needed(sctx, 1); | ||
4359 | |||
4360 | out: | ||
4361 | btrfs_free_path(path); | ||
4362 | if (trans) { | ||
4363 | if (!ret) | ||
4364 | ret = btrfs_end_transaction(trans, send_root); | ||
4365 | else | ||
4366 | btrfs_end_transaction(trans, send_root); | ||
4367 | } | ||
4368 | return ret; | ||
4369 | } | ||
4370 | |||
4371 | static int send_subvol(struct send_ctx *sctx) | ||
4372 | { | ||
4373 | int ret; | ||
4374 | |||
4375 | ret = send_header(sctx); | ||
4376 | if (ret < 0) | ||
4377 | goto out; | ||
4378 | |||
4379 | ret = send_subvol_begin(sctx); | ||
4380 | if (ret < 0) | ||
4381 | goto out; | ||
4382 | |||
4383 | if (sctx->parent_root) { | ||
4384 | ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, | ||
4385 | changed_cb, sctx); | ||
4386 | if (ret < 0) | ||
4387 | goto out; | ||
4388 | ret = finish_inode_if_needed(sctx, 1); | ||
4389 | if (ret < 0) | ||
4390 | goto out; | ||
4391 | } else { | ||
4392 | ret = full_send_tree(sctx); | ||
4393 | if (ret < 0) | ||
4394 | goto out; | ||
4395 | } | ||
4396 | |||
4397 | out: | ||
4398 | if (!ret) | ||
4399 | ret = close_cur_inode_file(sctx); | ||
4400 | else | ||
4401 | close_cur_inode_file(sctx); | ||
4402 | |||
4403 | free_recorded_refs(sctx); | ||
4404 | return ret; | ||
4405 | } | ||
4406 | |||
4407 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | ||
4408 | { | ||
4409 | int ret = 0; | ||
4410 | struct btrfs_root *send_root; | ||
4411 | struct btrfs_root *clone_root; | ||
4412 | struct btrfs_fs_info *fs_info; | ||
4413 | struct btrfs_ioctl_send_args *arg = NULL; | ||
4414 | struct btrfs_key key; | ||
4415 | struct file *filp = NULL; | ||
4416 | struct send_ctx *sctx = NULL; | ||
4417 | u32 i; | ||
4418 | u64 *clone_sources_tmp = NULL; | ||
4419 | |||
4420 | if (!capable(CAP_SYS_ADMIN)) | ||
4421 | return -EPERM; | ||
4422 | |||
4423 | send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root; | ||
4424 | fs_info = send_root->fs_info; | ||
4425 | |||
4426 | arg = memdup_user(arg_, sizeof(*arg)); | ||
4427 | if (IS_ERR(arg)) { | ||
4428 | ret = PTR_ERR(arg); | ||
4429 | arg = NULL; | ||
4430 | goto out; | ||
4431 | } | ||
4432 | |||
4433 | if (!access_ok(VERIFY_READ, arg->clone_sources, | ||
4434 | sizeof(*arg->clone_sources * | ||
4435 | arg->clone_sources_count))) { | ||
4436 | ret = -EFAULT; | ||
4437 | goto out; | ||
4438 | } | ||
4439 | |||
4440 | sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); | ||
4441 | if (!sctx) { | ||
4442 | ret = -ENOMEM; | ||
4443 | goto out; | ||
4444 | } | ||
4445 | |||
4446 | INIT_LIST_HEAD(&sctx->new_refs); | ||
4447 | INIT_LIST_HEAD(&sctx->deleted_refs); | ||
4448 | INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); | ||
4449 | INIT_LIST_HEAD(&sctx->name_cache_list); | ||
4450 | |||
4451 | sctx->send_filp = fget(arg->send_fd); | ||
4452 | if (IS_ERR(sctx->send_filp)) { | ||
4453 | ret = PTR_ERR(sctx->send_filp); | ||
4454 | goto out; | ||
4455 | } | ||
4456 | |||
4457 | sctx->mnt = mnt_file->f_path.mnt; | ||
4458 | |||
4459 | sctx->send_root = send_root; | ||
4460 | sctx->clone_roots_cnt = arg->clone_sources_count; | ||
4461 | |||
4462 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; | ||
4463 | sctx->send_buf = vmalloc(sctx->send_max_size); | ||
4464 | if (!sctx->send_buf) { | ||
4465 | ret = -ENOMEM; | ||
4466 | goto out; | ||
4467 | } | ||
4468 | |||
4469 | sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); | ||
4470 | if (!sctx->read_buf) { | ||
4471 | ret = -ENOMEM; | ||
4472 | goto out; | ||
4473 | } | ||
4474 | |||
4475 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | ||
4476 | (arg->clone_sources_count + 1)); | ||
4477 | if (!sctx->clone_roots) { | ||
4478 | ret = -ENOMEM; | ||
4479 | goto out; | ||
4480 | } | ||
4481 | |||
4482 | if (arg->clone_sources_count) { | ||
4483 | clone_sources_tmp = vmalloc(arg->clone_sources_count * | ||
4484 | sizeof(*arg->clone_sources)); | ||
4485 | if (!clone_sources_tmp) { | ||
4486 | ret = -ENOMEM; | ||
4487 | goto out; | ||
4488 | } | ||
4489 | |||
4490 | ret = copy_from_user(clone_sources_tmp, arg->clone_sources, | ||
4491 | arg->clone_sources_count * | ||
4492 | sizeof(*arg->clone_sources)); | ||
4493 | if (ret) { | ||
4494 | ret = -EFAULT; | ||
4495 | goto out; | ||
4496 | } | ||
4497 | |||
4498 | for (i = 0; i < arg->clone_sources_count; i++) { | ||
4499 | key.objectid = clone_sources_tmp[i]; | ||
4500 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
4501 | key.offset = (u64)-1; | ||
4502 | clone_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
4503 | if (!clone_root) { | ||
4504 | ret = -EINVAL; | ||
4505 | goto out; | ||
4506 | } | ||
4507 | if (IS_ERR(clone_root)) { | ||
4508 | ret = PTR_ERR(clone_root); | ||
4509 | goto out; | ||
4510 | } | ||
4511 | sctx->clone_roots[i].root = clone_root; | ||
4512 | } | ||
4513 | vfree(clone_sources_tmp); | ||
4514 | clone_sources_tmp = NULL; | ||
4515 | } | ||
4516 | |||
4517 | if (arg->parent_root) { | ||
4518 | key.objectid = arg->parent_root; | ||
4519 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
4520 | key.offset = (u64)-1; | ||
4521 | sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
4522 | if (!sctx->parent_root) { | ||
4523 | ret = -EINVAL; | ||
4524 | goto out; | ||
4525 | } | ||
4526 | } | ||
4527 | |||
4528 | /* | ||
4529 | * Clones from send_root are allowed, but only if the clone source | ||
4530 | * is behind the current send position. This is checked while searching | ||
4531 | * for possible clone sources. | ||
4532 | */ | ||
4533 | sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; | ||
4534 | |||
4535 | /* We do a bsearch later */ | ||
4536 | sort(sctx->clone_roots, sctx->clone_roots_cnt, | ||
4537 | sizeof(*sctx->clone_roots), __clone_root_cmp_sort, | ||
4538 | NULL); | ||
4539 | |||
4540 | ret = send_subvol(sctx); | ||
4541 | if (ret < 0) | ||
4542 | goto out; | ||
4543 | |||
4544 | ret = begin_cmd(sctx, BTRFS_SEND_C_END); | ||
4545 | if (ret < 0) | ||
4546 | goto out; | ||
4547 | ret = send_cmd(sctx); | ||
4548 | if (ret < 0) | ||
4549 | goto out; | ||
4550 | |||
4551 | out: | ||
4552 | if (filp) | ||
4553 | fput(filp); | ||
4554 | kfree(arg); | ||
4555 | vfree(clone_sources_tmp); | ||
4556 | |||
4557 | if (sctx) { | ||
4558 | if (sctx->send_filp) | ||
4559 | fput(sctx->send_filp); | ||
4560 | |||
4561 | vfree(sctx->clone_roots); | ||
4562 | vfree(sctx->send_buf); | ||
4563 | vfree(sctx->read_buf); | ||
4564 | |||
4565 | name_cache_free(sctx); | ||
4566 | |||
4567 | kfree(sctx); | ||
4568 | } | ||
4569 | |||
4570 | return ret; | ||
4571 | } | ||
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h new file mode 100644 index 00000000000..9934e948e57 --- /dev/null +++ b/fs/btrfs/send.h | |||
@@ -0,0 +1,133 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2012 Alexander Block. All rights reserved. | ||
3 | * Copyright (C) 2012 STRATO. All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public | ||
7 | * License v2 as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public | ||
15 | * License along with this program; if not, write to the | ||
16 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
17 | * Boston, MA 02111-1307, USA. | ||
18 | */ | ||
19 | |||
20 | #include "ctree.h" | ||
21 | |||
22 | #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" | ||
23 | #define BTRFS_SEND_STREAM_VERSION 1 | ||
24 | |||
25 | #define BTRFS_SEND_BUF_SIZE (1024 * 64) | ||
26 | #define BTRFS_SEND_READ_SIZE (1024 * 48) | ||
27 | |||
/* Kinds of values a TLV attribute can hold. */
enum btrfs_tlv_type {
	BTRFS_TLV_U8		= 0,
	BTRFS_TLV_U16		= 1,
	BTRFS_TLV_U32		= 2,
	BTRFS_TLV_U64		= 3,
	BTRFS_TLV_BINARY	= 4,
	BTRFS_TLV_STRING	= 5,
	BTRFS_TLV_UUID		= 6,
	BTRFS_TLV_TIMESPEC	= 7,
};
38 | |||
39 | struct btrfs_stream_header { | ||
40 | char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)]; | ||
41 | __le32 version; | ||
42 | } __attribute__ ((__packed__)); | ||
43 | |||
44 | struct btrfs_cmd_header { | ||
45 | /* len excluding the header */ | ||
46 | __le32 len; | ||
47 | __le16 cmd; | ||
48 | /* crc including the header with zero crc field */ | ||
49 | __le32 crc; | ||
50 | } __attribute__ ((__packed__)); | ||
51 | |||
52 | struct btrfs_tlv_header { | ||
53 | __le16 tlv_type; | ||
54 | /* len excluding the header */ | ||
55 | __le16 tlv_len; | ||
56 | } __attribute__ ((__packed__)); | ||
57 | |||
/*
 * Command numbers.  The values are spelled out explicitly; they equal the
 * ones the compiler would assign by position.
 */
enum btrfs_send_cmd {
	BTRFS_SEND_C_UNSPEC		= 0,

	BTRFS_SEND_C_SUBVOL		= 1,
	BTRFS_SEND_C_SNAPSHOT		= 2,

	BTRFS_SEND_C_MKFILE		= 3,
	BTRFS_SEND_C_MKDIR		= 4,
	BTRFS_SEND_C_MKNOD		= 5,
	BTRFS_SEND_C_MKFIFO		= 6,
	BTRFS_SEND_C_MKSOCK		= 7,
	BTRFS_SEND_C_SYMLINK		= 8,

	BTRFS_SEND_C_RENAME		= 9,
	BTRFS_SEND_C_LINK		= 10,
	BTRFS_SEND_C_UNLINK		= 11,
	BTRFS_SEND_C_RMDIR		= 12,

	BTRFS_SEND_C_SET_XATTR		= 13,
	BTRFS_SEND_C_REMOVE_XATTR	= 14,

	BTRFS_SEND_C_WRITE		= 15,
	BTRFS_SEND_C_CLONE		= 16,

	BTRFS_SEND_C_TRUNCATE		= 17,
	BTRFS_SEND_C_CHMOD		= 18,
	BTRFS_SEND_C_CHOWN		= 19,
	BTRFS_SEND_C_UTIMES		= 20,

	BTRFS_SEND_C_END		= 21,
	__BTRFS_SEND_C_MAX,
};
/* highest valid command number */
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
92 | |||
/*
 * Attribute numbers used in the send stream.  The values are spelled out
 * explicitly; they equal the ones the compiler would assign by position.
 */
enum {
	BTRFS_SEND_A_UNSPEC		= 0,

	BTRFS_SEND_A_UUID		= 1,
	BTRFS_SEND_A_CTRANSID		= 2,

	BTRFS_SEND_A_INO		= 3,
	BTRFS_SEND_A_SIZE		= 4,
	BTRFS_SEND_A_MODE		= 5,
	BTRFS_SEND_A_UID		= 6,
	BTRFS_SEND_A_GID		= 7,
	BTRFS_SEND_A_RDEV		= 8,
	BTRFS_SEND_A_CTIME		= 9,
	BTRFS_SEND_A_MTIME		= 10,
	BTRFS_SEND_A_ATIME		= 11,
	BTRFS_SEND_A_OTIME		= 12,

	BTRFS_SEND_A_XATTR_NAME		= 13,
	BTRFS_SEND_A_XATTR_DATA		= 14,

	BTRFS_SEND_A_PATH		= 15,
	BTRFS_SEND_A_PATH_TO		= 16,
	BTRFS_SEND_A_PATH_LINK		= 17,

	BTRFS_SEND_A_FILE_OFFSET	= 18,
	BTRFS_SEND_A_DATA		= 19,

	BTRFS_SEND_A_CLONE_UUID		= 20,
	BTRFS_SEND_A_CLONE_CTRANSID	= 21,
	BTRFS_SEND_A_CLONE_PATH		= 22,
	BTRFS_SEND_A_CLONE_OFFSET	= 23,
	BTRFS_SEND_A_CLONE_LEN		= 24,

	__BTRFS_SEND_A_MAX,
};
/* highest valid attribute number */
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
130 | |||
131 | #ifdef __KERNEL__ | ||
132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); | ||
133 | #endif | ||
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c6ffa581241..b976597b072 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c | |||
@@ -17,15 +17,27 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <asm/unaligned.h> | ||
20 | 21 | ||
21 | /* this is some deeply nasty code. ctree.h has a different | 22 | #include "ctree.h" |
22 | * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef | 23 | |
24 | static inline u8 get_unaligned_le8(const void *p) | ||
25 | { | ||
26 | return *(u8 *)p; | ||
27 | } | ||
28 | |||
29 | static inline void put_unaligned_le8(u8 val, void *p) | ||
30 | { | ||
31 | *(u8 *)p = val; | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * this is some deeply nasty code. | ||
23 | * | 36 | * |
24 | * The end result is that anyone who #includes ctree.h gets a | 37 | * The end result is that anyone who #includes ctree.h gets a |
25 | * declaration for the btrfs_set_foo functions and btrfs_foo functions | 38 | * declaration for the btrfs_set_foo functions and btrfs_foo functions, |
26 | * | 39 | * which are wrappers of btrfs_set_token_#bits functions and |
27 | * This file declares the macros and then #includes ctree.h, which results | 40 | * btrfs_get_token_#bits functions, which are defined in this file. |
28 | * in cpp creating the function here based on the template below. | ||
29 | * | 41 | * |
30 | * These setget functions do all the extent_buffer related mapping | 42 | * These setget functions do all the extent_buffer related mapping |
31 | * required to efficiently read and write specific fields in the extent | 43 | * required to efficiently read and write specific fields in the extent |
@@ -33,103 +45,93 @@ | |||
33 | * an unsigned long offset into the extent buffer which has been | 45 | * an unsigned long offset into the extent buffer which has been |
34 | * cast to a specific type. This gives us all the gcc type checking. | 46 | * cast to a specific type. This gives us all the gcc type checking. |
35 | * | 47 | * |
36 | * The extent buffer api is used to do all the kmapping and page | 48 | * The extent buffer api is used to do the page spanning work required to |
37 | * spanning work required to get extent buffers in highmem and have | 49 | * have a metadata blocksize different from the page size. |
38 | * a metadata blocksize different from the page size. | ||
39 | * | ||
40 | * The macro starts with a simple function prototype declaration so that | ||
41 | * sparse won't complain about it being static. | ||
42 | */ | 50 | */ |
43 | 51 | ||
44 | #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ | 52 | #define DEFINE_BTRFS_SETGET_BITS(bits) \ |
45 | u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ | 53 | u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ |
46 | void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \ | 54 | unsigned long off, \ |
47 | void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \ | 55 | struct btrfs_map_token *token) \ |
48 | u##bits btrfs_token_##name(struct extent_buffer *eb, \ | ||
49 | type *s, struct btrfs_map_token *token) \ | ||
50 | { \ | 56 | { \ |
51 | unsigned long part_offset = (unsigned long)s; \ | 57 | unsigned long part_offset = (unsigned long)ptr; \ |
52 | unsigned long offset = part_offset + offsetof(type, member); \ | 58 | unsigned long offset = part_offset + off; \ |
53 | type *p; \ | 59 | void *p; \ |
54 | int err; \ | 60 | int err; \ |
55 | char *kaddr; \ | 61 | char *kaddr; \ |
56 | unsigned long map_start; \ | 62 | unsigned long map_start; \ |
57 | unsigned long map_len; \ | 63 | unsigned long map_len; \ |
58 | unsigned long mem_len = sizeof(((type *)0)->member); \ | 64 | int size = sizeof(u##bits); \ |
59 | u##bits res; \ | 65 | u##bits res; \ |
60 | if (token && token->kaddr && token->offset <= offset && \ | 66 | \ |
61 | token->eb == eb && \ | 67 | if (token && token->kaddr && token->offset <= offset && \ |
62 | (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ | 68 | token->eb == eb && \ |
63 | kaddr = token->kaddr; \ | 69 | (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ |
64 | p = (type *)(kaddr + part_offset - token->offset); \ | 70 | kaddr = token->kaddr; \ |
65 | res = le##bits##_to_cpu(p->member); \ | 71 | p = kaddr + part_offset - token->offset; \ |
66 | return res; \ | 72 | res = get_unaligned_le##bits(p + off); \ |
67 | } \ | 73 | return res; \ |
68 | err = map_private_extent_buffer(eb, offset, \ | 74 | } \ |
69 | mem_len, \ | 75 | err = map_private_extent_buffer(eb, offset, size, \ |
70 | &kaddr, &map_start, &map_len); \ | 76 | &kaddr, &map_start, &map_len); \ |
71 | if (err) { \ | 77 | if (err) { \ |
72 | __le##bits leres; \ | 78 | __le##bits leres; \ |
73 | read_eb_member(eb, s, type, member, &leres); \ | 79 | \ |
74 | return le##bits##_to_cpu(leres); \ | 80 | read_extent_buffer(eb, &leres, offset, size); \ |
75 | } \ | 81 | return le##bits##_to_cpu(leres); \ |
76 | p = (type *)(kaddr + part_offset - map_start); \ | 82 | } \ |
77 | res = le##bits##_to_cpu(p->member); \ | 83 | p = kaddr + part_offset - map_start; \ |
78 | if (token) { \ | 84 | res = get_unaligned_le##bits(p + off); \ |
79 | token->kaddr = kaddr; \ | 85 | if (token) { \ |
80 | token->offset = map_start; \ | 86 | token->kaddr = kaddr; \ |
81 | token->eb = eb; \ | 87 | token->offset = map_start; \ |
82 | } \ | 88 | token->eb = eb; \ |
83 | return res; \ | 89 | } \ |
90 | return res; \ | ||
84 | } \ | 91 | } \ |
85 | void btrfs_set_token_##name(struct extent_buffer *eb, \ | 92 | void btrfs_set_token_##bits(struct extent_buffer *eb, \ |
86 | type *s, u##bits val, struct btrfs_map_token *token) \ | 93 | void *ptr, unsigned long off, u##bits val, \ |
94 | struct btrfs_map_token *token) \ | ||
87 | { \ | 95 | { \ |
88 | unsigned long part_offset = (unsigned long)s; \ | 96 | unsigned long part_offset = (unsigned long)ptr; \ |
89 | unsigned long offset = part_offset + offsetof(type, member); \ | 97 | unsigned long offset = part_offset + off; \ |
90 | type *p; \ | 98 | void *p; \ |
91 | int err; \ | 99 | int err; \ |
92 | char *kaddr; \ | 100 | char *kaddr; \ |
93 | unsigned long map_start; \ | 101 | unsigned long map_start; \ |
94 | unsigned long map_len; \ | 102 | unsigned long map_len; \ |
95 | unsigned long mem_len = sizeof(((type *)0)->member); \ | 103 | int size = sizeof(u##bits); \ |
96 | if (token && token->kaddr && token->offset <= offset && \ | 104 | \ |
97 | token->eb == eb && \ | 105 | if (token && token->kaddr && token->offset <= offset && \ |
98 | (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \ | 106 | token->eb == eb && \ |
99 | kaddr = token->kaddr; \ | 107 | (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ |
100 | p = (type *)(kaddr + part_offset - token->offset); \ | 108 | kaddr = token->kaddr; \ |
101 | p->member = cpu_to_le##bits(val); \ | 109 | p = kaddr + part_offset - token->offset; \ |
102 | return; \ | 110 | put_unaligned_le##bits(val, p + off); \ |
103 | } \ | 111 | return; \ |
104 | err = map_private_extent_buffer(eb, offset, \ | 112 | } \ |
105 | mem_len, \ | 113 | err = map_private_extent_buffer(eb, offset, size, \ |
106 | &kaddr, &map_start, &map_len); \ | 114 | &kaddr, &map_start, &map_len); \ |
107 | if (err) { \ | 115 | if (err) { \ |
108 | __le##bits val2; \ | 116 | __le##bits val2; \ |
109 | val2 = cpu_to_le##bits(val); \ | 117 | \ |
110 | write_eb_member(eb, s, type, member, &val2); \ | 118 | val2 = cpu_to_le##bits(val); \ |
111 | return; \ | 119 | write_extent_buffer(eb, &val2, offset, size); \ |
112 | } \ | 120 | return; \ |
113 | p = (type *)(kaddr + part_offset - map_start); \ | 121 | } \ |
114 | p->member = cpu_to_le##bits(val); \ | 122 | p = kaddr + part_offset - map_start; \ |
115 | if (token) { \ | 123 | put_unaligned_le##bits(val, p + off); \ |
116 | token->kaddr = kaddr; \ | 124 | if (token) { \ |
117 | token->offset = map_start; \ | 125 | token->kaddr = kaddr; \ |
118 | token->eb = eb; \ | 126 | token->offset = map_start; \ |
119 | } \ | 127 | token->eb = eb; \ |
120 | } \ | 128 | } \ |
121 | void btrfs_set_##name(struct extent_buffer *eb, \ | 129 | } |
122 | type *s, u##bits val) \ | ||
123 | { \ | ||
124 | btrfs_set_token_##name(eb, s, val, NULL); \ | ||
125 | } \ | ||
126 | u##bits btrfs_##name(struct extent_buffer *eb, \ | ||
127 | type *s) \ | ||
128 | { \ | ||
129 | return btrfs_token_##name(eb, s, NULL); \ | ||
130 | } \ | ||
131 | 130 | ||
132 | #include "ctree.h" | 131 | DEFINE_BTRFS_SETGET_BITS(8) |
132 | DEFINE_BTRFS_SETGET_BITS(16) | ||
133 | DEFINE_BTRFS_SETGET_BITS(32) | ||
134 | DEFINE_BTRFS_SETGET_BITS(64) | ||
133 | 135 | ||
134 | void btrfs_node_key(struct extent_buffer *eb, | 136 | void btrfs_node_key(struct extent_buffer *eb, |
135 | struct btrfs_disk_key *disk_key, int nr) | 137 | struct btrfs_disk_key *disk_key, int nr) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b19d7556772..fa61ef59cd6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -396,15 +396,23 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
396 | strcmp(args[0].from, "zlib") == 0) { | 396 | strcmp(args[0].from, "zlib") == 0) { |
397 | compress_type = "zlib"; | 397 | compress_type = "zlib"; |
398 | info->compress_type = BTRFS_COMPRESS_ZLIB; | 398 | info->compress_type = BTRFS_COMPRESS_ZLIB; |
399 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
399 | } else if (strcmp(args[0].from, "lzo") == 0) { | 400 | } else if (strcmp(args[0].from, "lzo") == 0) { |
400 | compress_type = "lzo"; | 401 | compress_type = "lzo"; |
401 | info->compress_type = BTRFS_COMPRESS_LZO; | 402 | info->compress_type = BTRFS_COMPRESS_LZO; |
403 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
404 | btrfs_set_fs_incompat(info, COMPRESS_LZO); | ||
405 | } else if (strncmp(args[0].from, "no", 2) == 0) { | ||
406 | compress_type = "no"; | ||
407 | info->compress_type = BTRFS_COMPRESS_NONE; | ||
408 | btrfs_clear_opt(info->mount_opt, COMPRESS); | ||
409 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | ||
410 | compress_force = false; | ||
402 | } else { | 411 | } else { |
403 | ret = -EINVAL; | 412 | ret = -EINVAL; |
404 | goto out; | 413 | goto out; |
405 | } | 414 | } |
406 | 415 | ||
407 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
408 | if (compress_force) { | 416 | if (compress_force) { |
409 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 417 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); |
410 | pr_info("btrfs: force %s compression\n", | 418 | pr_info("btrfs: force %s compression\n", |
@@ -1455,6 +1463,13 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
1455 | ret = btrfs_scan_one_device(vol->name, FMODE_READ, | 1463 | ret = btrfs_scan_one_device(vol->name, FMODE_READ, |
1456 | &btrfs_fs_type, &fs_devices); | 1464 | &btrfs_fs_type, &fs_devices); |
1457 | break; | 1465 | break; |
1466 | case BTRFS_IOC_DEVICES_READY: | ||
1467 | ret = btrfs_scan_one_device(vol->name, FMODE_READ, | ||
1468 | &btrfs_fs_type, &fs_devices); | ||
1469 | if (ret) | ||
1470 | break; | ||
1471 | ret = !(fs_devices->num_devices == fs_devices->total_devices); | ||
1472 | break; | ||
1458 | } | 1473 | } |
1459 | 1474 | ||
1460 | kfree(vol); | 1475 | kfree(vol); |
@@ -1477,16 +1492,6 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
1477 | return 0; | 1492 | return 0; |
1478 | } | 1493 | } |
1479 | 1494 | ||
1480 | static void btrfs_fs_dirty_inode(struct inode *inode, int flags) | ||
1481 | { | ||
1482 | int ret; | ||
1483 | |||
1484 | ret = btrfs_dirty_inode(inode); | ||
1485 | if (ret) | ||
1486 | printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu " | ||
1487 | "error %d\n", btrfs_ino(inode), ret); | ||
1488 | } | ||
1489 | |||
1490 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) | 1495 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) |
1491 | { | 1496 | { |
1492 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); | 1497 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); |
@@ -1526,7 +1531,6 @@ static const struct super_operations btrfs_super_ops = { | |||
1526 | .show_options = btrfs_show_options, | 1531 | .show_options = btrfs_show_options, |
1527 | .show_devname = btrfs_show_devname, | 1532 | .show_devname = btrfs_show_devname, |
1528 | .write_inode = btrfs_write_inode, | 1533 | .write_inode = btrfs_write_inode, |
1529 | .dirty_inode = btrfs_fs_dirty_inode, | ||
1530 | .alloc_inode = btrfs_alloc_inode, | 1534 | .alloc_inode = btrfs_alloc_inode, |
1531 | .destroy_inode = btrfs_destroy_inode, | 1535 | .destroy_inode = btrfs_destroy_inode, |
1532 | .statfs = btrfs_statfs, | 1536 | .statfs = btrfs_statfs, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b72b068183e..7ac7cdcc294 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/blkdev.h> | 24 | #include <linux/blkdev.h> |
25 | #include <linux/uuid.h> | ||
25 | #include "ctree.h" | 26 | #include "ctree.h" |
26 | #include "disk-io.h" | 27 | #include "disk-io.h" |
27 | #include "transaction.h" | 28 | #include "transaction.h" |
@@ -38,7 +39,6 @@ void put_transaction(struct btrfs_transaction *transaction) | |||
38 | if (atomic_dec_and_test(&transaction->use_count)) { | 39 | if (atomic_dec_and_test(&transaction->use_count)) { |
39 | BUG_ON(!list_empty(&transaction->list)); | 40 | BUG_ON(!list_empty(&transaction->list)); |
40 | WARN_ON(transaction->delayed_refs.root.rb_node); | 41 | WARN_ON(transaction->delayed_refs.root.rb_node); |
41 | WARN_ON(!list_empty(&transaction->delayed_refs.seq_head)); | ||
42 | memset(transaction, 0, sizeof(*transaction)); | 42 | memset(transaction, 0, sizeof(*transaction)); |
43 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 43 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
44 | } | 44 | } |
@@ -100,8 +100,8 @@ loop: | |||
100 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 100 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
101 | cur_trans = fs_info->running_transaction; | 101 | cur_trans = fs_info->running_transaction; |
102 | goto loop; | 102 | goto loop; |
103 | } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 103 | } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
104 | spin_unlock(&root->fs_info->trans_lock); | 104 | spin_unlock(&fs_info->trans_lock); |
105 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 105 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
106 | return -EROFS; | 106 | return -EROFS; |
107 | } | 107 | } |
@@ -126,7 +126,6 @@ loop: | |||
126 | cur_trans->delayed_refs.num_heads = 0; | 126 | cur_trans->delayed_refs.num_heads = 0; |
127 | cur_trans->delayed_refs.flushing = 0; | 127 | cur_trans->delayed_refs.flushing = 0; |
128 | cur_trans->delayed_refs.run_delayed_start = 0; | 128 | cur_trans->delayed_refs.run_delayed_start = 0; |
129 | cur_trans->delayed_refs.seq = 1; | ||
130 | 129 | ||
131 | /* | 130 | /* |
132 | * although the tree mod log is per file system and not per transaction, | 131 | * although the tree mod log is per file system and not per transaction, |
@@ -145,10 +144,8 @@ loop: | |||
145 | } | 144 | } |
146 | atomic_set(&fs_info->tree_mod_seq, 0); | 145 | atomic_set(&fs_info->tree_mod_seq, 0); |
147 | 146 | ||
148 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | ||
149 | spin_lock_init(&cur_trans->commit_lock); | 147 | spin_lock_init(&cur_trans->commit_lock); |
150 | spin_lock_init(&cur_trans->delayed_refs.lock); | 148 | spin_lock_init(&cur_trans->delayed_refs.lock); |
151 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | ||
152 | 149 | ||
153 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 150 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
154 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 151 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
@@ -299,6 +296,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
299 | struct btrfs_transaction *cur_trans; | 296 | struct btrfs_transaction *cur_trans; |
300 | u64 num_bytes = 0; | 297 | u64 num_bytes = 0; |
301 | int ret; | 298 | int ret; |
299 | u64 qgroup_reserved = 0; | ||
302 | 300 | ||
303 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 301 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
304 | return ERR_PTR(-EROFS); | 302 | return ERR_PTR(-EROFS); |
@@ -317,6 +315,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
317 | * the appropriate flushing if need be. | 315 | * the appropriate flushing if need be. |
318 | */ | 316 | */ |
319 | if (num_items > 0 && root != root->fs_info->chunk_root) { | 317 | if (num_items > 0 && root != root->fs_info->chunk_root) { |
318 | if (root->fs_info->quota_enabled && | ||
319 | is_fstree(root->root_key.objectid)) { | ||
320 | qgroup_reserved = num_items * root->leafsize; | ||
321 | ret = btrfs_qgroup_reserve(root, qgroup_reserved); | ||
322 | if (ret) | ||
323 | return ERR_PTR(ret); | ||
324 | } | ||
325 | |||
320 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 326 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
321 | ret = btrfs_block_rsv_add(root, | 327 | ret = btrfs_block_rsv_add(root, |
322 | &root->fs_info->trans_block_rsv, | 328 | &root->fs_info->trans_block_rsv, |
@@ -349,11 +355,16 @@ again: | |||
349 | h->transaction = cur_trans; | 355 | h->transaction = cur_trans; |
350 | h->blocks_used = 0; | 356 | h->blocks_used = 0; |
351 | h->bytes_reserved = 0; | 357 | h->bytes_reserved = 0; |
358 | h->root = root; | ||
352 | h->delayed_ref_updates = 0; | 359 | h->delayed_ref_updates = 0; |
353 | h->use_count = 1; | 360 | h->use_count = 1; |
361 | h->adding_csums = 0; | ||
354 | h->block_rsv = NULL; | 362 | h->block_rsv = NULL; |
355 | h->orig_rsv = NULL; | 363 | h->orig_rsv = NULL; |
356 | h->aborted = 0; | 364 | h->aborted = 0; |
365 | h->qgroup_reserved = qgroup_reserved; | ||
366 | h->delayed_ref_elem.seq = 0; | ||
367 | INIT_LIST_HEAD(&h->qgroup_ref_list); | ||
357 | 368 | ||
358 | smp_mb(); | 369 | smp_mb(); |
359 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 370 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
@@ -473,7 +484,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
473 | struct btrfs_root *root) | 484 | struct btrfs_root *root) |
474 | { | 485 | { |
475 | struct btrfs_transaction *cur_trans = trans->transaction; | 486 | struct btrfs_transaction *cur_trans = trans->transaction; |
476 | struct btrfs_block_rsv *rsv = trans->block_rsv; | ||
477 | int updates; | 487 | int updates; |
478 | int err; | 488 | int err; |
479 | 489 | ||
@@ -481,12 +491,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
481 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 491 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
482 | return 1; | 492 | return 1; |
483 | 493 | ||
484 | /* | ||
485 | * We need to do this in case we're deleting csums so the global block | ||
486 | * rsv get's used instead of the csum block rsv. | ||
487 | */ | ||
488 | trans->block_rsv = NULL; | ||
489 | |||
490 | updates = trans->delayed_ref_updates; | 494 | updates = trans->delayed_ref_updates; |
491 | trans->delayed_ref_updates = 0; | 495 | trans->delayed_ref_updates = 0; |
492 | if (updates) { | 496 | if (updates) { |
@@ -495,8 +499,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
495 | return err; | 499 | return err; |
496 | } | 500 | } |
497 | 501 | ||
498 | trans->block_rsv = rsv; | ||
499 | |||
500 | return should_end_transaction(trans, root); | 502 | return should_end_transaction(trans, root); |
501 | } | 503 | } |
502 | 504 | ||
@@ -513,8 +515,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
513 | return 0; | 515 | return 0; |
514 | } | 516 | } |
515 | 517 | ||
518 | /* | ||
519 | * do the qgroup accounting as early as possible | ||
520 | */ | ||
521 | err = btrfs_delayed_refs_qgroup_accounting(trans, info); | ||
522 | |||
516 | btrfs_trans_release_metadata(trans, root); | 523 | btrfs_trans_release_metadata(trans, root); |
517 | trans->block_rsv = NULL; | 524 | trans->block_rsv = NULL; |
525 | /* | ||
526 | * the same root has to be passed to start_transaction and | ||
527 | * end_transaction. Subvolume quota depends on this. | ||
528 | */ | ||
529 | WARN_ON(trans->root != root); | ||
530 | |||
531 | if (trans->qgroup_reserved) { | ||
532 | btrfs_qgroup_free(root, trans->qgroup_reserved); | ||
533 | trans->qgroup_reserved = 0; | ||
534 | } | ||
535 | |||
518 | while (count < 2) { | 536 | while (count < 2) { |
519 | unsigned long cur = trans->delayed_ref_updates; | 537 | unsigned long cur = trans->delayed_ref_updates; |
520 | trans->delayed_ref_updates = 0; | 538 | trans->delayed_ref_updates = 0; |
@@ -527,6 +545,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
527 | } | 545 | } |
528 | count++; | 546 | count++; |
529 | } | 547 | } |
548 | btrfs_trans_release_metadata(trans, root); | ||
549 | trans->block_rsv = NULL; | ||
530 | 550 | ||
531 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 551 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
532 | should_end_transaction(trans, root)) { | 552 | should_end_transaction(trans, root)) { |
@@ -567,6 +587,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
567 | root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 587 | root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
568 | err = -EIO; | 588 | err = -EIO; |
569 | } | 589 | } |
590 | assert_qgroups_uptodate(trans); | ||
570 | 591 | ||
571 | memset(trans, 0, sizeof(*trans)); | 592 | memset(trans, 0, sizeof(*trans)); |
572 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 593 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
@@ -785,6 +806,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
785 | ret = btrfs_run_dev_stats(trans, root->fs_info); | 806 | ret = btrfs_run_dev_stats(trans, root->fs_info); |
786 | BUG_ON(ret); | 807 | BUG_ON(ret); |
787 | 808 | ||
809 | ret = btrfs_run_qgroups(trans, root->fs_info); | ||
810 | BUG_ON(ret); | ||
811 | |||
812 | /* run_qgroups might have added some more refs */ | ||
813 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
814 | BUG_ON(ret); | ||
815 | |||
788 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 816 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
789 | next = fs_info->dirty_cowonly_roots.next; | 817 | next = fs_info->dirty_cowonly_roots.next; |
790 | list_del_init(next); | 818 | list_del_init(next); |
@@ -926,11 +954,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
926 | struct dentry *dentry; | 954 | struct dentry *dentry; |
927 | struct extent_buffer *tmp; | 955 | struct extent_buffer *tmp; |
928 | struct extent_buffer *old; | 956 | struct extent_buffer *old; |
957 | struct timespec cur_time = CURRENT_TIME; | ||
929 | int ret; | 958 | int ret; |
930 | u64 to_reserve = 0; | 959 | u64 to_reserve = 0; |
931 | u64 index = 0; | 960 | u64 index = 0; |
932 | u64 objectid; | 961 | u64 objectid; |
933 | u64 root_flags; | 962 | u64 root_flags; |
963 | uuid_le new_uuid; | ||
934 | 964 | ||
935 | rsv = trans->block_rsv; | 965 | rsv = trans->block_rsv; |
936 | 966 | ||
@@ -957,6 +987,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
957 | } | 987 | } |
958 | } | 988 | } |
959 | 989 | ||
990 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, | ||
991 | objectid, pending->inherit); | ||
992 | kfree(pending->inherit); | ||
993 | if (ret) { | ||
994 | pending->error = ret; | ||
995 | goto fail; | ||
996 | } | ||
997 | |||
960 | key.objectid = objectid; | 998 | key.objectid = objectid; |
961 | key.offset = (u64)-1; | 999 | key.offset = (u64)-1; |
962 | key.type = BTRFS_ROOT_ITEM_KEY; | 1000 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -1016,6 +1054,20 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1016 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | 1054 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; |
1017 | btrfs_set_root_flags(new_root_item, root_flags); | 1055 | btrfs_set_root_flags(new_root_item, root_flags); |
1018 | 1056 | ||
1057 | btrfs_set_root_generation_v2(new_root_item, | ||
1058 | trans->transid); | ||
1059 | uuid_le_gen(&new_uuid); | ||
1060 | memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); | ||
1061 | memcpy(new_root_item->parent_uuid, root->root_item.uuid, | ||
1062 | BTRFS_UUID_SIZE); | ||
1063 | new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); | ||
1064 | new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); | ||
1065 | btrfs_set_root_otransid(new_root_item, trans->transid); | ||
1066 | memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); | ||
1067 | memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); | ||
1068 | btrfs_set_root_stransid(new_root_item, 0); | ||
1069 | btrfs_set_root_rtransid(new_root_item, 0); | ||
1070 | |||
1019 | old = btrfs_lock_root_node(root); | 1071 | old = btrfs_lock_root_node(root); |
1020 | ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); | 1072 | ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); |
1021 | if (ret) { | 1073 | if (ret) { |
@@ -1269,9 +1321,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1269 | 1321 | ||
1270 | btrfs_run_ordered_operations(root, 0); | 1322 | btrfs_run_ordered_operations(root, 0); |
1271 | 1323 | ||
1272 | btrfs_trans_release_metadata(trans, root); | ||
1273 | trans->block_rsv = NULL; | ||
1274 | |||
1275 | if (cur_trans->aborted) | 1324 | if (cur_trans->aborted) |
1276 | goto cleanup_transaction; | 1325 | goto cleanup_transaction; |
1277 | 1326 | ||
@@ -1282,6 +1331,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1282 | if (ret) | 1331 | if (ret) |
1283 | goto cleanup_transaction; | 1332 | goto cleanup_transaction; |
1284 | 1333 | ||
1334 | btrfs_trans_release_metadata(trans, root); | ||
1335 | trans->block_rsv = NULL; | ||
1336 | |||
1285 | cur_trans = trans->transaction; | 1337 | cur_trans = trans->transaction; |
1286 | 1338 | ||
1287 | /* | 1339 | /* |
@@ -1330,7 +1382,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1330 | spin_unlock(&root->fs_info->trans_lock); | 1382 | spin_unlock(&root->fs_info->trans_lock); |
1331 | } | 1383 | } |
1332 | 1384 | ||
1333 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | 1385 | if (!btrfs_test_opt(root, SSD) && |
1386 | (now < cur_trans->start_time || now - cur_trans->start_time < 1)) | ||
1334 | should_grow = 1; | 1387 | should_grow = 1; |
1335 | 1388 | ||
1336 | do { | 1389 | do { |
@@ -1352,6 +1405,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1352 | goto cleanup_transaction; | 1405 | goto cleanup_transaction; |
1353 | 1406 | ||
1354 | /* | 1407 | /* |
1408 | * running the delayed items may have added new refs. account | ||
1409 | * them now so that they hinder processing of more delayed refs | ||
1410 | * as little as possible. | ||
1411 | */ | ||
1412 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
1413 | |||
1414 | /* | ||
1355 | * rename don't use btrfs_join_transaction, so, once we | 1415 | * rename don't use btrfs_join_transaction, so, once we |
1356 | * set the transaction to blocked above, we aren't going | 1416 | * set the transaction to blocked above, we aren't going |
1357 | * to get any new ordered operations. We can safely run | 1417 | * to get any new ordered operations. We can safely run |
@@ -1463,6 +1523,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1463 | root->fs_info->chunk_root->node); | 1523 | root->fs_info->chunk_root->node); |
1464 | switch_commit_root(root->fs_info->chunk_root); | 1524 | switch_commit_root(root->fs_info->chunk_root); |
1465 | 1525 | ||
1526 | assert_qgroups_uptodate(trans); | ||
1466 | update_super_roots(root); | 1527 | update_super_roots(root); |
1467 | 1528 | ||
1468 | if (!root->fs_info->log_root_recovering) { | 1529 | if (!root->fs_info->log_root_recovering) { |
@@ -1532,6 +1593,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1532 | return ret; | 1593 | return ret; |
1533 | 1594 | ||
1534 | cleanup_transaction: | 1595 | cleanup_transaction: |
1596 | btrfs_trans_release_metadata(trans, root); | ||
1597 | trans->block_rsv = NULL; | ||
1535 | btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); | 1598 | btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); |
1536 | // WARN_ON(1); | 1599 | // WARN_ON(1); |
1537 | if (current->journal_info == trans) | 1600 | if (current->journal_info == trans) |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fe27379e368..e8b8416c688 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define __BTRFS_TRANSACTION__ | 20 | #define __BTRFS_TRANSACTION__ |
21 | #include "btrfs_inode.h" | 21 | #include "btrfs_inode.h" |
22 | #include "delayed-ref.h" | 22 | #include "delayed-ref.h" |
23 | #include "ctree.h" | ||
23 | 24 | ||
24 | struct btrfs_transaction { | 25 | struct btrfs_transaction { |
25 | u64 transid; | 26 | u64 transid; |
@@ -49,6 +50,7 @@ struct btrfs_transaction { | |||
49 | struct btrfs_trans_handle { | 50 | struct btrfs_trans_handle { |
50 | u64 transid; | 51 | u64 transid; |
51 | u64 bytes_reserved; | 52 | u64 bytes_reserved; |
53 | u64 qgroup_reserved; | ||
52 | unsigned long use_count; | 54 | unsigned long use_count; |
53 | unsigned long blocks_reserved; | 55 | unsigned long blocks_reserved; |
54 | unsigned long blocks_used; | 56 | unsigned long blocks_used; |
@@ -57,12 +59,22 @@ struct btrfs_trans_handle { | |||
57 | struct btrfs_block_rsv *block_rsv; | 59 | struct btrfs_block_rsv *block_rsv; |
58 | struct btrfs_block_rsv *orig_rsv; | 60 | struct btrfs_block_rsv *orig_rsv; |
59 | int aborted; | 61 | int aborted; |
62 | int adding_csums; | ||
63 | /* | ||
64 | * this root is only needed to validate that the root passed to | ||
65 | * start_transaction is the same as the one passed to end_transaction. | ||
66 | * Subvolume quota depends on this | ||
67 | */ | ||
68 | struct btrfs_root *root; | ||
69 | struct seq_list delayed_ref_elem; | ||
70 | struct list_head qgroup_ref_list; | ||
60 | }; | 71 | }; |
61 | 72 | ||
62 | struct btrfs_pending_snapshot { | 73 | struct btrfs_pending_snapshot { |
63 | struct dentry *dentry; | 74 | struct dentry *dentry; |
64 | struct btrfs_root *root; | 75 | struct btrfs_root *root; |
65 | struct btrfs_root *snap; | 76 | struct btrfs_root *snap; |
77 | struct btrfs_qgroup_inherit *inherit; | ||
66 | /* block reservation for the operation */ | 78 | /* block reservation for the operation */ |
67 | struct btrfs_block_rsv block_rsv; | 79 | struct btrfs_block_rsv block_rsv; |
68 | /* extra metadata reseration for relocation */ | 80 | /* extra metadata reseration for relocation */ |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8abeae4224f..c86670f4f28 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -637,7 +637,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
637 | } | 637 | } |
638 | 638 | ||
639 | inode_set_bytes(inode, saved_nbytes); | 639 | inode_set_bytes(inode, saved_nbytes); |
640 | btrfs_update_inode(trans, root, inode); | 640 | ret = btrfs_update_inode(trans, root, inode); |
641 | out: | 641 | out: |
642 | if (inode) | 642 | if (inode) |
643 | iput(inode); | 643 | iput(inode); |
@@ -1133,7 +1133,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, | |||
1133 | btrfs_release_path(path); | 1133 | btrfs_release_path(path); |
1134 | if (ret == 0) { | 1134 | if (ret == 0) { |
1135 | btrfs_inc_nlink(inode); | 1135 | btrfs_inc_nlink(inode); |
1136 | btrfs_update_inode(trans, root, inode); | 1136 | ret = btrfs_update_inode(trans, root, inode); |
1137 | } else if (ret == -EEXIST) { | 1137 | } else if (ret == -EEXIST) { |
1138 | ret = 0; | 1138 | ret = 0; |
1139 | } else { | 1139 | } else { |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ecaad40e7ef..b8708f994e6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -429,6 +429,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
429 | mutex_init(&fs_devices->device_list_mutex); | 429 | mutex_init(&fs_devices->device_list_mutex); |
430 | fs_devices->latest_devid = orig->latest_devid; | 430 | fs_devices->latest_devid = orig->latest_devid; |
431 | fs_devices->latest_trans = orig->latest_trans; | 431 | fs_devices->latest_trans = orig->latest_trans; |
432 | fs_devices->total_devices = orig->total_devices; | ||
432 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); | 433 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); |
433 | 434 | ||
434 | /* We have held the volume lock, it is safe to get the devices. */ | 435 | /* We have held the volume lock, it is safe to get the devices. */ |
@@ -739,6 +740,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
739 | int ret; | 740 | int ret; |
740 | u64 devid; | 741 | u64 devid; |
741 | u64 transid; | 742 | u64 transid; |
743 | u64 total_devices; | ||
742 | 744 | ||
743 | flags |= FMODE_EXCL; | 745 | flags |= FMODE_EXCL; |
744 | bdev = blkdev_get_by_path(path, flags, holder); | 746 | bdev = blkdev_get_by_path(path, flags, holder); |
@@ -760,6 +762,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
760 | disk_super = (struct btrfs_super_block *)bh->b_data; | 762 | disk_super = (struct btrfs_super_block *)bh->b_data; |
761 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 763 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
762 | transid = btrfs_super_generation(disk_super); | 764 | transid = btrfs_super_generation(disk_super); |
765 | total_devices = btrfs_super_num_devices(disk_super); | ||
763 | if (disk_super->label[0]) | 766 | if (disk_super->label[0]) |
764 | printk(KERN_INFO "device label %s ", disk_super->label); | 767 | printk(KERN_INFO "device label %s ", disk_super->label); |
765 | else | 768 | else |
@@ -767,7 +770,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
767 | printk(KERN_CONT "devid %llu transid %llu %s\n", | 770 | printk(KERN_CONT "devid %llu transid %llu %s\n", |
768 | (unsigned long long)devid, (unsigned long long)transid, path); | 771 | (unsigned long long)devid, (unsigned long long)transid, path); |
769 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 772 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
770 | 773 | if (!ret && fs_devices_ret) | |
774 | (*fs_devices_ret)->total_devices = total_devices; | ||
771 | brelse(bh); | 775 | brelse(bh); |
772 | error_close: | 776 | error_close: |
773 | mutex_unlock(&uuid_mutex); | 777 | mutex_unlock(&uuid_mutex); |
@@ -1433,6 +1437,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1433 | list_del_rcu(&device->dev_list); | 1437 | list_del_rcu(&device->dev_list); |
1434 | 1438 | ||
1435 | device->fs_devices->num_devices--; | 1439 | device->fs_devices->num_devices--; |
1440 | device->fs_devices->total_devices--; | ||
1436 | 1441 | ||
1437 | if (device->missing) | 1442 | if (device->missing) |
1438 | root->fs_info->fs_devices->missing_devices--; | 1443 | root->fs_info->fs_devices->missing_devices--; |
@@ -1550,6 +1555,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) | |||
1550 | fs_devices->seeding = 0; | 1555 | fs_devices->seeding = 0; |
1551 | fs_devices->num_devices = 0; | 1556 | fs_devices->num_devices = 0; |
1552 | fs_devices->open_devices = 0; | 1557 | fs_devices->open_devices = 0; |
1558 | fs_devices->total_devices = 0; | ||
1553 | fs_devices->seed = seed_devices; | 1559 | fs_devices->seed = seed_devices; |
1554 | 1560 | ||
1555 | generate_random_uuid(fs_devices->fsid); | 1561 | generate_random_uuid(fs_devices->fsid); |
@@ -1749,6 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1749 | root->fs_info->fs_devices->num_devices++; | 1755 | root->fs_info->fs_devices->num_devices++; |
1750 | root->fs_info->fs_devices->open_devices++; | 1756 | root->fs_info->fs_devices->open_devices++; |
1751 | root->fs_info->fs_devices->rw_devices++; | 1757 | root->fs_info->fs_devices->rw_devices++; |
1758 | root->fs_info->fs_devices->total_devices++; | ||
1752 | if (device->can_discard) | 1759 | if (device->can_discard) |
1753 | root->fs_info->fs_devices->num_can_discard++; | 1760 | root->fs_info->fs_devices->num_can_discard++; |
1754 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1761 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
@@ -4736,9 +4743,6 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) | |||
4736 | key.offset = device->devid; | 4743 | key.offset = device->devid; |
4737 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | 4744 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); |
4738 | if (ret) { | 4745 | if (ret) { |
4739 | printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", | ||
4740 | rcu_str_deref(device->name), | ||
4741 | (unsigned long long)device->devid); | ||
4742 | __btrfs_reset_dev_stats(device); | 4746 | __btrfs_reset_dev_stats(device); |
4743 | device->dev_stats_valid = 1; | 4747 | device->dev_stats_valid = 1; |
4744 | btrfs_release_path(path); | 4748 | btrfs_release_path(path); |
@@ -4880,6 +4884,14 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) | |||
4880 | 4884 | ||
4881 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) | 4885 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) |
4882 | { | 4886 | { |
4887 | int i; | ||
4888 | |||
4889 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4890 | if (btrfs_dev_stat_read(dev, i) != 0) | ||
4891 | break; | ||
4892 | if (i == BTRFS_DEV_STAT_VALUES_MAX) | ||
4893 | return; /* all values == 0, suppress message */ | ||
4894 | |||
4883 | printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | 4895 | printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", |
4884 | rcu_str_deref(dev->name), | 4896 | rcu_str_deref(dev->name), |
4885 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | 4897 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), |
@@ -4890,8 +4902,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) | |||
4890 | } | 4902 | } |
4891 | 4903 | ||
4892 | int btrfs_get_dev_stats(struct btrfs_root *root, | 4904 | int btrfs_get_dev_stats(struct btrfs_root *root, |
4893 | struct btrfs_ioctl_get_dev_stats *stats, | 4905 | struct btrfs_ioctl_get_dev_stats *stats) |
4894 | int reset_after_read) | ||
4895 | { | 4906 | { |
4896 | struct btrfs_device *dev; | 4907 | struct btrfs_device *dev; |
4897 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 4908 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
@@ -4909,7 +4920,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
4909 | printk(KERN_WARNING | 4920 | printk(KERN_WARNING |
4910 | "btrfs: get dev_stats failed, not yet valid\n"); | 4921 | "btrfs: get dev_stats failed, not yet valid\n"); |
4911 | return -ENODEV; | 4922 | return -ENODEV; |
4912 | } else if (reset_after_read) { | 4923 | } else if (stats->flags & BTRFS_DEV_STATS_RESET) { |
4913 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | 4924 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { |
4914 | if (stats->nr_items > i) | 4925 | if (stats->nr_items > i) |
4915 | stats->values[i] = | 4926 | stats->values[i] = |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 95f6637614d..5479325987b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -126,6 +126,7 @@ struct btrfs_fs_devices { | |||
126 | u64 missing_devices; | 126 | u64 missing_devices; |
127 | u64 total_rw_bytes; | 127 | u64 total_rw_bytes; |
128 | u64 num_can_discard; | 128 | u64 num_can_discard; |
129 | u64 total_devices; | ||
129 | struct block_device *latest_bdev; | 130 | struct block_device *latest_bdev; |
130 | 131 | ||
131 | /* all of the devices in the FS, protected by a mutex | 132 | /* all of the devices in the FS, protected by a mutex |
@@ -293,8 +294,7 @@ struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | |||
293 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); | 294 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); |
294 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | 295 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); |
295 | int btrfs_get_dev_stats(struct btrfs_root *root, | 296 | int btrfs_get_dev_stats(struct btrfs_root *root, |
296 | struct btrfs_ioctl_get_dev_stats *stats, | 297 | struct btrfs_ioctl_get_dev_stats *stats); |
297 | int reset_after_read); | ||
298 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | 298 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); |
299 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | 299 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
300 | struct btrfs_fs_info *fs_info); | 300 | struct btrfs_fs_info *fs_info); |
diff --git a/fs/inode.c b/fs/inode.c index 775cbabd4fa..3cc50432046 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1551,6 +1551,8 @@ void touch_atime(struct path *path) | |||
1551 | * Btrfs), but since we touch atime while walking down the path we | 1551 | * Btrfs), but since we touch atime while walking down the path we |
1552 | * really don't care if we failed to update the atime of the file, | 1552 | * really don't care if we failed to update the atime of the file, |
1553 | * so just ignore the return value. | 1553 | * so just ignore the return value. |
1554 | * We may also fail on filesystems that have the ability to make parts | ||
1555 | * of the fs read only, e.g. subvolumes in Btrfs. | ||
1554 | */ | 1556 | */ |
1555 | update_time(inode, &now, S_ATIME); | 1557 | update_time(inode, &now, S_ATIME); |
1556 | mnt_drop_write(mnt); | 1558 | mnt_drop_write(mnt); |