diff options
Diffstat (limited to 'fs/btrfs')
46 files changed, 3713 insertions, 1318 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index f341a98031d2..6d1d0b93b1aa 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -16,4 +16,4 @@ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o | |||
16 | 16 | ||
17 | btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ | 17 | btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ |
18 | tests/extent-buffer-tests.o tests/btrfs-tests.o \ | 18 | tests/extent-buffer-tests.o tests/btrfs-tests.o \ |
19 | tests/extent-io-tests.o tests/inode-tests.o | 19 | tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index ff9b3995d453..9a0124a95851 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -79,13 +79,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
79 | const char *name; | 79 | const char *name; |
80 | char *value = NULL; | 80 | char *value = NULL; |
81 | 81 | ||
82 | if (acl) { | ||
83 | ret = posix_acl_valid(acl); | ||
84 | if (ret < 0) | ||
85 | return ret; | ||
86 | ret = 0; | ||
87 | } | ||
88 | |||
89 | switch (type) { | 82 | switch (type) { |
90 | case ACL_TYPE_ACCESS: | 83 | case ACL_TYPE_ACCESS: |
91 | name = POSIX_ACL_XATTR_ACCESS; | 84 | name = POSIX_ACL_XATTR_ACCESS; |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 10db21fa0926..e25564bfcb46 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -900,7 +900,11 @@ again: | |||
900 | goto out; | 900 | goto out; |
901 | BUG_ON(ret == 0); | 901 | BUG_ON(ret == 0); |
902 | 902 | ||
903 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
904 | if (trans && likely(trans->type != __TRANS_DUMMY)) { | ||
905 | #else | ||
903 | if (trans) { | 906 | if (trans) { |
907 | #endif | ||
904 | /* | 908 | /* |
905 | * look if there are updates for this ref queued and lock the | 909 | * look if there are updates for this ref queued and lock the |
906 | * head | 910 | * head |
@@ -984,11 +988,12 @@ again: | |||
984 | goto out; | 988 | goto out; |
985 | } | 989 | } |
986 | if (ref->count && ref->parent) { | 990 | if (ref->count && ref->parent) { |
987 | if (extent_item_pos && !ref->inode_list) { | 991 | if (extent_item_pos && !ref->inode_list && |
992 | ref->level == 0) { | ||
988 | u32 bsz; | 993 | u32 bsz; |
989 | struct extent_buffer *eb; | 994 | struct extent_buffer *eb; |
990 | bsz = btrfs_level_size(fs_info->extent_root, | 995 | bsz = btrfs_level_size(fs_info->extent_root, |
991 | info_level); | 996 | ref->level); |
992 | eb = read_tree_block(fs_info->extent_root, | 997 | eb = read_tree_block(fs_info->extent_root, |
993 | ref->parent, bsz, 0); | 998 | ref->parent, bsz, 0); |
994 | if (!eb || !extent_buffer_uptodate(eb)) { | 999 | if (!eb || !extent_buffer_uptodate(eb)) { |
@@ -1404,9 +1409,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
1404 | * returns <0 on error | 1409 | * returns <0 on error |
1405 | */ | 1410 | */ |
1406 | static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, | 1411 | static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, |
1407 | struct btrfs_extent_item *ei, u32 item_size, | 1412 | struct btrfs_key *key, |
1408 | struct btrfs_extent_inline_ref **out_eiref, | 1413 | struct btrfs_extent_item *ei, u32 item_size, |
1409 | int *out_type) | 1414 | struct btrfs_extent_inline_ref **out_eiref, |
1415 | int *out_type) | ||
1410 | { | 1416 | { |
1411 | unsigned long end; | 1417 | unsigned long end; |
1412 | u64 flags; | 1418 | u64 flags; |
@@ -1416,19 +1422,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, | |||
1416 | /* first call */ | 1422 | /* first call */ |
1417 | flags = btrfs_extent_flags(eb, ei); | 1423 | flags = btrfs_extent_flags(eb, ei); |
1418 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 1424 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
1419 | info = (struct btrfs_tree_block_info *)(ei + 1); | 1425 | if (key->type == BTRFS_METADATA_ITEM_KEY) { |
1420 | *out_eiref = | 1426 | /* a skinny metadata extent */ |
1421 | (struct btrfs_extent_inline_ref *)(info + 1); | 1427 | *out_eiref = |
1428 | (struct btrfs_extent_inline_ref *)(ei + 1); | ||
1429 | } else { | ||
1430 | WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY); | ||
1431 | info = (struct btrfs_tree_block_info *)(ei + 1); | ||
1432 | *out_eiref = | ||
1433 | (struct btrfs_extent_inline_ref *)(info + 1); | ||
1434 | } | ||
1422 | } else { | 1435 | } else { |
1423 | *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1); | 1436 | *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1); |
1424 | } | 1437 | } |
1425 | *ptr = (unsigned long)*out_eiref; | 1438 | *ptr = (unsigned long)*out_eiref; |
1426 | if ((void *)*ptr >= (void *)ei + item_size) | 1439 | if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size) |
1427 | return -ENOENT; | 1440 | return -ENOENT; |
1428 | } | 1441 | } |
1429 | 1442 | ||
1430 | end = (unsigned long)ei + item_size; | 1443 | end = (unsigned long)ei + item_size; |
1431 | *out_eiref = (struct btrfs_extent_inline_ref *)*ptr; | 1444 | *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr); |
1432 | *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref); | 1445 | *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref); |
1433 | 1446 | ||
1434 | *ptr += btrfs_extent_inline_ref_size(*out_type); | 1447 | *ptr += btrfs_extent_inline_ref_size(*out_type); |
@@ -1447,8 +1460,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, | |||
1447 | * <0 on error. | 1460 | * <0 on error. |
1448 | */ | 1461 | */ |
1449 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | 1462 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, |
1450 | struct btrfs_extent_item *ei, u32 item_size, | 1463 | struct btrfs_key *key, struct btrfs_extent_item *ei, |
1451 | u64 *out_root, u8 *out_level) | 1464 | u32 item_size, u64 *out_root, u8 *out_level) |
1452 | { | 1465 | { |
1453 | int ret; | 1466 | int ret; |
1454 | int type; | 1467 | int type; |
@@ -1459,8 +1472,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |||
1459 | return 1; | 1472 | return 1; |
1460 | 1473 | ||
1461 | while (1) { | 1474 | while (1) { |
1462 | ret = __get_extent_inline_ref(ptr, eb, ei, item_size, | 1475 | ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size, |
1463 | &eiref, &type); | 1476 | &eiref, &type); |
1464 | if (ret < 0) | 1477 | if (ret < 0) |
1465 | return ret; | 1478 | return ret; |
1466 | 1479 | ||
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index a910b27a8ad9..86fc20fec282 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -40,8 +40,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
40 | u64 *flags); | 40 | u64 *flags); |
41 | 41 | ||
42 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | 42 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, |
43 | struct btrfs_extent_item *ei, u32 item_size, | 43 | struct btrfs_key *key, struct btrfs_extent_item *ei, |
44 | u64 *out_root, u8 *out_level); | 44 | u32 item_size, u64 *out_root, u8 *out_level); |
45 | 45 | ||
46 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | 46 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, |
47 | u64 extent_item_objectid, | 47 | u64 extent_item_objectid, |
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
55 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | 55 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); |
56 | 56 | ||
57 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 57 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
58 | struct btrfs_fs_info *fs_info, u64 bytenr, | 58 | struct btrfs_fs_info *fs_info, u64 bytenr, |
59 | u64 time_seq, struct ulist **roots); | 59 | u64 time_seq, struct ulist **roots); |
60 | char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | 60 | char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, |
61 | u32 name_len, unsigned long name_off, | 61 | u32 name_len, unsigned long name_off, |
62 | struct extent_buffer *eb_in, u64 parent, | 62 | struct extent_buffer *eb_in, u64 parent, |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index c9a24444ec9a..4794923c410c 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -279,9 +279,11 @@ static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) | |||
279 | 279 | ||
280 | static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) | 280 | static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) |
281 | { | 281 | { |
282 | smp_mb__before_clear_bit(); | 282 | smp_mb__before_atomic(); |
283 | clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, | 283 | clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
284 | &BTRFS_I(inode)->runtime_flags); | 284 | &BTRFS_I(inode)->runtime_flags); |
285 | } | 285 | } |
286 | 286 | ||
287 | bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end); | ||
288 | |||
287 | #endif | 289 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 0e8388e72d8d..ce92ae30250f 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -1093,6 +1093,7 @@ leaf_item_out_of_bounce_error: | |||
1093 | next_stack = | 1093 | next_stack = |
1094 | btrfsic_stack_frame_alloc(); | 1094 | btrfsic_stack_frame_alloc(); |
1095 | if (NULL == next_stack) { | 1095 | if (NULL == next_stack) { |
1096 | sf->error = -1; | ||
1096 | btrfsic_release_block_ctx( | 1097 | btrfsic_release_block_ctx( |
1097 | &sf-> | 1098 | &sf-> |
1098 | next_block_ctx); | 1099 | next_block_ctx); |
@@ -1190,8 +1191,10 @@ continue_with_current_node_stack_frame: | |||
1190 | sf->next_block_ctx.datav[0]; | 1191 | sf->next_block_ctx.datav[0]; |
1191 | 1192 | ||
1192 | next_stack = btrfsic_stack_frame_alloc(); | 1193 | next_stack = btrfsic_stack_frame_alloc(); |
1193 | if (NULL == next_stack) | 1194 | if (NULL == next_stack) { |
1195 | sf->error = -1; | ||
1194 | goto one_stack_frame_backwards; | 1196 | goto one_stack_frame_backwards; |
1197 | } | ||
1195 | 1198 | ||
1196 | next_stack->i = -1; | 1199 | next_stack->i = -1; |
1197 | next_stack->block = sf->next_block; | 1200 | next_stack->block = sf->next_block; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d43c544d3b68..92371c414228 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -887,7 +887,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, | |||
887 | 887 | ||
888 | workspace = find_workspace(type); | 888 | workspace = find_workspace(type); |
889 | if (IS_ERR(workspace)) | 889 | if (IS_ERR(workspace)) |
890 | return -1; | 890 | return PTR_ERR(workspace); |
891 | 891 | ||
892 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | 892 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, |
893 | start, len, pages, | 893 | start, len, pages, |
@@ -923,7 +923,7 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in, | |||
923 | 923 | ||
924 | workspace = find_workspace(type); | 924 | workspace = find_workspace(type); |
925 | if (IS_ERR(workspace)) | 925 | if (IS_ERR(workspace)) |
926 | return -ENOMEM; | 926 | return PTR_ERR(workspace); |
927 | 927 | ||
928 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | 928 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, |
929 | disk_start, | 929 | disk_start, |
@@ -945,7 +945,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | |||
945 | 945 | ||
946 | workspace = find_workspace(type); | 946 | workspace = find_workspace(type); |
947 | if (IS_ERR(workspace)) | 947 | if (IS_ERR(workspace)) |
948 | return -ENOMEM; | 948 | return PTR_ERR(workspace); |
949 | 949 | ||
950 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | 950 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, |
951 | dest_page, start_byte, | 951 | dest_page, start_byte, |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1bcfcdb23cf4..aeab453b8e24 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -224,7 +224,8 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) | |||
224 | static void add_root_to_dirty_list(struct btrfs_root *root) | 224 | static void add_root_to_dirty_list(struct btrfs_root *root) |
225 | { | 225 | { |
226 | spin_lock(&root->fs_info->trans_lock); | 226 | spin_lock(&root->fs_info->trans_lock); |
227 | if (root->track_dirty && list_empty(&root->dirty_list)) { | 227 | if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) && |
228 | list_empty(&root->dirty_list)) { | ||
228 | list_add(&root->dirty_list, | 229 | list_add(&root->dirty_list, |
229 | &root->fs_info->dirty_cowonly_roots); | 230 | &root->fs_info->dirty_cowonly_roots); |
230 | } | 231 | } |
@@ -246,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
246 | int level; | 247 | int level; |
247 | struct btrfs_disk_key disk_key; | 248 | struct btrfs_disk_key disk_key; |
248 | 249 | ||
249 | WARN_ON(root->ref_cows && trans->transid != | 250 | WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
250 | root->fs_info->running_transaction->transid); | 251 | trans->transid != root->fs_info->running_transaction->transid); |
251 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | 252 | WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
253 | trans->transid != root->last_trans); | ||
252 | 254 | ||
253 | level = btrfs_header_level(buf); | 255 | level = btrfs_header_level(buf); |
254 | if (level == 0) | 256 | if (level == 0) |
@@ -354,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) | |||
354 | } | 356 | } |
355 | 357 | ||
356 | /* | 358 | /* |
357 | * Increment the upper half of tree_mod_seq, set lower half zero. | 359 | * Pull a new tree mod seq number for our operation. |
358 | * | ||
359 | * Must be called with fs_info->tree_mod_seq_lock held. | ||
360 | */ | ||
361 | static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info) | ||
362 | { | ||
363 | u64 seq = atomic64_read(&fs_info->tree_mod_seq); | ||
364 | seq &= 0xffffffff00000000ull; | ||
365 | seq += 1ull << 32; | ||
366 | atomic64_set(&fs_info->tree_mod_seq, seq); | ||
367 | return seq; | ||
368 | } | ||
369 | |||
370 | /* | ||
371 | * Increment the lower half of tree_mod_seq. | ||
372 | * | ||
373 | * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers | ||
374 | * are generated should not technically require a spin lock here. (Rationale: | ||
375 | * incrementing the minor while incrementing the major seq number is between its | ||
376 | * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it | ||
377 | * just returns a unique sequence number as usual.) We have decided to leave | ||
378 | * that requirement in here and rethink it once we notice it really imposes a | ||
379 | * problem on some workload. | ||
380 | */ | 360 | */ |
381 | static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) | 361 | static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) |
382 | { | 362 | { |
383 | return atomic64_inc_return(&fs_info->tree_mod_seq); | 363 | return atomic64_inc_return(&fs_info->tree_mod_seq); |
384 | } | 364 | } |
385 | 365 | ||
386 | /* | 366 | /* |
387 | * return the last minor in the previous major tree_mod_seq number | ||
388 | */ | ||
389 | u64 btrfs_tree_mod_seq_prev(u64 seq) | ||
390 | { | ||
391 | return (seq & 0xffffffff00000000ull) - 1ull; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * This adds a new blocker to the tree mod log's blocker list if the @elem | 367 | * This adds a new blocker to the tree mod log's blocker list if the @elem |
396 | * passed does not already have a sequence number set. So when a caller expects | 368 | * passed does not already have a sequence number set. So when a caller expects |
397 | * to record tree modifications, it should ensure to set elem->seq to zero | 369 | * to record tree modifications, it should ensure to set elem->seq to zero |
@@ -402,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq) | |||
402 | u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | 374 | u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, |
403 | struct seq_list *elem) | 375 | struct seq_list *elem) |
404 | { | 376 | { |
405 | u64 seq; | ||
406 | |||
407 | tree_mod_log_write_lock(fs_info); | 377 | tree_mod_log_write_lock(fs_info); |
408 | spin_lock(&fs_info->tree_mod_seq_lock); | 378 | spin_lock(&fs_info->tree_mod_seq_lock); |
409 | if (!elem->seq) { | 379 | if (!elem->seq) { |
410 | elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); | 380 | elem->seq = btrfs_inc_tree_mod_seq(fs_info); |
411 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | 381 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); |
412 | } | 382 | } |
413 | seq = btrfs_inc_tree_mod_seq_minor(fs_info); | ||
414 | spin_unlock(&fs_info->tree_mod_seq_lock); | 383 | spin_unlock(&fs_info->tree_mod_seq_lock); |
415 | tree_mod_log_write_unlock(fs_info); | 384 | tree_mod_log_write_unlock(fs_info); |
416 | 385 | ||
417 | return seq; | 386 | return elem->seq; |
418 | } | 387 | } |
419 | 388 | ||
420 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | 389 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, |
@@ -487,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | |||
487 | 456 | ||
488 | BUG_ON(!tm); | 457 | BUG_ON(!tm); |
489 | 458 | ||
490 | spin_lock(&fs_info->tree_mod_seq_lock); | 459 | tm->seq = btrfs_inc_tree_mod_seq(fs_info); |
491 | tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); | ||
492 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
493 | 460 | ||
494 | tm_root = &fs_info->tree_mod_log; | 461 | tm_root = &fs_info->tree_mod_log; |
495 | new = &tm_root->rb_node; | 462 | new = &tm_root->rb_node; |
@@ -997,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, | |||
997 | * snapshot and the block was not allocated by tree relocation, | 964 | * snapshot and the block was not allocated by tree relocation, |
998 | * we know the block is not shared. | 965 | * we know the block is not shared. |
999 | */ | 966 | */ |
1000 | if (root->ref_cows && | 967 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
1001 | buf != root->node && buf != root->commit_root && | 968 | buf != root->node && buf != root->commit_root && |
1002 | (btrfs_header_generation(buf) <= | 969 | (btrfs_header_generation(buf) <= |
1003 | btrfs_root_last_snapshot(&root->root_item) || | 970 | btrfs_root_last_snapshot(&root->root_item) || |
1004 | btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) | 971 | btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) |
1005 | return 1; | 972 | return 1; |
1006 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 973 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
1007 | if (root->ref_cows && | 974 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
1008 | btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) | 975 | btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) |
1009 | return 1; | 976 | return 1; |
1010 | #endif | 977 | #endif |
@@ -1146,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
1146 | 1113 | ||
1147 | btrfs_assert_tree_locked(buf); | 1114 | btrfs_assert_tree_locked(buf); |
1148 | 1115 | ||
1149 | WARN_ON(root->ref_cows && trans->transid != | 1116 | WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
1150 | root->fs_info->running_transaction->transid); | 1117 | trans->transid != root->fs_info->running_transaction->transid); |
1151 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | 1118 | WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
1119 | trans->transid != root->last_trans); | ||
1152 | 1120 | ||
1153 | level = btrfs_header_level(buf); | 1121 | level = btrfs_header_level(buf); |
1154 | 1122 | ||
@@ -1193,7 +1161,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
1193 | return ret; | 1161 | return ret; |
1194 | } | 1162 | } |
1195 | 1163 | ||
1196 | if (root->ref_cows) { | 1164 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { |
1197 | ret = btrfs_reloc_cow_block(trans, root, buf, cow); | 1165 | ret = btrfs_reloc_cow_block(trans, root, buf, cow); |
1198 | if (ret) | 1166 | if (ret) |
1199 | return ret; | 1167 | return ret; |
@@ -1538,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, | |||
1538 | struct btrfs_root *root, | 1506 | struct btrfs_root *root, |
1539 | struct extent_buffer *buf) | 1507 | struct extent_buffer *buf) |
1540 | { | 1508 | { |
1509 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
1510 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
1511 | return 0; | ||
1512 | #endif | ||
1541 | /* ensure we can see the force_cow */ | 1513 | /* ensure we can see the force_cow */ |
1542 | smp_rmb(); | 1514 | smp_rmb(); |
1543 | 1515 | ||
@@ -1556,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, | |||
1556 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && | 1528 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && |
1557 | !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && | 1529 | !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && |
1558 | btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && | 1530 | btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && |
1559 | !root->force_cow) | 1531 | !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) |
1560 | return 0; | 1532 | return 0; |
1561 | return 1; | 1533 | return 1; |
1562 | } | 1534 | } |
@@ -5125,7 +5097,17 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
5125 | return ret; | 5097 | return ret; |
5126 | btrfs_item_key(path->nodes[0], &found_key, 0); | 5098 | btrfs_item_key(path->nodes[0], &found_key, 0); |
5127 | ret = comp_keys(&found_key, &key); | 5099 | ret = comp_keys(&found_key, &key); |
5128 | if (ret < 0) | 5100 | /* |
5101 | * We might have had an item with the previous key in the tree right | ||
5102 | * before we released our path. And after we released our path, that | ||
5103 | * item might have been pushed to the first slot (0) of the leaf we | ||
5104 | * were holding due to a tree balance. Alternatively, an item with the | ||
5105 | * previous key can exist as the only element of a leaf (big fat item). | ||
5106 | * Therefore account for these 2 cases, so that our callers (like | ||
5107 | * btrfs_previous_item) don't miss an existing item with a key matching | ||
5108 | * the previous key we computed above. | ||
5109 | */ | ||
5110 | if (ret <= 0) | ||
5129 | return 0; | 5111 | return 0; |
5130 | return 1; | 5112 | return 1; |
5131 | } | 5113 | } |
@@ -5736,6 +5718,24 @@ again: | |||
5736 | ret = 0; | 5718 | ret = 0; |
5737 | goto done; | 5719 | goto done; |
5738 | } | 5720 | } |
5721 | /* | ||
5722 | * So the above check misses one case: | ||
5723 | * - after releasing the path above, someone has removed the item that | ||
5724 | * used to be at the very end of the block, and balance between leafs | ||
5725 | * gets another one with bigger key.offset to replace it. | ||
5726 | * | ||
5727 | * This one should be returned as well, or we can get leaf corruption | ||
5728 | * later(esp. in __btrfs_drop_extents()). | ||
5729 | * | ||
5730 | * And a bit more explanation about this check, | ||
5731 | * with ret > 0, the key isn't found, the path points to the slot | ||
5732 | * where it should be inserted, so the path->slots[0] item must be the | ||
5733 | * bigger one. | ||
5734 | */ | ||
5735 | if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) { | ||
5736 | ret = 0; | ||
5737 | goto done; | ||
5738 | } | ||
5739 | 5739 | ||
5740 | while (level < BTRFS_MAX_LEVEL) { | 5740 | while (level < BTRFS_MAX_LEVEL) { |
5741 | if (!path->nodes[level]) { | 5741 | if (!path->nodes[level]) { |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ba6b88528dc7..b7e2c1c1ef36 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/kmap_types.h> | 33 | #include <asm/kmap_types.h> |
34 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
35 | #include <linux/btrfs.h> | 35 | #include <linux/btrfs.h> |
36 | #include <linux/workqueue.h> | ||
36 | #include "extent_io.h" | 37 | #include "extent_io.h" |
37 | #include "extent_map.h" | 38 | #include "extent_map.h" |
38 | #include "async-thread.h" | 39 | #include "async-thread.h" |
@@ -756,6 +757,12 @@ struct btrfs_dir_item { | |||
756 | 757 | ||
757 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | 758 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) |
758 | 759 | ||
760 | /* | ||
761 | * Internal in-memory flag that a subvolume has been marked for deletion but | ||
762 | * still visible as a directory | ||
763 | */ | ||
764 | #define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48) | ||
765 | |||
759 | struct btrfs_root_item { | 766 | struct btrfs_root_item { |
760 | struct btrfs_inode_item inode; | 767 | struct btrfs_inode_item inode; |
761 | __le64 generation; | 768 | __le64 generation; |
@@ -840,7 +847,10 @@ struct btrfs_disk_balance_args { | |||
840 | /* BTRFS_BALANCE_ARGS_* */ | 847 | /* BTRFS_BALANCE_ARGS_* */ |
841 | __le64 flags; | 848 | __le64 flags; |
842 | 849 | ||
843 | __le64 unused[8]; | 850 | /* BTRFS_BALANCE_ARGS_LIMIT value */ |
851 | __le64 limit; | ||
852 | |||
853 | __le64 unused[7]; | ||
844 | } __attribute__ ((__packed__)); | 854 | } __attribute__ ((__packed__)); |
845 | 855 | ||
846 | /* | 856 | /* |
@@ -1113,6 +1123,12 @@ struct btrfs_qgroup_limit_item { | |||
1113 | __le64 rsv_excl; | 1123 | __le64 rsv_excl; |
1114 | } __attribute__ ((__packed__)); | 1124 | } __attribute__ ((__packed__)); |
1115 | 1125 | ||
1126 | /* For raid type sysfs entries */ | ||
1127 | struct raid_kobject { | ||
1128 | int raid_type; | ||
1129 | struct kobject kobj; | ||
1130 | }; | ||
1131 | |||
1116 | struct btrfs_space_info { | 1132 | struct btrfs_space_info { |
1117 | spinlock_t lock; | 1133 | spinlock_t lock; |
1118 | 1134 | ||
@@ -1163,7 +1179,7 @@ struct btrfs_space_info { | |||
1163 | wait_queue_head_t wait; | 1179 | wait_queue_head_t wait; |
1164 | 1180 | ||
1165 | struct kobject kobj; | 1181 | struct kobject kobj; |
1166 | struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES]; | 1182 | struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES]; |
1167 | }; | 1183 | }; |
1168 | 1184 | ||
1169 | #define BTRFS_BLOCK_RSV_GLOBAL 1 | 1185 | #define BTRFS_BLOCK_RSV_GLOBAL 1 |
@@ -1313,6 +1329,8 @@ struct btrfs_stripe_hash_table { | |||
1313 | 1329 | ||
1314 | #define BTRFS_STRIPE_HASH_TABLE_BITS 11 | 1330 | #define BTRFS_STRIPE_HASH_TABLE_BITS 11 |
1315 | 1331 | ||
1332 | void btrfs_init_async_reclaim_work(struct work_struct *work); | ||
1333 | |||
1316 | /* fs_info */ | 1334 | /* fs_info */ |
1317 | struct reloc_control; | 1335 | struct reloc_control; |
1318 | struct btrfs_device; | 1336 | struct btrfs_device; |
@@ -1534,6 +1552,9 @@ struct btrfs_fs_info { | |||
1534 | */ | 1552 | */ |
1535 | struct btrfs_workqueue *fixup_workers; | 1553 | struct btrfs_workqueue *fixup_workers; |
1536 | struct btrfs_workqueue *delayed_workers; | 1554 | struct btrfs_workqueue *delayed_workers; |
1555 | |||
1556 | /* the extent workers do delayed refs on the extent allocation tree */ | ||
1557 | struct btrfs_workqueue *extent_workers; | ||
1537 | struct task_struct *transaction_kthread; | 1558 | struct task_struct *transaction_kthread; |
1538 | struct task_struct *cleaner_kthread; | 1559 | struct task_struct *cleaner_kthread; |
1539 | int thread_pool_size; | 1560 | int thread_pool_size; |
@@ -1636,7 +1657,10 @@ struct btrfs_fs_info { | |||
1636 | 1657 | ||
1637 | /* holds configuration and tracking. Protected by qgroup_lock */ | 1658 | /* holds configuration and tracking. Protected by qgroup_lock */ |
1638 | struct rb_root qgroup_tree; | 1659 | struct rb_root qgroup_tree; |
1660 | struct rb_root qgroup_op_tree; | ||
1639 | spinlock_t qgroup_lock; | 1661 | spinlock_t qgroup_lock; |
1662 | spinlock_t qgroup_op_lock; | ||
1663 | atomic_t qgroup_op_seq; | ||
1640 | 1664 | ||
1641 | /* | 1665 | /* |
1642 | * used to avoid frequently calling ulist_alloc()/ulist_free() | 1666 | * used to avoid frequently calling ulist_alloc()/ulist_free() |
@@ -1688,6 +1712,9 @@ struct btrfs_fs_info { | |||
1688 | 1712 | ||
1689 | struct semaphore uuid_tree_rescan_sem; | 1713 | struct semaphore uuid_tree_rescan_sem; |
1690 | unsigned int update_uuid_tree_gen:1; | 1714 | unsigned int update_uuid_tree_gen:1; |
1715 | |||
1716 | /* Used to reclaim the metadata space in the background. */ | ||
1717 | struct work_struct async_reclaim_work; | ||
1691 | }; | 1718 | }; |
1692 | 1719 | ||
1693 | struct btrfs_subvolume_writers { | 1720 | struct btrfs_subvolume_writers { |
@@ -1696,6 +1723,26 @@ struct btrfs_subvolume_writers { | |||
1696 | }; | 1723 | }; |
1697 | 1724 | ||
1698 | /* | 1725 | /* |
1726 | * The state of btrfs root | ||
1727 | */ | ||
1728 | /* | ||
1729 | * btrfs_record_root_in_trans is a multi-step process, | ||
1730 | * and it can race with the balancing code. But the | ||
1731 | * race is very small, and only the first time the root | ||
1732 | * is added to each transaction. So IN_TRANS_SETUP | ||
1733 | * is used to tell us when more checks are required | ||
1734 | */ | ||
1735 | #define BTRFS_ROOT_IN_TRANS_SETUP 0 | ||
1736 | #define BTRFS_ROOT_REF_COWS 1 | ||
1737 | #define BTRFS_ROOT_TRACK_DIRTY 2 | ||
1738 | #define BTRFS_ROOT_IN_RADIX 3 | ||
1739 | #define BTRFS_ROOT_DUMMY_ROOT 4 | ||
1740 | #define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5 | ||
1741 | #define BTRFS_ROOT_DEFRAG_RUNNING 6 | ||
1742 | #define BTRFS_ROOT_FORCE_COW 7 | ||
1743 | #define BTRFS_ROOT_MULTI_LOG_TASKS 8 | ||
1744 | |||
1745 | /* | ||
1699 | * in ram representation of the tree. extent_root is used for all allocations | 1746 | * in ram representation of the tree. extent_root is used for all allocations |
1700 | * and for the extent tree extent_root root. | 1747 | * and for the extent tree extent_root root. |
1701 | */ | 1748 | */ |
@@ -1706,6 +1753,7 @@ struct btrfs_root { | |||
1706 | struct btrfs_root *log_root; | 1753 | struct btrfs_root *log_root; |
1707 | struct btrfs_root *reloc_root; | 1754 | struct btrfs_root *reloc_root; |
1708 | 1755 | ||
1756 | unsigned long state; | ||
1709 | struct btrfs_root_item root_item; | 1757 | struct btrfs_root_item root_item; |
1710 | struct btrfs_key root_key; | 1758 | struct btrfs_key root_key; |
1711 | struct btrfs_fs_info *fs_info; | 1759 | struct btrfs_fs_info *fs_info; |
@@ -1740,7 +1788,6 @@ struct btrfs_root { | |||
1740 | /* Just be updated when the commit succeeds. */ | 1788 | /* Just be updated when the commit succeeds. */ |
1741 | int last_log_commit; | 1789 | int last_log_commit; |
1742 | pid_t log_start_pid; | 1790 | pid_t log_start_pid; |
1743 | bool log_multiple_pids; | ||
1744 | 1791 | ||
1745 | u64 objectid; | 1792 | u64 objectid; |
1746 | u64 last_trans; | 1793 | u64 last_trans; |
@@ -1760,23 +1807,13 @@ struct btrfs_root { | |||
1760 | 1807 | ||
1761 | u64 highest_objectid; | 1808 | u64 highest_objectid; |
1762 | 1809 | ||
1763 | /* btrfs_record_root_in_trans is a multi-step process, | ||
1764 | * and it can race with the balancing code. But the | ||
1765 | * race is very small, and only the first time the root | ||
1766 | * is added to each transaction. So in_trans_setup | ||
1767 | * is used to tell us when more checks are required | ||
1768 | */ | ||
1769 | unsigned long in_trans_setup; | ||
1770 | int ref_cows; | ||
1771 | int track_dirty; | ||
1772 | int in_radix; | ||
1773 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1810 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
1774 | int dummy_root; | 1811 | u64 alloc_bytenr; |
1775 | #endif | 1812 | #endif |
1813 | |||
1776 | u64 defrag_trans_start; | 1814 | u64 defrag_trans_start; |
1777 | struct btrfs_key defrag_progress; | 1815 | struct btrfs_key defrag_progress; |
1778 | struct btrfs_key defrag_max; | 1816 | struct btrfs_key defrag_max; |
1779 | int defrag_running; | ||
1780 | char *name; | 1817 | char *name; |
1781 | 1818 | ||
1782 | /* the dirty list is only used by non-reference counted roots */ | 1819 | /* the dirty list is only used by non-reference counted roots */ |
@@ -1790,7 +1827,6 @@ struct btrfs_root { | |||
1790 | spinlock_t orphan_lock; | 1827 | spinlock_t orphan_lock; |
1791 | atomic_t orphan_inodes; | 1828 | atomic_t orphan_inodes; |
1792 | struct btrfs_block_rsv *orphan_block_rsv; | 1829 | struct btrfs_block_rsv *orphan_block_rsv; |
1793 | int orphan_item_inserted; | ||
1794 | int orphan_cleanup_state; | 1830 | int orphan_cleanup_state; |
1795 | 1831 | ||
1796 | spinlock_t inode_lock; | 1832 | spinlock_t inode_lock; |
@@ -1808,8 +1844,6 @@ struct btrfs_root { | |||
1808 | */ | 1844 | */ |
1809 | dev_t anon_dev; | 1845 | dev_t anon_dev; |
1810 | 1846 | ||
1811 | int force_cow; | ||
1812 | |||
1813 | spinlock_t root_item_lock; | 1847 | spinlock_t root_item_lock; |
1814 | atomic_t refs; | 1848 | atomic_t refs; |
1815 | 1849 | ||
@@ -2788,6 +2822,11 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root) | |||
2788 | return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; | 2822 | return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; |
2789 | } | 2823 | } |
2790 | 2824 | ||
2825 | static inline bool btrfs_root_dead(struct btrfs_root *root) | ||
2826 | { | ||
2827 | return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; | ||
2828 | } | ||
2829 | |||
2791 | /* struct btrfs_root_backup */ | 2830 | /* struct btrfs_root_backup */ |
2792 | BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, | 2831 | BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, |
2793 | tree_root, 64); | 2832 | tree_root, 64); |
@@ -2897,6 +2936,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, | |||
2897 | cpu->vend = le64_to_cpu(disk->vend); | 2936 | cpu->vend = le64_to_cpu(disk->vend); |
2898 | cpu->target = le64_to_cpu(disk->target); | 2937 | cpu->target = le64_to_cpu(disk->target); |
2899 | cpu->flags = le64_to_cpu(disk->flags); | 2938 | cpu->flags = le64_to_cpu(disk->flags); |
2939 | cpu->limit = le64_to_cpu(disk->limit); | ||
2900 | } | 2940 | } |
2901 | 2941 | ||
2902 | static inline void | 2942 | static inline void |
@@ -2914,6 +2954,7 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, | |||
2914 | disk->vend = cpu_to_le64(cpu->vend); | 2954 | disk->vend = cpu_to_le64(cpu->vend); |
2915 | disk->target = cpu_to_le64(cpu->target); | 2955 | disk->target = cpu_to_le64(cpu->target); |
2916 | disk->flags = cpu_to_le64(cpu->flags); | 2956 | disk->flags = cpu_to_le64(cpu->flags); |
2957 | disk->limit = cpu_to_le64(cpu->limit); | ||
2917 | } | 2958 | } |
2918 | 2959 | ||
2919 | /* struct btrfs_super_block */ | 2960 | /* struct btrfs_super_block */ |
@@ -3236,6 +3277,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | |||
3236 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 3277 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
3237 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 3278 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
3238 | struct btrfs_root *root, unsigned long count); | 3279 | struct btrfs_root *root, unsigned long count); |
3280 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, | ||
3281 | unsigned long count, int wait); | ||
3239 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 3282 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
3240 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | 3283 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, |
3241 | struct btrfs_root *root, u64 bytenr, | 3284 | struct btrfs_root *root, u64 bytenr, |
@@ -3275,9 +3318,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, | |||
3275 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, | 3318 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, |
3276 | struct btrfs_key *ins, int is_data); | 3319 | struct btrfs_key *ins, int is_data); |
3277 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3320 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3278 | struct extent_buffer *buf, int full_backref, int for_cow); | 3321 | struct extent_buffer *buf, int full_backref, int no_quota); |
3279 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3322 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3280 | struct extent_buffer *buf, int full_backref, int for_cow); | 3323 | struct extent_buffer *buf, int full_backref, int no_quota); |
3281 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | 3324 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, |
3282 | struct btrfs_root *root, | 3325 | struct btrfs_root *root, |
3283 | u64 bytenr, u64 num_bytes, u64 flags, | 3326 | u64 bytenr, u64 num_bytes, u64 flags, |
@@ -3285,7 +3328,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
3285 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 3328 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
3286 | struct btrfs_root *root, | 3329 | struct btrfs_root *root, |
3287 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 3330 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
3288 | u64 owner, u64 offset, int for_cow); | 3331 | u64 owner, u64 offset, int no_quota); |
3289 | 3332 | ||
3290 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 3333 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
3291 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | 3334 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, |
@@ -3297,7 +3340,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3297 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 3340 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
3298 | struct btrfs_root *root, | 3341 | struct btrfs_root *root, |
3299 | u64 bytenr, u64 num_bytes, u64 parent, | 3342 | u64 bytenr, u64 num_bytes, u64 parent, |
3300 | u64 root_objectid, u64 owner, u64 offset, int for_cow); | 3343 | u64 root_objectid, u64 owner, u64 offset, int no_quota); |
3301 | 3344 | ||
3302 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 3345 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
3303 | struct btrfs_root *root); | 3346 | struct btrfs_root *root); |
@@ -3385,7 +3428,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
3385 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3428 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
3386 | struct btrfs_fs_info *fs_info); | 3429 | struct btrfs_fs_info *fs_info); |
3387 | int __get_raid_index(u64 flags); | 3430 | int __get_raid_index(u64 flags); |
3388 | |||
3389 | int btrfs_start_nocow_write(struct btrfs_root *root); | 3431 | int btrfs_start_nocow_write(struct btrfs_root *root); |
3390 | void btrfs_end_nocow_write(struct btrfs_root *root); | 3432 | void btrfs_end_nocow_write(struct btrfs_root *root); |
3391 | /* ctree.c */ | 3433 | /* ctree.c */ |
@@ -3561,7 +3603,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | |||
3561 | struct seq_list *elem); | 3603 | struct seq_list *elem); |
3562 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | 3604 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, |
3563 | struct seq_list *elem); | 3605 | struct seq_list *elem); |
3564 | u64 btrfs_tree_mod_seq_prev(u64 seq); | ||
3565 | int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); | 3606 | int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); |
3566 | 3607 | ||
3567 | /* root-item.c */ | 3608 | /* root-item.c */ |
@@ -3708,6 +3749,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
3708 | struct bio *bio, u64 file_start, int contig); | 3749 | struct bio *bio, u64 file_start, int contig); |
3709 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 3750 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
3710 | struct list_head *list, int search_commit); | 3751 | struct list_head *list, int search_commit); |
3752 | void btrfs_extent_item_to_extent_map(struct inode *inode, | ||
3753 | const struct btrfs_path *path, | ||
3754 | struct btrfs_file_extent_item *fi, | ||
3755 | const bool new_inline, | ||
3756 | struct extent_map *em); | ||
3757 | |||
3711 | /* inode.c */ | 3758 | /* inode.c */ |
3712 | struct btrfs_delalloc_work { | 3759 | struct btrfs_delalloc_work { |
3713 | struct inode *inode; | 3760 | struct inode *inode; |
@@ -4069,52 +4116,6 @@ void btrfs_reada_detach(void *handle); | |||
4069 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 4116 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
4070 | u64 start, int err); | 4117 | u64 start, int err); |
4071 | 4118 | ||
4072 | /* qgroup.c */ | ||
4073 | struct qgroup_update { | ||
4074 | struct list_head list; | ||
4075 | struct btrfs_delayed_ref_node *node; | ||
4076 | struct btrfs_delayed_extent_op *extent_op; | ||
4077 | }; | ||
4078 | |||
4079 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, | ||
4080 | struct btrfs_fs_info *fs_info); | ||
4081 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, | ||
4082 | struct btrfs_fs_info *fs_info); | ||
4083 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | ||
4084 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | ||
4085 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | ||
4086 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
4087 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
4088 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
4089 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
4090 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
4091 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
4092 | char *name); | ||
4093 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
4094 | struct btrfs_fs_info *fs_info, u64 qgroupid); | ||
4095 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
4096 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
4097 | struct btrfs_qgroup_limit *limit); | ||
4098 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); | ||
4099 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | ||
4100 | struct btrfs_delayed_extent_op; | ||
4101 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
4102 | struct btrfs_delayed_ref_node *node, | ||
4103 | struct btrfs_delayed_extent_op *extent_op); | ||
4104 | int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | ||
4105 | struct btrfs_fs_info *fs_info, | ||
4106 | struct btrfs_delayed_ref_node *node, | ||
4107 | struct btrfs_delayed_extent_op *extent_op); | ||
4108 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | ||
4109 | struct btrfs_fs_info *fs_info); | ||
4110 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
4111 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
4112 | struct btrfs_qgroup_inherit *inherit); | ||
4113 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); | ||
4114 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); | ||
4115 | |||
4116 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); | ||
4117 | |||
4118 | static inline int is_fstree(u64 rootid) | 4119 | static inline int is_fstree(u64 rootid) |
4119 | { | 4120 | { |
4120 | if (rootid == BTRFS_FS_TREE_OBJECTID || | 4121 | if (rootid == BTRFS_FS_TREE_OBJECTID || |
@@ -4131,6 +4132,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) | |||
4131 | /* Sanity test specific functions */ | 4132 | /* Sanity test specific functions */ |
4132 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 4133 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
4133 | void btrfs_test_destroy_inode(struct inode *inode); | 4134 | void btrfs_test_destroy_inode(struct inode *inode); |
4135 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
4136 | u64 rfer, u64 excl); | ||
4134 | #endif | 4137 | #endif |
4135 | 4138 | ||
4136 | #endif | 4139 | #endif |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 33e561a84013..da775bfdebc9 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -149,8 +149,8 @@ again: | |||
149 | spin_lock(&root->inode_lock); | 149 | spin_lock(&root->inode_lock); |
150 | ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); | 150 | ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); |
151 | if (ret == -EEXIST) { | 151 | if (ret == -EEXIST) { |
152 | kmem_cache_free(delayed_node_cache, node); | ||
153 | spin_unlock(&root->inode_lock); | 152 | spin_unlock(&root->inode_lock); |
153 | kmem_cache_free(delayed_node_cache, node); | ||
154 | radix_tree_preload_end(); | 154 | radix_tree_preload_end(); |
155 | goto again; | 155 | goto again; |
156 | } | 156 | } |
@@ -267,14 +267,17 @@ static void __btrfs_release_delayed_node( | |||
267 | mutex_unlock(&delayed_node->mutex); | 267 | mutex_unlock(&delayed_node->mutex); |
268 | 268 | ||
269 | if (atomic_dec_and_test(&delayed_node->refs)) { | 269 | if (atomic_dec_and_test(&delayed_node->refs)) { |
270 | bool free = false; | ||
270 | struct btrfs_root *root = delayed_node->root; | 271 | struct btrfs_root *root = delayed_node->root; |
271 | spin_lock(&root->inode_lock); | 272 | spin_lock(&root->inode_lock); |
272 | if (atomic_read(&delayed_node->refs) == 0) { | 273 | if (atomic_read(&delayed_node->refs) == 0) { |
273 | radix_tree_delete(&root->delayed_nodes_tree, | 274 | radix_tree_delete(&root->delayed_nodes_tree, |
274 | delayed_node->inode_id); | 275 | delayed_node->inode_id); |
275 | kmem_cache_free(delayed_node_cache, delayed_node); | 276 | free = true; |
276 | } | 277 | } |
277 | spin_unlock(&root->inode_lock); | 278 | spin_unlock(&root->inode_lock); |
279 | if (free) | ||
280 | kmem_cache_free(delayed_node_cache, delayed_node); | ||
278 | } | 281 | } |
279 | } | 282 | } |
280 | 283 | ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 31299646024d..6d16bea94e1c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, | |||
106 | return -1; | 106 | return -1; |
107 | if (ref1->type > ref2->type) | 107 | if (ref1->type > ref2->type) |
108 | return 1; | 108 | return 1; |
109 | if (ref1->no_quota > ref2->no_quota) | ||
110 | return 1; | ||
111 | if (ref1->no_quota < ref2->no_quota) | ||
112 | return -1; | ||
109 | /* merging of sequenced refs is not allowed */ | 113 | /* merging of sequenced refs is not allowed */ |
110 | if (compare_seq) { | 114 | if (compare_seq) { |
111 | if (ref1->seq < ref2->seq) | 115 | if (ref1->seq < ref2->seq) |
@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
635 | struct btrfs_delayed_ref_head *head_ref, | 639 | struct btrfs_delayed_ref_head *head_ref, |
636 | struct btrfs_delayed_ref_node *ref, u64 bytenr, | 640 | struct btrfs_delayed_ref_node *ref, u64 bytenr, |
637 | u64 num_bytes, u64 parent, u64 ref_root, int level, | 641 | u64 num_bytes, u64 parent, u64 ref_root, int level, |
638 | int action, int for_cow) | 642 | int action, int no_quota) |
639 | { | 643 | { |
640 | struct btrfs_delayed_ref_node *existing; | 644 | struct btrfs_delayed_ref_node *existing; |
641 | struct btrfs_delayed_tree_ref *full_ref; | 645 | struct btrfs_delayed_tree_ref *full_ref; |
@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
645 | if (action == BTRFS_ADD_DELAYED_EXTENT) | 649 | if (action == BTRFS_ADD_DELAYED_EXTENT) |
646 | action = BTRFS_ADD_DELAYED_REF; | 650 | action = BTRFS_ADD_DELAYED_REF; |
647 | 651 | ||
652 | if (is_fstree(ref_root)) | ||
653 | seq = atomic64_read(&fs_info->tree_mod_seq); | ||
648 | delayed_refs = &trans->transaction->delayed_refs; | 654 | delayed_refs = &trans->transaction->delayed_refs; |
649 | 655 | ||
650 | /* first set the basic ref node struct up */ | 656 | /* first set the basic ref node struct up */ |
@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
655 | ref->action = action; | 661 | ref->action = action; |
656 | ref->is_head = 0; | 662 | ref->is_head = 0; |
657 | ref->in_tree = 1; | 663 | ref->in_tree = 1; |
658 | 664 | ref->no_quota = no_quota; | |
659 | if (need_ref_seq(for_cow, ref_root)) | ||
660 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
661 | ref->seq = seq; | 665 | ref->seq = seq; |
662 | 666 | ||
663 | full_ref = btrfs_delayed_node_to_tree_ref(ref); | 667 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
697 | struct btrfs_delayed_ref_head *head_ref, | 701 | struct btrfs_delayed_ref_head *head_ref, |
698 | struct btrfs_delayed_ref_node *ref, u64 bytenr, | 702 | struct btrfs_delayed_ref_node *ref, u64 bytenr, |
699 | u64 num_bytes, u64 parent, u64 ref_root, u64 owner, | 703 | u64 num_bytes, u64 parent, u64 ref_root, u64 owner, |
700 | u64 offset, int action, int for_cow) | 704 | u64 offset, int action, int no_quota) |
701 | { | 705 | { |
702 | struct btrfs_delayed_ref_node *existing; | 706 | struct btrfs_delayed_ref_node *existing; |
703 | struct btrfs_delayed_data_ref *full_ref; | 707 | struct btrfs_delayed_data_ref *full_ref; |
@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
709 | 713 | ||
710 | delayed_refs = &trans->transaction->delayed_refs; | 714 | delayed_refs = &trans->transaction->delayed_refs; |
711 | 715 | ||
716 | if (is_fstree(ref_root)) | ||
717 | seq = atomic64_read(&fs_info->tree_mod_seq); | ||
718 | |||
712 | /* first set the basic ref node struct up */ | 719 | /* first set the basic ref node struct up */ |
713 | atomic_set(&ref->refs, 1); | 720 | atomic_set(&ref->refs, 1); |
714 | ref->bytenr = bytenr; | 721 | ref->bytenr = bytenr; |
@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
717 | ref->action = action; | 724 | ref->action = action; |
718 | ref->is_head = 0; | 725 | ref->is_head = 0; |
719 | ref->in_tree = 1; | 726 | ref->in_tree = 1; |
720 | 727 | ref->no_quota = no_quota; | |
721 | if (need_ref_seq(for_cow, ref_root)) | ||
722 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
723 | ref->seq = seq; | 728 | ref->seq = seq; |
724 | 729 | ||
725 | full_ref = btrfs_delayed_node_to_data_ref(ref); | 730 | full_ref = btrfs_delayed_node_to_data_ref(ref); |
@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
762 | u64 bytenr, u64 num_bytes, u64 parent, | 767 | u64 bytenr, u64 num_bytes, u64 parent, |
763 | u64 ref_root, int level, int action, | 768 | u64 ref_root, int level, int action, |
764 | struct btrfs_delayed_extent_op *extent_op, | 769 | struct btrfs_delayed_extent_op *extent_op, |
765 | int for_cow) | 770 | int no_quota) |
766 | { | 771 | { |
767 | struct btrfs_delayed_tree_ref *ref; | 772 | struct btrfs_delayed_tree_ref *ref; |
768 | struct btrfs_delayed_ref_head *head_ref; | 773 | struct btrfs_delayed_ref_head *head_ref; |
769 | struct btrfs_delayed_ref_root *delayed_refs; | 774 | struct btrfs_delayed_ref_root *delayed_refs; |
770 | 775 | ||
776 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
777 | no_quota = 0; | ||
778 | |||
771 | BUG_ON(extent_op && extent_op->is_data); | 779 | BUG_ON(extent_op && extent_op->is_data); |
772 | ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); | 780 | ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); |
773 | if (!ref) | 781 | if (!ref) |
@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
793 | 801 | ||
794 | add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, | 802 | add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, |
795 | num_bytes, parent, ref_root, level, action, | 803 | num_bytes, parent, ref_root, level, action, |
796 | for_cow); | 804 | no_quota); |
797 | spin_unlock(&delayed_refs->lock); | 805 | spin_unlock(&delayed_refs->lock); |
798 | if (need_ref_seq(for_cow, ref_root)) | ||
799 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
800 | 806 | ||
801 | return 0; | 807 | return 0; |
802 | } | 808 | } |
@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
810 | u64 parent, u64 ref_root, | 816 | u64 parent, u64 ref_root, |
811 | u64 owner, u64 offset, int action, | 817 | u64 owner, u64 offset, int action, |
812 | struct btrfs_delayed_extent_op *extent_op, | 818 | struct btrfs_delayed_extent_op *extent_op, |
813 | int for_cow) | 819 | int no_quota) |
814 | { | 820 | { |
815 | struct btrfs_delayed_data_ref *ref; | 821 | struct btrfs_delayed_data_ref *ref; |
816 | struct btrfs_delayed_ref_head *head_ref; | 822 | struct btrfs_delayed_ref_head *head_ref; |
817 | struct btrfs_delayed_ref_root *delayed_refs; | 823 | struct btrfs_delayed_ref_root *delayed_refs; |
818 | 824 | ||
825 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
826 | no_quota = 0; | ||
827 | |||
819 | BUG_ON(extent_op && !extent_op->is_data); | 828 | BUG_ON(extent_op && !extent_op->is_data); |
820 | ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); | 829 | ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); |
821 | if (!ref) | 830 | if (!ref) |
@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
841 | 850 | ||
842 | add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, | 851 | add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, |
843 | num_bytes, parent, ref_root, owner, offset, | 852 | num_bytes, parent, ref_root, owner, offset, |
844 | action, for_cow); | 853 | action, no_quota); |
845 | spin_unlock(&delayed_refs->lock); | 854 | spin_unlock(&delayed_refs->lock); |
846 | if (need_ref_seq(for_cow, ref_root)) | ||
847 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
848 | 855 | ||
849 | return 0; | 856 | return 0; |
850 | } | 857 | } |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 4ba9b93022ff..a764e2340d48 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node { | |||
52 | 52 | ||
53 | unsigned int action:8; | 53 | unsigned int action:8; |
54 | unsigned int type:8; | 54 | unsigned int type:8; |
55 | unsigned int no_quota:1; | ||
55 | /* is this node still in the rbtree? */ | 56 | /* is this node still in the rbtree? */ |
56 | unsigned int is_head:1; | 57 | unsigned int is_head:1; |
57 | unsigned int in_tree:1; | 58 | unsigned int in_tree:1; |
@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
196 | u64 bytenr, u64 num_bytes, u64 parent, | 197 | u64 bytenr, u64 num_bytes, u64 parent, |
197 | u64 ref_root, int level, int action, | 198 | u64 ref_root, int level, int action, |
198 | struct btrfs_delayed_extent_op *extent_op, | 199 | struct btrfs_delayed_extent_op *extent_op, |
199 | int for_cow); | 200 | int no_quota); |
200 | int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | 201 | int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, |
201 | struct btrfs_trans_handle *trans, | 202 | struct btrfs_trans_handle *trans, |
202 | u64 bytenr, u64 num_bytes, | 203 | u64 bytenr, u64 num_bytes, |
203 | u64 parent, u64 ref_root, | 204 | u64 parent, u64 ref_root, |
204 | u64 owner, u64 offset, int action, | 205 | u64 owner, u64 offset, int action, |
205 | struct btrfs_delayed_extent_op *extent_op, | 206 | struct btrfs_delayed_extent_op *extent_op, |
206 | int for_cow); | 207 | int no_quota); |
207 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | 208 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
208 | struct btrfs_trans_handle *trans, | 209 | struct btrfs_trans_handle *trans, |
209 | u64 bytenr, u64 num_bytes, | 210 | u64 bytenr, u64 num_bytes, |
@@ -231,25 +232,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, | |||
231 | u64 seq); | 232 | u64 seq); |
232 | 233 | ||
233 | /* | 234 | /* |
234 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
235 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
236 | * to be held back, so they won't get a ref_seq number. | ||
237 | */ | ||
238 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
239 | { | ||
240 | if (for_cow) | ||
241 | return 0; | ||
242 | |||
243 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
244 | return 1; | ||
245 | |||
246 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
247 | return 1; | ||
248 | |||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * a node might live in a head or a regular ref, this lets you | 235 | * a node might live in a head or a regular ref, this lets you |
254 | * test for the proper type to use. | 236 | * test for the proper type to use. |
255 | */ | 237 | */ |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 9f2290509aca..2af6e66fe788 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -313,7 +313,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
313 | 313 | ||
314 | if (btrfs_fs_incompat(fs_info, RAID56)) { | 314 | if (btrfs_fs_incompat(fs_info, RAID56)) { |
315 | btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); | 315 | btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); |
316 | return -EINVAL; | 316 | return -EOPNOTSUPP; |
317 | } | 317 | } |
318 | 318 | ||
319 | switch (args->start.cont_reading_from_srcdev_mode) { | 319 | switch (args->start.cont_reading_from_srcdev_mode) { |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 983314932af3..8bb4aa19898f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include "dev-replace.h" | 49 | #include "dev-replace.h" |
50 | #include "raid56.h" | 50 | #include "raid56.h" |
51 | #include "sysfs.h" | 51 | #include "sysfs.h" |
52 | #include "qgroup.h" | ||
52 | 53 | ||
53 | #ifdef CONFIG_X86 | 54 | #ifdef CONFIG_X86 |
54 | #include <asm/cpufeature.h> | 55 | #include <asm/cpufeature.h> |
@@ -1109,6 +1110,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | |||
1109 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 1110 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
1110 | u64 bytenr, u32 blocksize) | 1111 | u64 bytenr, u32 blocksize) |
1111 | { | 1112 | { |
1113 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
1114 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
1115 | return alloc_test_extent_buffer(root->fs_info, bytenr, | ||
1116 | blocksize); | ||
1117 | #endif | ||
1112 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); | 1118 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); |
1113 | } | 1119 | } |
1114 | 1120 | ||
@@ -1201,10 +1207,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1201 | root->nodesize = nodesize; | 1207 | root->nodesize = nodesize; |
1202 | root->leafsize = leafsize; | 1208 | root->leafsize = leafsize; |
1203 | root->stripesize = stripesize; | 1209 | root->stripesize = stripesize; |
1204 | root->ref_cows = 0; | 1210 | root->state = 0; |
1205 | root->track_dirty = 0; | ||
1206 | root->in_radix = 0; | ||
1207 | root->orphan_item_inserted = 0; | ||
1208 | root->orphan_cleanup_state = 0; | 1211 | root->orphan_cleanup_state = 0; |
1209 | 1212 | ||
1210 | root->objectid = objectid; | 1213 | root->objectid = objectid; |
@@ -1265,7 +1268,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1265 | else | 1268 | else |
1266 | root->defrag_trans_start = 0; | 1269 | root->defrag_trans_start = 0; |
1267 | init_completion(&root->kobj_unregister); | 1270 | init_completion(&root->kobj_unregister); |
1268 | root->defrag_running = 0; | ||
1269 | root->root_key.objectid = objectid; | 1271 | root->root_key.objectid = objectid; |
1270 | root->anon_dev = 0; | 1272 | root->anon_dev = 0; |
1271 | 1273 | ||
@@ -1290,7 +1292,8 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) | |||
1290 | if (!root) | 1292 | if (!root) |
1291 | return ERR_PTR(-ENOMEM); | 1293 | return ERR_PTR(-ENOMEM); |
1292 | __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); | 1294 | __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); |
1293 | root->dummy_root = 1; | 1295 | set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); |
1296 | root->alloc_bytenr = 0; | ||
1294 | 1297 | ||
1295 | return root; | 1298 | return root; |
1296 | } | 1299 | } |
@@ -1341,8 +1344,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
1341 | btrfs_mark_buffer_dirty(leaf); | 1344 | btrfs_mark_buffer_dirty(leaf); |
1342 | 1345 | ||
1343 | root->commit_root = btrfs_root_node(root); | 1346 | root->commit_root = btrfs_root_node(root); |
1344 | root->track_dirty = 1; | 1347 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); |
1345 | |||
1346 | 1348 | ||
1347 | root->root_item.flags = 0; | 1349 | root->root_item.flags = 0; |
1348 | root->root_item.byte_limit = 0; | 1350 | root->root_item.byte_limit = 0; |
@@ -1371,6 +1373,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
1371 | fail: | 1373 | fail: |
1372 | if (leaf) { | 1374 | if (leaf) { |
1373 | btrfs_tree_unlock(leaf); | 1375 | btrfs_tree_unlock(leaf); |
1376 | free_extent_buffer(root->commit_root); | ||
1374 | free_extent_buffer(leaf); | 1377 | free_extent_buffer(leaf); |
1375 | } | 1378 | } |
1376 | kfree(root); | 1379 | kfree(root); |
@@ -1396,13 +1399,15 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1396 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | 1399 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; |
1397 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1400 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
1398 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; | 1401 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; |
1402 | |||
1399 | /* | 1403 | /* |
1404 | * DON'T set REF_COWS for log trees | ||
1405 | * | ||
1400 | * log trees do not get reference counted because they go away | 1406 | * log trees do not get reference counted because they go away |
1401 | * before a real commit is actually done. They do store pointers | 1407 | * before a real commit is actually done. They do store pointers |
1402 | * to file data extents, and those reference counts still get | 1408 | * to file data extents, and those reference counts still get |
1403 | * updated (along with back refs to the log tree). | 1409 | * updated (along with back refs to the log tree). |
1404 | */ | 1410 | */ |
1405 | root->ref_cows = 0; | ||
1406 | 1411 | ||
1407 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1412 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1408 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1413 | BTRFS_TREE_LOG_OBJECTID, NULL, |
@@ -1536,7 +1541,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
1536 | return root; | 1541 | return root; |
1537 | 1542 | ||
1538 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 1543 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
1539 | root->ref_cows = 1; | 1544 | set_bit(BTRFS_ROOT_REF_COWS, &root->state); |
1540 | btrfs_check_and_init_root_item(&root->root_item); | 1545 | btrfs_check_and_init_root_item(&root->root_item); |
1541 | } | 1546 | } |
1542 | 1547 | ||
@@ -1606,7 +1611,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | |||
1606 | (unsigned long)root->root_key.objectid, | 1611 | (unsigned long)root->root_key.objectid, |
1607 | root); | 1612 | root); |
1608 | if (ret == 0) | 1613 | if (ret == 0) |
1609 | root->in_radix = 1; | 1614 | set_bit(BTRFS_ROOT_IN_RADIX, &root->state); |
1610 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1615 | spin_unlock(&fs_info->fs_roots_radix_lock); |
1611 | radix_tree_preload_end(); | 1616 | radix_tree_preload_end(); |
1612 | 1617 | ||
@@ -1662,7 +1667,7 @@ again: | |||
1662 | if (ret < 0) | 1667 | if (ret < 0) |
1663 | goto fail; | 1668 | goto fail; |
1664 | if (ret == 0) | 1669 | if (ret == 0) |
1665 | root->orphan_item_inserted = 1; | 1670 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
1666 | 1671 | ||
1667 | ret = btrfs_insert_fs_root(fs_info, root); | 1672 | ret = btrfs_insert_fs_root(fs_info, root); |
1668 | if (ret) { | 1673 | if (ret) { |
@@ -2064,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | |||
2064 | btrfs_destroy_workqueue(fs_info->readahead_workers); | 2069 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
2065 | btrfs_destroy_workqueue(fs_info->flush_workers); | 2070 | btrfs_destroy_workqueue(fs_info->flush_workers); |
2066 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); | 2071 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
2072 | btrfs_destroy_workqueue(fs_info->extent_workers); | ||
2067 | } | 2073 | } |
2068 | 2074 | ||
2069 | static void free_root_extent_buffers(struct btrfs_root *root) | 2075 | static void free_root_extent_buffers(struct btrfs_root *root) |
@@ -2090,7 +2096,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
2090 | free_root_extent_buffers(info->chunk_root); | 2096 | free_root_extent_buffers(info->chunk_root); |
2091 | } | 2097 | } |
2092 | 2098 | ||
2093 | static void del_fs_roots(struct btrfs_fs_info *fs_info) | 2099 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) |
2094 | { | 2100 | { |
2095 | int ret; | 2101 | int ret; |
2096 | struct btrfs_root *gang[8]; | 2102 | struct btrfs_root *gang[8]; |
@@ -2101,7 +2107,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) | |||
2101 | struct btrfs_root, root_list); | 2107 | struct btrfs_root, root_list); |
2102 | list_del(&gang[0]->root_list); | 2108 | list_del(&gang[0]->root_list); |
2103 | 2109 | ||
2104 | if (gang[0]->in_radix) { | 2110 | if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) { |
2105 | btrfs_drop_and_free_fs_root(fs_info, gang[0]); | 2111 | btrfs_drop_and_free_fs_root(fs_info, gang[0]); |
2106 | } else { | 2112 | } else { |
2107 | free_extent_buffer(gang[0]->node); | 2113 | free_extent_buffer(gang[0]->node); |
@@ -2221,6 +2227,7 @@ int open_ctree(struct super_block *sb, | |||
2221 | spin_lock_init(&fs_info->free_chunk_lock); | 2227 | spin_lock_init(&fs_info->free_chunk_lock); |
2222 | spin_lock_init(&fs_info->tree_mod_seq_lock); | 2228 | spin_lock_init(&fs_info->tree_mod_seq_lock); |
2223 | spin_lock_init(&fs_info->super_lock); | 2229 | spin_lock_init(&fs_info->super_lock); |
2230 | spin_lock_init(&fs_info->qgroup_op_lock); | ||
2224 | spin_lock_init(&fs_info->buffer_lock); | 2231 | spin_lock_init(&fs_info->buffer_lock); |
2225 | rwlock_init(&fs_info->tree_mod_log_lock); | 2232 | rwlock_init(&fs_info->tree_mod_log_lock); |
2226 | mutex_init(&fs_info->reloc_mutex); | 2233 | mutex_init(&fs_info->reloc_mutex); |
@@ -2246,6 +2253,7 @@ int open_ctree(struct super_block *sb, | |||
2246 | atomic_set(&fs_info->async_submit_draining, 0); | 2253 | atomic_set(&fs_info->async_submit_draining, 0); |
2247 | atomic_set(&fs_info->nr_async_bios, 0); | 2254 | atomic_set(&fs_info->nr_async_bios, 0); |
2248 | atomic_set(&fs_info->defrag_running, 0); | 2255 | atomic_set(&fs_info->defrag_running, 0); |
2256 | atomic_set(&fs_info->qgroup_op_seq, 0); | ||
2249 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2257 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2250 | fs_info->sb = sb; | 2258 | fs_info->sb = sb; |
2251 | fs_info->max_inline = 8192 * 1024; | 2259 | fs_info->max_inline = 8192 * 1024; |
@@ -2291,6 +2299,7 @@ int open_ctree(struct super_block *sb, | |||
2291 | atomic_set(&fs_info->balance_cancel_req, 0); | 2299 | atomic_set(&fs_info->balance_cancel_req, 0); |
2292 | fs_info->balance_ctl = NULL; | 2300 | fs_info->balance_ctl = NULL; |
2293 | init_waitqueue_head(&fs_info->balance_wait_q); | 2301 | init_waitqueue_head(&fs_info->balance_wait_q); |
2302 | btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); | ||
2294 | 2303 | ||
2295 | sb->s_blocksize = 4096; | 2304 | sb->s_blocksize = 4096; |
2296 | sb->s_blocksize_bits = blksize_bits(4096); | 2305 | sb->s_blocksize_bits = blksize_bits(4096); |
@@ -2354,6 +2363,7 @@ int open_ctree(struct super_block *sb, | |||
2354 | spin_lock_init(&fs_info->qgroup_lock); | 2363 | spin_lock_init(&fs_info->qgroup_lock); |
2355 | mutex_init(&fs_info->qgroup_ioctl_lock); | 2364 | mutex_init(&fs_info->qgroup_ioctl_lock); |
2356 | fs_info->qgroup_tree = RB_ROOT; | 2365 | fs_info->qgroup_tree = RB_ROOT; |
2366 | fs_info->qgroup_op_tree = RB_ROOT; | ||
2357 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | 2367 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); |
2358 | fs_info->qgroup_seq = 1; | 2368 | fs_info->qgroup_seq = 1; |
2359 | fs_info->quota_enabled = 0; | 2369 | fs_info->quota_enabled = 0; |
@@ -2577,6 +2587,10 @@ int open_ctree(struct super_block *sb, | |||
2577 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); | 2587 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
2578 | fs_info->qgroup_rescan_workers = | 2588 | fs_info->qgroup_rescan_workers = |
2579 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); | 2589 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
2590 | fs_info->extent_workers = | ||
2591 | btrfs_alloc_workqueue("extent-refs", flags, | ||
2592 | min_t(u64, fs_devices->num_devices, | ||
2593 | max_active), 8); | ||
2580 | 2594 | ||
2581 | if (!(fs_info->workers && fs_info->delalloc_workers && | 2595 | if (!(fs_info->workers && fs_info->delalloc_workers && |
2582 | fs_info->submit_workers && fs_info->flush_workers && | 2596 | fs_info->submit_workers && fs_info->flush_workers && |
@@ -2586,6 +2600,7 @@ int open_ctree(struct super_block *sb, | |||
2586 | fs_info->endio_freespace_worker && fs_info->rmw_workers && | 2600 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
2587 | fs_info->caching_workers && fs_info->readahead_workers && | 2601 | fs_info->caching_workers && fs_info->readahead_workers && |
2588 | fs_info->fixup_workers && fs_info->delayed_workers && | 2602 | fs_info->fixup_workers && fs_info->delayed_workers && |
2603 | fs_info->fixup_workers && fs_info->extent_workers && | ||
2589 | fs_info->qgroup_rescan_workers)) { | 2604 | fs_info->qgroup_rescan_workers)) { |
2590 | err = -ENOMEM; | 2605 | err = -ENOMEM; |
2591 | goto fail_sb_buffer; | 2606 | goto fail_sb_buffer; |
@@ -2693,7 +2708,7 @@ retry_root_backup: | |||
2693 | ret = PTR_ERR(extent_root); | 2708 | ret = PTR_ERR(extent_root); |
2694 | goto recovery_tree_root; | 2709 | goto recovery_tree_root; |
2695 | } | 2710 | } |
2696 | extent_root->track_dirty = 1; | 2711 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state); |
2697 | fs_info->extent_root = extent_root; | 2712 | fs_info->extent_root = extent_root; |
2698 | 2713 | ||
2699 | location.objectid = BTRFS_DEV_TREE_OBJECTID; | 2714 | location.objectid = BTRFS_DEV_TREE_OBJECTID; |
@@ -2702,7 +2717,7 @@ retry_root_backup: | |||
2702 | ret = PTR_ERR(dev_root); | 2717 | ret = PTR_ERR(dev_root); |
2703 | goto recovery_tree_root; | 2718 | goto recovery_tree_root; |
2704 | } | 2719 | } |
2705 | dev_root->track_dirty = 1; | 2720 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state); |
2706 | fs_info->dev_root = dev_root; | 2721 | fs_info->dev_root = dev_root; |
2707 | btrfs_init_devices_late(fs_info); | 2722 | btrfs_init_devices_late(fs_info); |
2708 | 2723 | ||
@@ -2712,13 +2727,13 @@ retry_root_backup: | |||
2712 | ret = PTR_ERR(csum_root); | 2727 | ret = PTR_ERR(csum_root); |
2713 | goto recovery_tree_root; | 2728 | goto recovery_tree_root; |
2714 | } | 2729 | } |
2715 | csum_root->track_dirty = 1; | 2730 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state); |
2716 | fs_info->csum_root = csum_root; | 2731 | fs_info->csum_root = csum_root; |
2717 | 2732 | ||
2718 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; | 2733 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; |
2719 | quota_root = btrfs_read_tree_root(tree_root, &location); | 2734 | quota_root = btrfs_read_tree_root(tree_root, &location); |
2720 | if (!IS_ERR(quota_root)) { | 2735 | if (!IS_ERR(quota_root)) { |
2721 | quota_root->track_dirty = 1; | 2736 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state); |
2722 | fs_info->quota_enabled = 1; | 2737 | fs_info->quota_enabled = 1; |
2723 | fs_info->pending_quota_state = 1; | 2738 | fs_info->pending_quota_state = 1; |
2724 | fs_info->quota_root = quota_root; | 2739 | fs_info->quota_root = quota_root; |
@@ -2733,7 +2748,7 @@ retry_root_backup: | |||
2733 | create_uuid_tree = true; | 2748 | create_uuid_tree = true; |
2734 | check_uuid_tree = false; | 2749 | check_uuid_tree = false; |
2735 | } else { | 2750 | } else { |
2736 | uuid_root->track_dirty = 1; | 2751 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state); |
2737 | fs_info->uuid_root = uuid_root; | 2752 | fs_info->uuid_root = uuid_root; |
2738 | create_uuid_tree = false; | 2753 | create_uuid_tree = false; |
2739 | check_uuid_tree = | 2754 | check_uuid_tree = |
@@ -2966,7 +2981,7 @@ fail_qgroup: | |||
2966 | fail_trans_kthread: | 2981 | fail_trans_kthread: |
2967 | kthread_stop(fs_info->transaction_kthread); | 2982 | kthread_stop(fs_info->transaction_kthread); |
2968 | btrfs_cleanup_transaction(fs_info->tree_root); | 2983 | btrfs_cleanup_transaction(fs_info->tree_root); |
2969 | del_fs_roots(fs_info); | 2984 | btrfs_free_fs_roots(fs_info); |
2970 | fail_cleaner: | 2985 | fail_cleaner: |
2971 | kthread_stop(fs_info->cleaner_kthread); | 2986 | kthread_stop(fs_info->cleaner_kthread); |
2972 | 2987 | ||
@@ -3501,8 +3516,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
3501 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) | 3516 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
3502 | btrfs_free_log(NULL, root); | 3517 | btrfs_free_log(NULL, root); |
3503 | 3518 | ||
3504 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | 3519 | if (root->free_ino_pinned) |
3505 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | 3520 | __btrfs_remove_free_space_cache(root->free_ino_pinned); |
3521 | if (root->free_ino_ctl) | ||
3522 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | ||
3506 | free_fs_root(root); | 3523 | free_fs_root(root); |
3507 | } | 3524 | } |
3508 | 3525 | ||
@@ -3533,28 +3550,51 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
3533 | { | 3550 | { |
3534 | u64 root_objectid = 0; | 3551 | u64 root_objectid = 0; |
3535 | struct btrfs_root *gang[8]; | 3552 | struct btrfs_root *gang[8]; |
3536 | int i; | 3553 | int i = 0; |
3537 | int ret; | 3554 | int err = 0; |
3555 | unsigned int ret = 0; | ||
3556 | int index; | ||
3538 | 3557 | ||
3539 | while (1) { | 3558 | while (1) { |
3559 | index = srcu_read_lock(&fs_info->subvol_srcu); | ||
3540 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | 3560 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, |
3541 | (void **)gang, root_objectid, | 3561 | (void **)gang, root_objectid, |
3542 | ARRAY_SIZE(gang)); | 3562 | ARRAY_SIZE(gang)); |
3543 | if (!ret) | 3563 | if (!ret) { |
3564 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
3544 | break; | 3565 | break; |
3545 | 3566 | } | |
3546 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 3567 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
3568 | |||
3547 | for (i = 0; i < ret; i++) { | 3569 | for (i = 0; i < ret; i++) { |
3548 | int err; | 3570 | /* Avoid to grab roots in dead_roots */ |
3571 | if (btrfs_root_refs(&gang[i]->root_item) == 0) { | ||
3572 | gang[i] = NULL; | ||
3573 | continue; | ||
3574 | } | ||
3575 | /* grab all the search result for later use */ | ||
3576 | gang[i] = btrfs_grab_fs_root(gang[i]); | ||
3577 | } | ||
3578 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
3549 | 3579 | ||
3580 | for (i = 0; i < ret; i++) { | ||
3581 | if (!gang[i]) | ||
3582 | continue; | ||
3550 | root_objectid = gang[i]->root_key.objectid; | 3583 | root_objectid = gang[i]->root_key.objectid; |
3551 | err = btrfs_orphan_cleanup(gang[i]); | 3584 | err = btrfs_orphan_cleanup(gang[i]); |
3552 | if (err) | 3585 | if (err) |
3553 | return err; | 3586 | break; |
3587 | btrfs_put_fs_root(gang[i]); | ||
3554 | } | 3588 | } |
3555 | root_objectid++; | 3589 | root_objectid++; |
3556 | } | 3590 | } |
3557 | return 0; | 3591 | |
3592 | /* release the uncleaned roots due to error */ | ||
3593 | for (; i < ret; i++) { | ||
3594 | if (gang[i]) | ||
3595 | btrfs_put_fs_root(gang[i]); | ||
3596 | } | ||
3597 | return err; | ||
3558 | } | 3598 | } |
3559 | 3599 | ||
3560 | int btrfs_commit_super(struct btrfs_root *root) | 3600 | int btrfs_commit_super(struct btrfs_root *root) |
@@ -3603,6 +3643,8 @@ int close_ctree(struct btrfs_root *root) | |||
3603 | /* clear out the rbtree of defraggable inodes */ | 3643 | /* clear out the rbtree of defraggable inodes */ |
3604 | btrfs_cleanup_defrag_inodes(fs_info); | 3644 | btrfs_cleanup_defrag_inodes(fs_info); |
3605 | 3645 | ||
3646 | cancel_work_sync(&fs_info->async_reclaim_work); | ||
3647 | |||
3606 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3648 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
3607 | ret = btrfs_commit_super(root); | 3649 | ret = btrfs_commit_super(root); |
3608 | if (ret) | 3650 | if (ret) |
@@ -3627,12 +3669,17 @@ int close_ctree(struct btrfs_root *root) | |||
3627 | 3669 | ||
3628 | btrfs_sysfs_remove_one(fs_info); | 3670 | btrfs_sysfs_remove_one(fs_info); |
3629 | 3671 | ||
3630 | del_fs_roots(fs_info); | 3672 | btrfs_free_fs_roots(fs_info); |
3631 | 3673 | ||
3632 | btrfs_put_block_group_cache(fs_info); | 3674 | btrfs_put_block_group_cache(fs_info); |
3633 | 3675 | ||
3634 | btrfs_free_block_groups(fs_info); | 3676 | btrfs_free_block_groups(fs_info); |
3635 | 3677 | ||
3678 | /* | ||
3679 | * we must make sure there is not any read request to | ||
3680 | * submit after we stopping all workers. | ||
3681 | */ | ||
3682 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
3636 | btrfs_stop_all_workers(fs_info); | 3683 | btrfs_stop_all_workers(fs_info); |
3637 | 3684 | ||
3638 | free_root_pointers(fs_info, 1); | 3685 | free_root_pointers(fs_info, 1); |
@@ -3709,6 +3756,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
3709 | __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, | 3756 | __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, |
3710 | buf->len, | 3757 | buf->len, |
3711 | root->fs_info->dirty_metadata_batch); | 3758 | root->fs_info->dirty_metadata_batch); |
3759 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
3760 | if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { | ||
3761 | btrfs_print_leaf(root, buf); | ||
3762 | ASSERT(0); | ||
3763 | } | ||
3764 | #endif | ||
3712 | } | 3765 | } |
3713 | 3766 | ||
3714 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | 3767 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 53059df350f8..23ce3ceba0a9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -68,6 +68,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
68 | int btrfs_init_fs_root(struct btrfs_root *root); | 68 | int btrfs_init_fs_root(struct btrfs_root *root); |
69 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | 69 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, |
70 | struct btrfs_root *root); | 70 | struct btrfs_root *root); |
71 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); | ||
71 | 72 | ||
72 | struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, | 73 | struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, |
73 | struct btrfs_key *key, | 74 | struct btrfs_key *key, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5590af92094b..fafb3e53ecde 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -26,16 +26,16 @@ | |||
26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
27 | #include <linux/percpu_counter.h> | 27 | #include <linux/percpu_counter.h> |
28 | #include "hash.h" | 28 | #include "hash.h" |
29 | #include "ctree.h" | 29 | #include "tree-log.h" |
30 | #include "disk-io.h" | 30 | #include "disk-io.h" |
31 | #include "print-tree.h" | 31 | #include "print-tree.h" |
32 | #include "transaction.h" | ||
33 | #include "volumes.h" | 32 | #include "volumes.h" |
34 | #include "raid56.h" | 33 | #include "raid56.h" |
35 | #include "locking.h" | 34 | #include "locking.h" |
36 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
37 | #include "math.h" | 36 | #include "math.h" |
38 | #include "sysfs.h" | 37 | #include "sysfs.h" |
38 | #include "qgroup.h" | ||
39 | 39 | ||
40 | #undef SCRAMBLE_DELAYED_REFS | 40 | #undef SCRAMBLE_DELAYED_REFS |
41 | 41 | ||
@@ -81,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
81 | u64 bytenr, u64 num_bytes, u64 parent, | 81 | u64 bytenr, u64 num_bytes, u64 parent, |
82 | u64 root_objectid, u64 owner_objectid, | 82 | u64 root_objectid, u64 owner_objectid, |
83 | u64 owner_offset, int refs_to_drop, | 83 | u64 owner_offset, int refs_to_drop, |
84 | struct btrfs_delayed_extent_op *extra_op); | 84 | struct btrfs_delayed_extent_op *extra_op, |
85 | int no_quota); | ||
85 | static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, | 86 | static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, |
86 | struct extent_buffer *leaf, | 87 | struct extent_buffer *leaf, |
87 | struct btrfs_extent_item *ei); | 88 | struct btrfs_extent_item *ei); |
@@ -94,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
94 | struct btrfs_root *root, | 95 | struct btrfs_root *root, |
95 | u64 parent, u64 root_objectid, | 96 | u64 parent, u64 root_objectid, |
96 | u64 flags, struct btrfs_disk_key *key, | 97 | u64 flags, struct btrfs_disk_key *key, |
97 | int level, struct btrfs_key *ins); | 98 | int level, struct btrfs_key *ins, |
99 | int no_quota); | ||
98 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 100 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
99 | struct btrfs_root *extent_root, u64 flags, | 101 | struct btrfs_root *extent_root, u64 flags, |
100 | int force); | 102 | int force); |
@@ -1271,7 +1273,7 @@ fail: | |||
1271 | static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, | 1273 | static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, |
1272 | struct btrfs_root *root, | 1274 | struct btrfs_root *root, |
1273 | struct btrfs_path *path, | 1275 | struct btrfs_path *path, |
1274 | int refs_to_drop) | 1276 | int refs_to_drop, int *last_ref) |
1275 | { | 1277 | { |
1276 | struct btrfs_key key; | 1278 | struct btrfs_key key; |
1277 | struct btrfs_extent_data_ref *ref1 = NULL; | 1279 | struct btrfs_extent_data_ref *ref1 = NULL; |
@@ -1307,6 +1309,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, | |||
1307 | 1309 | ||
1308 | if (num_refs == 0) { | 1310 | if (num_refs == 0) { |
1309 | ret = btrfs_del_item(trans, root, path); | 1311 | ret = btrfs_del_item(trans, root, path); |
1312 | *last_ref = 1; | ||
1310 | } else { | 1313 | } else { |
1311 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) | 1314 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) |
1312 | btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); | 1315 | btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); |
@@ -1764,7 +1767,8 @@ void update_inline_extent_backref(struct btrfs_root *root, | |||
1764 | struct btrfs_path *path, | 1767 | struct btrfs_path *path, |
1765 | struct btrfs_extent_inline_ref *iref, | 1768 | struct btrfs_extent_inline_ref *iref, |
1766 | int refs_to_mod, | 1769 | int refs_to_mod, |
1767 | struct btrfs_delayed_extent_op *extent_op) | 1770 | struct btrfs_delayed_extent_op *extent_op, |
1771 | int *last_ref) | ||
1768 | { | 1772 | { |
1769 | struct extent_buffer *leaf; | 1773 | struct extent_buffer *leaf; |
1770 | struct btrfs_extent_item *ei; | 1774 | struct btrfs_extent_item *ei; |
@@ -1808,6 +1812,7 @@ void update_inline_extent_backref(struct btrfs_root *root, | |||
1808 | else | 1812 | else |
1809 | btrfs_set_shared_data_ref_count(leaf, sref, refs); | 1813 | btrfs_set_shared_data_ref_count(leaf, sref, refs); |
1810 | } else { | 1814 | } else { |
1815 | *last_ref = 1; | ||
1811 | size = btrfs_extent_inline_ref_size(type); | 1816 | size = btrfs_extent_inline_ref_size(type); |
1812 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | 1817 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
1813 | ptr = (unsigned long)iref; | 1818 | ptr = (unsigned long)iref; |
@@ -1839,7 +1844,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
1839 | if (ret == 0) { | 1844 | if (ret == 0) { |
1840 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); | 1845 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); |
1841 | update_inline_extent_backref(root, path, iref, | 1846 | update_inline_extent_backref(root, path, iref, |
1842 | refs_to_add, extent_op); | 1847 | refs_to_add, extent_op, NULL); |
1843 | } else if (ret == -ENOENT) { | 1848 | } else if (ret == -ENOENT) { |
1844 | setup_inline_extent_backref(root, path, iref, parent, | 1849 | setup_inline_extent_backref(root, path, iref, parent, |
1845 | root_objectid, owner, offset, | 1850 | root_objectid, owner, offset, |
@@ -1872,17 +1877,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1872 | struct btrfs_root *root, | 1877 | struct btrfs_root *root, |
1873 | struct btrfs_path *path, | 1878 | struct btrfs_path *path, |
1874 | struct btrfs_extent_inline_ref *iref, | 1879 | struct btrfs_extent_inline_ref *iref, |
1875 | int refs_to_drop, int is_data) | 1880 | int refs_to_drop, int is_data, int *last_ref) |
1876 | { | 1881 | { |
1877 | int ret = 0; | 1882 | int ret = 0; |
1878 | 1883 | ||
1879 | BUG_ON(!is_data && refs_to_drop != 1); | 1884 | BUG_ON(!is_data && refs_to_drop != 1); |
1880 | if (iref) { | 1885 | if (iref) { |
1881 | update_inline_extent_backref(root, path, iref, | 1886 | update_inline_extent_backref(root, path, iref, |
1882 | -refs_to_drop, NULL); | 1887 | -refs_to_drop, NULL, last_ref); |
1883 | } else if (is_data) { | 1888 | } else if (is_data) { |
1884 | ret = remove_extent_data_ref(trans, root, path, refs_to_drop); | 1889 | ret = remove_extent_data_ref(trans, root, path, refs_to_drop, |
1890 | last_ref); | ||
1885 | } else { | 1891 | } else { |
1892 | *last_ref = 1; | ||
1886 | ret = btrfs_del_item(trans, root, path); | 1893 | ret = btrfs_del_item(trans, root, path); |
1887 | } | 1894 | } |
1888 | return ret; | 1895 | return ret; |
@@ -1946,7 +1953,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1946 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1953 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1947 | struct btrfs_root *root, | 1954 | struct btrfs_root *root, |
1948 | u64 bytenr, u64 num_bytes, u64 parent, | 1955 | u64 bytenr, u64 num_bytes, u64 parent, |
1949 | u64 root_objectid, u64 owner, u64 offset, int for_cow) | 1956 | u64 root_objectid, u64 owner, u64 offset, |
1957 | int no_quota) | ||
1950 | { | 1958 | { |
1951 | int ret; | 1959 | int ret; |
1952 | struct btrfs_fs_info *fs_info = root->fs_info; | 1960 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -1958,12 +1966,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1958 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 1966 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
1959 | num_bytes, | 1967 | num_bytes, |
1960 | parent, root_objectid, (int)owner, | 1968 | parent, root_objectid, (int)owner, |
1961 | BTRFS_ADD_DELAYED_REF, NULL, for_cow); | 1969 | BTRFS_ADD_DELAYED_REF, NULL, no_quota); |
1962 | } else { | 1970 | } else { |
1963 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, | 1971 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
1964 | num_bytes, | 1972 | num_bytes, |
1965 | parent, root_objectid, owner, offset, | 1973 | parent, root_objectid, owner, offset, |
1966 | BTRFS_ADD_DELAYED_REF, NULL, for_cow); | 1974 | BTRFS_ADD_DELAYED_REF, NULL, no_quota); |
1967 | } | 1975 | } |
1968 | return ret; | 1976 | return ret; |
1969 | } | 1977 | } |
@@ -1973,31 +1981,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1973 | u64 bytenr, u64 num_bytes, | 1981 | u64 bytenr, u64 num_bytes, |
1974 | u64 parent, u64 root_objectid, | 1982 | u64 parent, u64 root_objectid, |
1975 | u64 owner, u64 offset, int refs_to_add, | 1983 | u64 owner, u64 offset, int refs_to_add, |
1984 | int no_quota, | ||
1976 | struct btrfs_delayed_extent_op *extent_op) | 1985 | struct btrfs_delayed_extent_op *extent_op) |
1977 | { | 1986 | { |
1987 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1978 | struct btrfs_path *path; | 1988 | struct btrfs_path *path; |
1979 | struct extent_buffer *leaf; | 1989 | struct extent_buffer *leaf; |
1980 | struct btrfs_extent_item *item; | 1990 | struct btrfs_extent_item *item; |
1991 | struct btrfs_key key; | ||
1981 | u64 refs; | 1992 | u64 refs; |
1982 | int ret; | 1993 | int ret; |
1994 | enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL; | ||
1983 | 1995 | ||
1984 | path = btrfs_alloc_path(); | 1996 | path = btrfs_alloc_path(); |
1985 | if (!path) | 1997 | if (!path) |
1986 | return -ENOMEM; | 1998 | return -ENOMEM; |
1987 | 1999 | ||
2000 | if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) | ||
2001 | no_quota = 1; | ||
2002 | |||
1988 | path->reada = 1; | 2003 | path->reada = 1; |
1989 | path->leave_spinning = 1; | 2004 | path->leave_spinning = 1; |
1990 | /* this will setup the path even if it fails to insert the back ref */ | 2005 | /* this will setup the path even if it fails to insert the back ref */ |
1991 | ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, | 2006 | ret = insert_inline_extent_backref(trans, fs_info->extent_root, path, |
1992 | path, bytenr, num_bytes, parent, | 2007 | bytenr, num_bytes, parent, |
1993 | root_objectid, owner, offset, | 2008 | root_objectid, owner, offset, |
1994 | refs_to_add, extent_op); | 2009 | refs_to_add, extent_op); |
1995 | if (ret != -EAGAIN) | 2010 | if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota)) |
1996 | goto out; | 2011 | goto out; |
2012 | /* | ||
2013 | * Ok we were able to insert an inline extent and it appears to be a new | ||
2014 | * reference, deal with the qgroup accounting. | ||
2015 | */ | ||
2016 | if (!ret && !no_quota) { | ||
2017 | ASSERT(root->fs_info->quota_enabled); | ||
2018 | leaf = path->nodes[0]; | ||
2019 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
2020 | item = btrfs_item_ptr(leaf, path->slots[0], | ||
2021 | struct btrfs_extent_item); | ||
2022 | if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add) | ||
2023 | type = BTRFS_QGROUP_OPER_ADD_SHARED; | ||
2024 | btrfs_release_path(path); | ||
1997 | 2025 | ||
2026 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
2027 | bytenr, num_bytes, type, 0); | ||
2028 | goto out; | ||
2029 | } | ||
2030 | |||
2031 | /* | ||
2032 | * Ok we had -EAGAIN which means we didn't have space to insert and | ||
2033 | * inline extent ref, so just update the reference count and add a | ||
2034 | * normal backref. | ||
2035 | */ | ||
1998 | leaf = path->nodes[0]; | 2036 | leaf = path->nodes[0]; |
2037 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
1999 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | 2038 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
2000 | refs = btrfs_extent_refs(leaf, item); | 2039 | refs = btrfs_extent_refs(leaf, item); |
2040 | if (refs) | ||
2041 | type = BTRFS_QGROUP_OPER_ADD_SHARED; | ||
2001 | btrfs_set_extent_refs(leaf, item, refs + refs_to_add); | 2042 | btrfs_set_extent_refs(leaf, item, refs + refs_to_add); |
2002 | if (extent_op) | 2043 | if (extent_op) |
2003 | __run_delayed_extent_op(extent_op, leaf, item); | 2044 | __run_delayed_extent_op(extent_op, leaf, item); |
@@ -2005,9 +2046,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
2005 | btrfs_mark_buffer_dirty(leaf); | 2046 | btrfs_mark_buffer_dirty(leaf); |
2006 | btrfs_release_path(path); | 2047 | btrfs_release_path(path); |
2007 | 2048 | ||
2049 | if (!no_quota) { | ||
2050 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
2051 | bytenr, num_bytes, type, 0); | ||
2052 | if (ret) | ||
2053 | goto out; | ||
2054 | } | ||
2055 | |||
2008 | path->reada = 1; | 2056 | path->reada = 1; |
2009 | path->leave_spinning = 1; | 2057 | path->leave_spinning = 1; |
2010 | |||
2011 | /* now insert the actual backref */ | 2058 | /* now insert the actual backref */ |
2012 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | 2059 | ret = insert_extent_backref(trans, root->fs_info->extent_root, |
2013 | path, bytenr, parent, root_objectid, | 2060 | path, bytenr, parent, root_objectid, |
@@ -2041,8 +2088,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
2041 | 2088 | ||
2042 | if (node->type == BTRFS_SHARED_DATA_REF_KEY) | 2089 | if (node->type == BTRFS_SHARED_DATA_REF_KEY) |
2043 | parent = ref->parent; | 2090 | parent = ref->parent; |
2044 | else | 2091 | ref_root = ref->root; |
2045 | ref_root = ref->root; | ||
2046 | 2092 | ||
2047 | if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { | 2093 | if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { |
2048 | if (extent_op) | 2094 | if (extent_op) |
@@ -2056,13 +2102,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
2056 | node->num_bytes, parent, | 2102 | node->num_bytes, parent, |
2057 | ref_root, ref->objectid, | 2103 | ref_root, ref->objectid, |
2058 | ref->offset, node->ref_mod, | 2104 | ref->offset, node->ref_mod, |
2059 | extent_op); | 2105 | node->no_quota, extent_op); |
2060 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { | 2106 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { |
2061 | ret = __btrfs_free_extent(trans, root, node->bytenr, | 2107 | ret = __btrfs_free_extent(trans, root, node->bytenr, |
2062 | node->num_bytes, parent, | 2108 | node->num_bytes, parent, |
2063 | ref_root, ref->objectid, | 2109 | ref_root, ref->objectid, |
2064 | ref->offset, node->ref_mod, | 2110 | ref->offset, node->ref_mod, |
2065 | extent_op); | 2111 | extent_op, node->no_quota); |
2066 | } else { | 2112 | } else { |
2067 | BUG(); | 2113 | BUG(); |
2068 | } | 2114 | } |
@@ -2199,8 +2245,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
2199 | 2245 | ||
2200 | if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) | 2246 | if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) |
2201 | parent = ref->parent; | 2247 | parent = ref->parent; |
2202 | else | 2248 | ref_root = ref->root; |
2203 | ref_root = ref->root; | ||
2204 | 2249 | ||
2205 | ins.objectid = node->bytenr; | 2250 | ins.objectid = node->bytenr; |
2206 | if (skinny_metadata) { | 2251 | if (skinny_metadata) { |
@@ -2218,15 +2263,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
2218 | parent, ref_root, | 2263 | parent, ref_root, |
2219 | extent_op->flags_to_set, | 2264 | extent_op->flags_to_set, |
2220 | &extent_op->key, | 2265 | &extent_op->key, |
2221 | ref->level, &ins); | 2266 | ref->level, &ins, |
2267 | node->no_quota); | ||
2222 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 2268 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
2223 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 2269 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
2224 | node->num_bytes, parent, ref_root, | 2270 | node->num_bytes, parent, ref_root, |
2225 | ref->level, 0, 1, extent_op); | 2271 | ref->level, 0, 1, node->no_quota, |
2272 | extent_op); | ||
2226 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { | 2273 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { |
2227 | ret = __btrfs_free_extent(trans, root, node->bytenr, | 2274 | ret = __btrfs_free_extent(trans, root, node->bytenr, |
2228 | node->num_bytes, parent, ref_root, | 2275 | node->num_bytes, parent, ref_root, |
2229 | ref->level, 0, 1, extent_op); | 2276 | ref->level, 0, 1, extent_op, |
2277 | node->no_quota); | ||
2230 | } else { | 2278 | } else { |
2231 | BUG(); | 2279 | BUG(); |
2232 | } | 2280 | } |
@@ -2574,42 +2622,6 @@ static u64 find_middle(struct rb_root *root) | |||
2574 | } | 2622 | } |
2575 | #endif | 2623 | #endif |
2576 | 2624 | ||
2577 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
2578 | struct btrfs_fs_info *fs_info) | ||
2579 | { | ||
2580 | struct qgroup_update *qgroup_update; | ||
2581 | int ret = 0; | ||
2582 | |||
2583 | if (list_empty(&trans->qgroup_ref_list) != | ||
2584 | !trans->delayed_ref_elem.seq) { | ||
2585 | /* list without seq or seq without list */ | ||
2586 | btrfs_err(fs_info, | ||
2587 | "qgroup accounting update error, list is%s empty, seq is %#x.%x", | ||
2588 | list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
2589 | (u32)(trans->delayed_ref_elem.seq >> 32), | ||
2590 | (u32)trans->delayed_ref_elem.seq); | ||
2591 | BUG(); | ||
2592 | } | ||
2593 | |||
2594 | if (!trans->delayed_ref_elem.seq) | ||
2595 | return 0; | ||
2596 | |||
2597 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
2598 | qgroup_update = list_first_entry(&trans->qgroup_ref_list, | ||
2599 | struct qgroup_update, list); | ||
2600 | list_del(&qgroup_update->list); | ||
2601 | if (!ret) | ||
2602 | ret = btrfs_qgroup_account_ref( | ||
2603 | trans, fs_info, qgroup_update->node, | ||
2604 | qgroup_update->extent_op); | ||
2605 | kfree(qgroup_update); | ||
2606 | } | ||
2607 | |||
2608 | btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
2609 | |||
2610 | return ret; | ||
2611 | } | ||
2612 | |||
2613 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) | 2625 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) |
2614 | { | 2626 | { |
2615 | u64 num_bytes; | 2627 | u64 num_bytes; |
@@ -2662,15 +2674,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | |||
2662 | u64 num_entries = | 2674 | u64 num_entries = |
2663 | atomic_read(&trans->transaction->delayed_refs.num_entries); | 2675 | atomic_read(&trans->transaction->delayed_refs.num_entries); |
2664 | u64 avg_runtime; | 2676 | u64 avg_runtime; |
2677 | u64 val; | ||
2665 | 2678 | ||
2666 | smp_mb(); | 2679 | smp_mb(); |
2667 | avg_runtime = fs_info->avg_delayed_ref_runtime; | 2680 | avg_runtime = fs_info->avg_delayed_ref_runtime; |
2681 | val = num_entries * avg_runtime; | ||
2668 | if (num_entries * avg_runtime >= NSEC_PER_SEC) | 2682 | if (num_entries * avg_runtime >= NSEC_PER_SEC) |
2669 | return 1; | 2683 | return 1; |
2684 | if (val >= NSEC_PER_SEC / 2) | ||
2685 | return 2; | ||
2670 | 2686 | ||
2671 | return btrfs_check_space_for_delayed_refs(trans, root); | 2687 | return btrfs_check_space_for_delayed_refs(trans, root); |
2672 | } | 2688 | } |
2673 | 2689 | ||
2690 | struct async_delayed_refs { | ||
2691 | struct btrfs_root *root; | ||
2692 | int count; | ||
2693 | int error; | ||
2694 | int sync; | ||
2695 | struct completion wait; | ||
2696 | struct btrfs_work work; | ||
2697 | }; | ||
2698 | |||
2699 | static void delayed_ref_async_start(struct btrfs_work *work) | ||
2700 | { | ||
2701 | struct async_delayed_refs *async; | ||
2702 | struct btrfs_trans_handle *trans; | ||
2703 | int ret; | ||
2704 | |||
2705 | async = container_of(work, struct async_delayed_refs, work); | ||
2706 | |||
2707 | trans = btrfs_join_transaction(async->root); | ||
2708 | if (IS_ERR(trans)) { | ||
2709 | async->error = PTR_ERR(trans); | ||
2710 | goto done; | ||
2711 | } | ||
2712 | |||
2713 | /* | ||
2714 | * trans->sync means that when we call end_transaction, we won't | ||
2715 | * wait on delayed refs | ||
2716 | */ | ||
2717 | trans->sync = true; | ||
2718 | ret = btrfs_run_delayed_refs(trans, async->root, async->count); | ||
2719 | if (ret) | ||
2720 | async->error = ret; | ||
2721 | |||
2722 | ret = btrfs_end_transaction(trans, async->root); | ||
2723 | if (ret && !async->error) | ||
2724 | async->error = ret; | ||
2725 | done: | ||
2726 | if (async->sync) | ||
2727 | complete(&async->wait); | ||
2728 | else | ||
2729 | kfree(async); | ||
2730 | } | ||
2731 | |||
2732 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, | ||
2733 | unsigned long count, int wait) | ||
2734 | { | ||
2735 | struct async_delayed_refs *async; | ||
2736 | int ret; | ||
2737 | |||
2738 | async = kmalloc(sizeof(*async), GFP_NOFS); | ||
2739 | if (!async) | ||
2740 | return -ENOMEM; | ||
2741 | |||
2742 | async->root = root->fs_info->tree_root; | ||
2743 | async->count = count; | ||
2744 | async->error = 0; | ||
2745 | if (wait) | ||
2746 | async->sync = 1; | ||
2747 | else | ||
2748 | async->sync = 0; | ||
2749 | init_completion(&async->wait); | ||
2750 | |||
2751 | btrfs_init_work(&async->work, delayed_ref_async_start, | ||
2752 | NULL, NULL); | ||
2753 | |||
2754 | btrfs_queue_work(root->fs_info->extent_workers, &async->work); | ||
2755 | |||
2756 | if (wait) { | ||
2757 | wait_for_completion(&async->wait); | ||
2758 | ret = async->error; | ||
2759 | kfree(async); | ||
2760 | return ret; | ||
2761 | } | ||
2762 | return 0; | ||
2763 | } | ||
2764 | |||
2674 | /* | 2765 | /* |
2675 | * this starts processing the delayed reference count updates and | 2766 | * this starts processing the delayed reference count updates and |
2676 | * extent insertions we have queued up so far. count can be | 2767 | * extent insertions we have queued up so far. count can be |
@@ -2698,8 +2789,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2698 | if (root == root->fs_info->extent_root) | 2789 | if (root == root->fs_info->extent_root) |
2699 | root = root->fs_info->tree_root; | 2790 | root = root->fs_info->tree_root; |
2700 | 2791 | ||
2701 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
2702 | |||
2703 | delayed_refs = &trans->transaction->delayed_refs; | 2792 | delayed_refs = &trans->transaction->delayed_refs; |
2704 | if (count == 0) { | 2793 | if (count == 0) { |
2705 | count = atomic_read(&delayed_refs->num_entries) * 2; | 2794 | count = atomic_read(&delayed_refs->num_entries) * 2; |
@@ -2758,6 +2847,9 @@ again: | |||
2758 | goto again; | 2847 | goto again; |
2759 | } | 2848 | } |
2760 | out: | 2849 | out: |
2850 | ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info); | ||
2851 | if (ret) | ||
2852 | return ret; | ||
2761 | assert_qgroups_uptodate(trans); | 2853 | assert_qgroups_uptodate(trans); |
2762 | return 0; | 2854 | return 0; |
2763 | } | 2855 | } |
@@ -2964,7 +3056,7 @@ out: | |||
2964 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | 3056 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, |
2965 | struct btrfs_root *root, | 3057 | struct btrfs_root *root, |
2966 | struct extent_buffer *buf, | 3058 | struct extent_buffer *buf, |
2967 | int full_backref, int inc, int for_cow) | 3059 | int full_backref, int inc, int no_quota) |
2968 | { | 3060 | { |
2969 | u64 bytenr; | 3061 | u64 bytenr; |
2970 | u64 num_bytes; | 3062 | u64 num_bytes; |
@@ -2979,11 +3071,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
2979 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 3071 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
2980 | u64, u64, u64, u64, u64, u64, int); | 3072 | u64, u64, u64, u64, u64, u64, int); |
2981 | 3073 | ||
3074 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
3075 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
3076 | return 0; | ||
3077 | #endif | ||
2982 | ref_root = btrfs_header_owner(buf); | 3078 | ref_root = btrfs_header_owner(buf); |
2983 | nritems = btrfs_header_nritems(buf); | 3079 | nritems = btrfs_header_nritems(buf); |
2984 | level = btrfs_header_level(buf); | 3080 | level = btrfs_header_level(buf); |
2985 | 3081 | ||
2986 | if (!root->ref_cows && level == 0) | 3082 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0) |
2987 | return 0; | 3083 | return 0; |
2988 | 3084 | ||
2989 | if (inc) | 3085 | if (inc) |
@@ -3014,7 +3110,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3014 | key.offset -= btrfs_file_extent_offset(buf, fi); | 3110 | key.offset -= btrfs_file_extent_offset(buf, fi); |
3015 | ret = process_func(trans, root, bytenr, num_bytes, | 3111 | ret = process_func(trans, root, bytenr, num_bytes, |
3016 | parent, ref_root, key.objectid, | 3112 | parent, ref_root, key.objectid, |
3017 | key.offset, for_cow); | 3113 | key.offset, no_quota); |
3018 | if (ret) | 3114 | if (ret) |
3019 | goto fail; | 3115 | goto fail; |
3020 | } else { | 3116 | } else { |
@@ -3022,7 +3118,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3022 | num_bytes = btrfs_level_size(root, level - 1); | 3118 | num_bytes = btrfs_level_size(root, level - 1); |
3023 | ret = process_func(trans, root, bytenr, num_bytes, | 3119 | ret = process_func(trans, root, bytenr, num_bytes, |
3024 | parent, ref_root, level - 1, 0, | 3120 | parent, ref_root, level - 1, 0, |
3025 | for_cow); | 3121 | no_quota); |
3026 | if (ret) | 3122 | if (ret) |
3027 | goto fail; | 3123 | goto fail; |
3028 | } | 3124 | } |
@@ -3033,15 +3129,15 @@ fail: | |||
3033 | } | 3129 | } |
3034 | 3130 | ||
3035 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3131 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3036 | struct extent_buffer *buf, int full_backref, int for_cow) | 3132 | struct extent_buffer *buf, int full_backref, int no_quota) |
3037 | { | 3133 | { |
3038 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); | 3134 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); |
3039 | } | 3135 | } |
3040 | 3136 | ||
3041 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3137 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3042 | struct extent_buffer *buf, int full_backref, int for_cow) | 3138 | struct extent_buffer *buf, int full_backref, int no_quota) |
3043 | { | 3139 | { |
3044 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); | 3140 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); |
3045 | } | 3141 | } |
3046 | 3142 | ||
3047 | static int write_one_cache_group(struct btrfs_trans_handle *trans, | 3143 | static int write_one_cache_group(struct btrfs_trans_handle *trans, |
@@ -3401,10 +3497,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3401 | return ret; | 3497 | return ret; |
3402 | } | 3498 | } |
3403 | 3499 | ||
3404 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | 3500 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
3405 | INIT_LIST_HEAD(&found->block_groups[i]); | 3501 | INIT_LIST_HEAD(&found->block_groups[i]); |
3406 | kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype); | ||
3407 | } | ||
3408 | init_rwsem(&found->groups_sem); | 3502 | init_rwsem(&found->groups_sem); |
3409 | spin_lock_init(&found->lock); | 3503 | spin_lock_init(&found->lock); |
3410 | found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; | 3504 | found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; |
@@ -4204,6 +4298,104 @@ static int flush_space(struct btrfs_root *root, | |||
4204 | 4298 | ||
4205 | return ret; | 4299 | return ret; |
4206 | } | 4300 | } |
4301 | |||
4302 | static inline u64 | ||
4303 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | ||
4304 | struct btrfs_space_info *space_info) | ||
4305 | { | ||
4306 | u64 used; | ||
4307 | u64 expected; | ||
4308 | u64 to_reclaim; | ||
4309 | |||
4310 | to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024, | ||
4311 | 16 * 1024 * 1024); | ||
4312 | spin_lock(&space_info->lock); | ||
4313 | if (can_overcommit(root, space_info, to_reclaim, | ||
4314 | BTRFS_RESERVE_FLUSH_ALL)) { | ||
4315 | to_reclaim = 0; | ||
4316 | goto out; | ||
4317 | } | ||
4318 | |||
4319 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
4320 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
4321 | space_info->bytes_may_use; | ||
4322 | if (can_overcommit(root, space_info, 1024 * 1024, | ||
4323 | BTRFS_RESERVE_FLUSH_ALL)) | ||
4324 | expected = div_factor_fine(space_info->total_bytes, 95); | ||
4325 | else | ||
4326 | expected = div_factor_fine(space_info->total_bytes, 90); | ||
4327 | |||
4328 | if (used > expected) | ||
4329 | to_reclaim = used - expected; | ||
4330 | else | ||
4331 | to_reclaim = 0; | ||
4332 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + | ||
4333 | space_info->bytes_reserved); | ||
4334 | out: | ||
4335 | spin_unlock(&space_info->lock); | ||
4336 | |||
4337 | return to_reclaim; | ||
4338 | } | ||
4339 | |||
4340 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | ||
4341 | struct btrfs_fs_info *fs_info, u64 used) | ||
4342 | { | ||
4343 | return (used >= div_factor_fine(space_info->total_bytes, 98) && | ||
4344 | !btrfs_fs_closing(fs_info) && | ||
4345 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | ||
4346 | } | ||
4347 | |||
4348 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, | ||
4349 | struct btrfs_fs_info *fs_info) | ||
4350 | { | ||
4351 | u64 used; | ||
4352 | |||
4353 | spin_lock(&space_info->lock); | ||
4354 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
4355 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
4356 | space_info->bytes_may_use; | ||
4357 | if (need_do_async_reclaim(space_info, fs_info, used)) { | ||
4358 | spin_unlock(&space_info->lock); | ||
4359 | return 1; | ||
4360 | } | ||
4361 | spin_unlock(&space_info->lock); | ||
4362 | |||
4363 | return 0; | ||
4364 | } | ||
4365 | |||
4366 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | ||
4367 | { | ||
4368 | struct btrfs_fs_info *fs_info; | ||
4369 | struct btrfs_space_info *space_info; | ||
4370 | u64 to_reclaim; | ||
4371 | int flush_state; | ||
4372 | |||
4373 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | ||
4374 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
4375 | |||
4376 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | ||
4377 | space_info); | ||
4378 | if (!to_reclaim) | ||
4379 | return; | ||
4380 | |||
4381 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
4382 | do { | ||
4383 | flush_space(fs_info->fs_root, space_info, to_reclaim, | ||
4384 | to_reclaim, flush_state); | ||
4385 | flush_state++; | ||
4386 | if (!btrfs_need_do_async_reclaim(space_info, fs_info)) | ||
4387 | return; | ||
4388 | } while (flush_state <= COMMIT_TRANS); | ||
4389 | |||
4390 | if (btrfs_need_do_async_reclaim(space_info, fs_info)) | ||
4391 | queue_work(system_unbound_wq, work); | ||
4392 | } | ||
4393 | |||
4394 | void btrfs_init_async_reclaim_work(struct work_struct *work) | ||
4395 | { | ||
4396 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | ||
4397 | } | ||
4398 | |||
4207 | /** | 4399 | /** |
4208 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 4400 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space |
4209 | * @root - the root we're allocating for | 4401 | * @root - the root we're allocating for |
@@ -4311,8 +4503,13 @@ again: | |||
4311 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { | 4503 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { |
4312 | flushing = true; | 4504 | flushing = true; |
4313 | space_info->flush = 1; | 4505 | space_info->flush = 1; |
4506 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
4507 | used += orig_bytes; | ||
4508 | if (need_do_async_reclaim(space_info, root->fs_info, used) && | ||
4509 | !work_busy(&root->fs_info->async_reclaim_work)) | ||
4510 | queue_work(system_unbound_wq, | ||
4511 | &root->fs_info->async_reclaim_work); | ||
4314 | } | 4512 | } |
4315 | |||
4316 | spin_unlock(&space_info->lock); | 4513 | spin_unlock(&space_info->lock); |
4317 | 4514 | ||
4318 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) | 4515 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
@@ -4369,7 +4566,7 @@ static struct btrfs_block_rsv *get_block_rsv( | |||
4369 | { | 4566 | { |
4370 | struct btrfs_block_rsv *block_rsv = NULL; | 4567 | struct btrfs_block_rsv *block_rsv = NULL; |
4371 | 4568 | ||
4372 | if (root->ref_cows) | 4569 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
4373 | block_rsv = trans->block_rsv; | 4570 | block_rsv = trans->block_rsv; |
4374 | 4571 | ||
4375 | if (root == root->fs_info->csum_root && trans->adding_csums) | 4572 | if (root == root->fs_info->csum_root && trans->adding_csums) |
@@ -5621,7 +5818,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5621 | u64 bytenr, u64 num_bytes, u64 parent, | 5818 | u64 bytenr, u64 num_bytes, u64 parent, |
5622 | u64 root_objectid, u64 owner_objectid, | 5819 | u64 root_objectid, u64 owner_objectid, |
5623 | u64 owner_offset, int refs_to_drop, | 5820 | u64 owner_offset, int refs_to_drop, |
5624 | struct btrfs_delayed_extent_op *extent_op) | 5821 | struct btrfs_delayed_extent_op *extent_op, |
5822 | int no_quota) | ||
5625 | { | 5823 | { |
5626 | struct btrfs_key key; | 5824 | struct btrfs_key key; |
5627 | struct btrfs_path *path; | 5825 | struct btrfs_path *path; |
@@ -5637,9 +5835,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5637 | int num_to_del = 1; | 5835 | int num_to_del = 1; |
5638 | u32 item_size; | 5836 | u32 item_size; |
5639 | u64 refs; | 5837 | u64 refs; |
5838 | int last_ref = 0; | ||
5839 | enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL; | ||
5640 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 5840 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
5641 | SKINNY_METADATA); | 5841 | SKINNY_METADATA); |
5642 | 5842 | ||
5843 | if (!info->quota_enabled || !is_fstree(root_objectid)) | ||
5844 | no_quota = 1; | ||
5845 | |||
5643 | path = btrfs_alloc_path(); | 5846 | path = btrfs_alloc_path(); |
5644 | if (!path) | 5847 | if (!path) |
5645 | return -ENOMEM; | 5848 | return -ENOMEM; |
@@ -5687,7 +5890,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5687 | BUG_ON(iref); | 5890 | BUG_ON(iref); |
5688 | ret = remove_extent_backref(trans, extent_root, path, | 5891 | ret = remove_extent_backref(trans, extent_root, path, |
5689 | NULL, refs_to_drop, | 5892 | NULL, refs_to_drop, |
5690 | is_data); | 5893 | is_data, &last_ref); |
5691 | if (ret) { | 5894 | if (ret) { |
5692 | btrfs_abort_transaction(trans, extent_root, ret); | 5895 | btrfs_abort_transaction(trans, extent_root, ret); |
5693 | goto out; | 5896 | goto out; |
@@ -5806,7 +6009,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5806 | refs = btrfs_extent_refs(leaf, ei); | 6009 | refs = btrfs_extent_refs(leaf, ei); |
5807 | if (refs < refs_to_drop) { | 6010 | if (refs < refs_to_drop) { |
5808 | btrfs_err(info, "trying to drop %d refs but we only have %Lu " | 6011 | btrfs_err(info, "trying to drop %d refs but we only have %Lu " |
5809 | "for bytenr %Lu\n", refs_to_drop, refs, bytenr); | 6012 | "for bytenr %Lu", refs_to_drop, refs, bytenr); |
5810 | ret = -EINVAL; | 6013 | ret = -EINVAL; |
5811 | btrfs_abort_transaction(trans, extent_root, ret); | 6014 | btrfs_abort_transaction(trans, extent_root, ret); |
5812 | goto out; | 6015 | goto out; |
@@ -5814,6 +6017,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5814 | refs -= refs_to_drop; | 6017 | refs -= refs_to_drop; |
5815 | 6018 | ||
5816 | if (refs > 0) { | 6019 | if (refs > 0) { |
6020 | type = BTRFS_QGROUP_OPER_SUB_SHARED; | ||
5817 | if (extent_op) | 6021 | if (extent_op) |
5818 | __run_delayed_extent_op(extent_op, leaf, ei); | 6022 | __run_delayed_extent_op(extent_op, leaf, ei); |
5819 | /* | 6023 | /* |
@@ -5829,7 +6033,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5829 | if (found_extent) { | 6033 | if (found_extent) { |
5830 | ret = remove_extent_backref(trans, extent_root, path, | 6034 | ret = remove_extent_backref(trans, extent_root, path, |
5831 | iref, refs_to_drop, | 6035 | iref, refs_to_drop, |
5832 | is_data); | 6036 | is_data, &last_ref); |
5833 | if (ret) { | 6037 | if (ret) { |
5834 | btrfs_abort_transaction(trans, extent_root, ret); | 6038 | btrfs_abort_transaction(trans, extent_root, ret); |
5835 | goto out; | 6039 | goto out; |
@@ -5850,6 +6054,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5850 | } | 6054 | } |
5851 | } | 6055 | } |
5852 | 6056 | ||
6057 | last_ref = 1; | ||
5853 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 6058 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
5854 | num_to_del); | 6059 | num_to_del); |
5855 | if (ret) { | 6060 | if (ret) { |
@@ -5872,6 +6077,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5872 | goto out; | 6077 | goto out; |
5873 | } | 6078 | } |
5874 | } | 6079 | } |
6080 | btrfs_release_path(path); | ||
6081 | |||
6082 | /* Deal with the quota accounting */ | ||
6083 | if (!ret && last_ref && !no_quota) { | ||
6084 | int mod_seq = 0; | ||
6085 | |||
6086 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && | ||
6087 | type == BTRFS_QGROUP_OPER_SUB_SHARED) | ||
6088 | mod_seq = 1; | ||
6089 | |||
6090 | ret = btrfs_qgroup_record_ref(trans, info, root_objectid, | ||
6091 | bytenr, num_bytes, type, | ||
6092 | mod_seq); | ||
6093 | } | ||
5875 | out: | 6094 | out: |
5876 | btrfs_free_path(path); | 6095 | btrfs_free_path(path); |
5877 | return ret; | 6096 | return ret; |
@@ -6008,11 +6227,15 @@ out: | |||
6008 | /* Can return -ENOMEM */ | 6227 | /* Can return -ENOMEM */ |
6009 | int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 6228 | int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
6010 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 6229 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
6011 | u64 owner, u64 offset, int for_cow) | 6230 | u64 owner, u64 offset, int no_quota) |
6012 | { | 6231 | { |
6013 | int ret; | 6232 | int ret; |
6014 | struct btrfs_fs_info *fs_info = root->fs_info; | 6233 | struct btrfs_fs_info *fs_info = root->fs_info; |
6015 | 6234 | ||
6235 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
6236 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
6237 | return 0; | ||
6238 | #endif | ||
6016 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | 6239 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); |
6017 | 6240 | ||
6018 | /* | 6241 | /* |
@@ -6028,13 +6251,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
6028 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 6251 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
6029 | num_bytes, | 6252 | num_bytes, |
6030 | parent, root_objectid, (int)owner, | 6253 | parent, root_objectid, (int)owner, |
6031 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | 6254 | BTRFS_DROP_DELAYED_REF, NULL, no_quota); |
6032 | } else { | 6255 | } else { |
6033 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, | 6256 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
6034 | num_bytes, | 6257 | num_bytes, |
6035 | parent, root_objectid, owner, | 6258 | parent, root_objectid, owner, |
6036 | offset, BTRFS_DROP_DELAYED_REF, | 6259 | offset, BTRFS_DROP_DELAYED_REF, |
6037 | NULL, for_cow); | 6260 | NULL, no_quota); |
6038 | } | 6261 | } |
6039 | return ret; | 6262 | return ret; |
6040 | } | 6263 | } |
@@ -6514,8 +6737,14 @@ loop: | |||
6514 | loop++; | 6737 | loop++; |
6515 | if (loop == LOOP_ALLOC_CHUNK) { | 6738 | if (loop == LOOP_ALLOC_CHUNK) { |
6516 | struct btrfs_trans_handle *trans; | 6739 | struct btrfs_trans_handle *trans; |
6740 | int exist = 0; | ||
6741 | |||
6742 | trans = current->journal_info; | ||
6743 | if (trans) | ||
6744 | exist = 1; | ||
6745 | else | ||
6746 | trans = btrfs_join_transaction(root); | ||
6517 | 6747 | ||
6518 | trans = btrfs_join_transaction(root); | ||
6519 | if (IS_ERR(trans)) { | 6748 | if (IS_ERR(trans)) { |
6520 | ret = PTR_ERR(trans); | 6749 | ret = PTR_ERR(trans); |
6521 | goto out; | 6750 | goto out; |
@@ -6532,7 +6761,8 @@ loop: | |||
6532 | root, ret); | 6761 | root, ret); |
6533 | else | 6762 | else |
6534 | ret = 0; | 6763 | ret = 0; |
6535 | btrfs_end_transaction(trans, root); | 6764 | if (!exist) |
6765 | btrfs_end_transaction(trans, root); | ||
6536 | if (ret) | 6766 | if (ret) |
6537 | goto out; | 6767 | goto out; |
6538 | } | 6768 | } |
@@ -6733,6 +6963,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
6733 | btrfs_mark_buffer_dirty(path->nodes[0]); | 6963 | btrfs_mark_buffer_dirty(path->nodes[0]); |
6734 | btrfs_free_path(path); | 6964 | btrfs_free_path(path); |
6735 | 6965 | ||
6966 | /* Always set parent to 0 here since it's exclusive anyway. */ | ||
6967 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
6968 | ins->objectid, ins->offset, | ||
6969 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
6970 | if (ret) | ||
6971 | return ret; | ||
6972 | |||
6736 | ret = update_block_group(root, ins->objectid, ins->offset, 1); | 6973 | ret = update_block_group(root, ins->objectid, ins->offset, 1); |
6737 | if (ret) { /* -ENOENT, logic error */ | 6974 | if (ret) { /* -ENOENT, logic error */ |
6738 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 6975 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
@@ -6747,7 +6984,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
6747 | struct btrfs_root *root, | 6984 | struct btrfs_root *root, |
6748 | u64 parent, u64 root_objectid, | 6985 | u64 parent, u64 root_objectid, |
6749 | u64 flags, struct btrfs_disk_key *key, | 6986 | u64 flags, struct btrfs_disk_key *key, |
6750 | int level, struct btrfs_key *ins) | 6987 | int level, struct btrfs_key *ins, |
6988 | int no_quota) | ||
6751 | { | 6989 | { |
6752 | int ret; | 6990 | int ret; |
6753 | struct btrfs_fs_info *fs_info = root->fs_info; | 6991 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -6757,6 +6995,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
6757 | struct btrfs_path *path; | 6995 | struct btrfs_path *path; |
6758 | struct extent_buffer *leaf; | 6996 | struct extent_buffer *leaf; |
6759 | u32 size = sizeof(*extent_item) + sizeof(*iref); | 6997 | u32 size = sizeof(*extent_item) + sizeof(*iref); |
6998 | u64 num_bytes = ins->offset; | ||
6760 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 6999 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
6761 | SKINNY_METADATA); | 7000 | SKINNY_METADATA); |
6762 | 7001 | ||
@@ -6790,6 +7029,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
6790 | 7029 | ||
6791 | if (skinny_metadata) { | 7030 | if (skinny_metadata) { |
6792 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); | 7031 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); |
7032 | num_bytes = root->leafsize; | ||
6793 | } else { | 7033 | } else { |
6794 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); | 7034 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); |
6795 | btrfs_set_tree_block_key(leaf, block_info, key); | 7035 | btrfs_set_tree_block_key(leaf, block_info, key); |
@@ -6811,6 +7051,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
6811 | btrfs_mark_buffer_dirty(leaf); | 7051 | btrfs_mark_buffer_dirty(leaf); |
6812 | btrfs_free_path(path); | 7052 | btrfs_free_path(path); |
6813 | 7053 | ||
7054 | if (!no_quota) { | ||
7055 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
7056 | ins->objectid, num_bytes, | ||
7057 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
7058 | if (ret) | ||
7059 | return ret; | ||
7060 | } | ||
7061 | |||
6814 | ret = update_block_group(root, ins->objectid, root->leafsize, 1); | 7062 | ret = update_block_group(root, ins->objectid, root->leafsize, 1); |
6815 | if (ret) { /* -ENOENT, logic error */ | 7063 | if (ret) { /* -ENOENT, logic error */ |
6816 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 7064 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
@@ -6994,6 +7242,15 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6994 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 7242 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
6995 | SKINNY_METADATA); | 7243 | SKINNY_METADATA); |
6996 | 7244 | ||
7245 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
7246 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { | ||
7247 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, | ||
7248 | blocksize, level); | ||
7249 | if (!IS_ERR(buf)) | ||
7250 | root->alloc_bytenr += blocksize; | ||
7251 | return buf; | ||
7252 | } | ||
7253 | #endif | ||
6997 | block_rsv = use_block_rsv(trans, root, blocksize); | 7254 | block_rsv = use_block_rsv(trans, root, blocksize); |
6998 | if (IS_ERR(block_rsv)) | 7255 | if (IS_ERR(block_rsv)) |
6999 | return ERR_CAST(block_rsv); | 7256 | return ERR_CAST(block_rsv); |
@@ -7735,7 +7992,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7735 | } | 7992 | } |
7736 | } | 7993 | } |
7737 | 7994 | ||
7738 | if (root->in_radix) { | 7995 | if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { |
7739 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); | 7996 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); |
7740 | } else { | 7997 | } else { |
7741 | free_extent_buffer(root->node); | 7998 | free_extent_buffer(root->node); |
@@ -8327,8 +8584,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8327 | list_del(&space_info->list); | 8584 | list_del(&space_info->list); |
8328 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | 8585 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { |
8329 | struct kobject *kobj; | 8586 | struct kobject *kobj; |
8330 | kobj = &space_info->block_group_kobjs[i]; | 8587 | kobj = space_info->block_group_kobjs[i]; |
8331 | if (kobj->parent) { | 8588 | space_info->block_group_kobjs[i] = NULL; |
8589 | if (kobj) { | ||
8332 | kobject_del(kobj); | 8590 | kobject_del(kobj); |
8333 | kobject_put(kobj); | 8591 | kobject_put(kobj); |
8334 | } | 8592 | } |
@@ -8352,17 +8610,26 @@ static void __link_block_group(struct btrfs_space_info *space_info, | |||
8352 | up_write(&space_info->groups_sem); | 8610 | up_write(&space_info->groups_sem); |
8353 | 8611 | ||
8354 | if (first) { | 8612 | if (first) { |
8355 | struct kobject *kobj = &space_info->block_group_kobjs[index]; | 8613 | struct raid_kobject *rkobj; |
8356 | int ret; | 8614 | int ret; |
8357 | 8615 | ||
8358 | kobject_get(&space_info->kobj); /* put in release */ | 8616 | rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); |
8359 | ret = kobject_add(kobj, &space_info->kobj, "%s", | 8617 | if (!rkobj) |
8360 | get_raid_name(index)); | 8618 | goto out_err; |
8619 | rkobj->raid_type = index; | ||
8620 | kobject_init(&rkobj->kobj, &btrfs_raid_ktype); | ||
8621 | ret = kobject_add(&rkobj->kobj, &space_info->kobj, | ||
8622 | "%s", get_raid_name(index)); | ||
8361 | if (ret) { | 8623 | if (ret) { |
8362 | pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); | 8624 | kobject_put(&rkobj->kobj); |
8363 | kobject_put(&space_info->kobj); | 8625 | goto out_err; |
8364 | } | 8626 | } |
8627 | space_info->block_group_kobjs[index] = &rkobj->kobj; | ||
8365 | } | 8628 | } |
8629 | |||
8630 | return; | ||
8631 | out_err: | ||
8632 | pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); | ||
8366 | } | 8633 | } |
8367 | 8634 | ||
8368 | static struct btrfs_block_group_cache * | 8635 | static struct btrfs_block_group_cache * |
@@ -8611,7 +8878,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
8611 | 8878 | ||
8612 | extent_root = root->fs_info->extent_root; | 8879 | extent_root = root->fs_info->extent_root; |
8613 | 8880 | ||
8614 | root->fs_info->last_trans_log_full_commit = trans->transid; | 8881 | btrfs_set_log_full_commit(root->fs_info, trans); |
8615 | 8882 | ||
8616 | cache = btrfs_create_block_group_cache(root, chunk_offset, size); | 8883 | cache = btrfs_create_block_group_cache(root, chunk_offset, size); |
8617 | if (!cache) | 8884 | if (!cache) |
@@ -8697,6 +8964,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8697 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 8964 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
8698 | struct btrfs_key key; | 8965 | struct btrfs_key key; |
8699 | struct inode *inode; | 8966 | struct inode *inode; |
8967 | struct kobject *kobj = NULL; | ||
8700 | int ret; | 8968 | int ret; |
8701 | int index; | 8969 | int index; |
8702 | int factor; | 8970 | int factor; |
@@ -8796,11 +9064,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8796 | */ | 9064 | */ |
8797 | list_del_init(&block_group->list); | 9065 | list_del_init(&block_group->list); |
8798 | if (list_empty(&block_group->space_info->block_groups[index])) { | 9066 | if (list_empty(&block_group->space_info->block_groups[index])) { |
8799 | kobject_del(&block_group->space_info->block_group_kobjs[index]); | 9067 | kobj = block_group->space_info->block_group_kobjs[index]; |
8800 | kobject_put(&block_group->space_info->block_group_kobjs[index]); | 9068 | block_group->space_info->block_group_kobjs[index] = NULL; |
8801 | clear_avail_alloc_bits(root->fs_info, block_group->flags); | 9069 | clear_avail_alloc_bits(root->fs_info, block_group->flags); |
8802 | } | 9070 | } |
8803 | up_write(&block_group->space_info->groups_sem); | 9071 | up_write(&block_group->space_info->groups_sem); |
9072 | if (kobj) { | ||
9073 | kobject_del(kobj); | ||
9074 | kobject_put(kobj); | ||
9075 | } | ||
8804 | 9076 | ||
8805 | if (block_group->cached == BTRFS_CACHE_STARTED) | 9077 | if (block_group->cached == BTRFS_CACHE_STARTED) |
8806 | wait_block_group_cache_done(block_group); | 9078 | wait_block_group_cache_done(block_group); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3955e475ceec..f25a9092b946 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1693,6 +1693,7 @@ again: | |||
1693 | * shortening the size of the delalloc range we're searching | 1693 | * shortening the size of the delalloc range we're searching |
1694 | */ | 1694 | */ |
1695 | free_extent_state(cached_state); | 1695 | free_extent_state(cached_state); |
1696 | cached_state = NULL; | ||
1696 | if (!loops) { | 1697 | if (!loops) { |
1697 | max_bytes = PAGE_CACHE_SIZE; | 1698 | max_bytes = PAGE_CACHE_SIZE; |
1698 | loops = 1; | 1699 | loops = 1; |
@@ -2367,6 +2368,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
2367 | if (!uptodate) { | 2368 | if (!uptodate) { |
2368 | ClearPageUptodate(page); | 2369 | ClearPageUptodate(page); |
2369 | SetPageError(page); | 2370 | SetPageError(page); |
2371 | ret = ret < 0 ? ret : -EIO; | ||
2372 | mapping_set_error(page->mapping, ret); | ||
2370 | } | 2373 | } |
2371 | return 0; | 2374 | return 0; |
2372 | } | 2375 | } |
@@ -3098,143 +3101,130 @@ static noinline void update_nr_written(struct page *page, | |||
3098 | } | 3101 | } |
3099 | 3102 | ||
3100 | /* | 3103 | /* |
3101 | * the writepage semantics are similar to regular writepage. extent | 3104 | * helper for __extent_writepage, doing all of the delayed allocation setup. |
3102 | * records are inserted to lock ranges in the tree, and as dirty areas | 3105 | * |
3103 | * are found, they are marked writeback. Then the lock bits are removed | 3106 | * This returns 1 if our fill_delalloc function did all the work required |
3104 | * and the end_io handler clears the writeback ranges | 3107 | * to write the page (copy into inline extent). In this case the IO has |
3108 | * been started and the page is already unlocked. | ||
3109 | * | ||
3110 | * This returns 0 if all went well (page still locked) | ||
3111 | * This returns < 0 if there were errors (page still locked) | ||
3105 | */ | 3112 | */ |
3106 | static int __extent_writepage(struct page *page, struct writeback_control *wbc, | 3113 | static noinline_for_stack int writepage_delalloc(struct inode *inode, |
3107 | void *data) | 3114 | struct page *page, struct writeback_control *wbc, |
3115 | struct extent_page_data *epd, | ||
3116 | u64 delalloc_start, | ||
3117 | unsigned long *nr_written) | ||
3118 | { | ||
3119 | struct extent_io_tree *tree = epd->tree; | ||
3120 | u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1; | ||
3121 | u64 nr_delalloc; | ||
3122 | u64 delalloc_to_write = 0; | ||
3123 | u64 delalloc_end = 0; | ||
3124 | int ret; | ||
3125 | int page_started = 0; | ||
3126 | |||
3127 | if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc) | ||
3128 | return 0; | ||
3129 | |||
3130 | while (delalloc_end < page_end) { | ||
3131 | nr_delalloc = find_lock_delalloc_range(inode, tree, | ||
3132 | page, | ||
3133 | &delalloc_start, | ||
3134 | &delalloc_end, | ||
3135 | 128 * 1024 * 1024); | ||
3136 | if (nr_delalloc == 0) { | ||
3137 | delalloc_start = delalloc_end + 1; | ||
3138 | continue; | ||
3139 | } | ||
3140 | ret = tree->ops->fill_delalloc(inode, page, | ||
3141 | delalloc_start, | ||
3142 | delalloc_end, | ||
3143 | &page_started, | ||
3144 | nr_written); | ||
3145 | /* File system has been set read-only */ | ||
3146 | if (ret) { | ||
3147 | SetPageError(page); | ||
3148 | /* fill_delalloc should be return < 0 for error | ||
3149 | * but just in case, we use > 0 here meaning the | ||
3150 | * IO is started, so we don't want to return > 0 | ||
3151 | * unless things are going well. | ||
3152 | */ | ||
3153 | ret = ret < 0 ? ret : -EIO; | ||
3154 | goto done; | ||
3155 | } | ||
3156 | /* | ||
3157 | * delalloc_end is already one less than the total | ||
3158 | * length, so we don't subtract one from | ||
3159 | * PAGE_CACHE_SIZE | ||
3160 | */ | ||
3161 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
3162 | PAGE_CACHE_SIZE) >> | ||
3163 | PAGE_CACHE_SHIFT; | ||
3164 | delalloc_start = delalloc_end + 1; | ||
3165 | } | ||
3166 | if (wbc->nr_to_write < delalloc_to_write) { | ||
3167 | int thresh = 8192; | ||
3168 | |||
3169 | if (delalloc_to_write < thresh * 2) | ||
3170 | thresh = delalloc_to_write; | ||
3171 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
3172 | thresh); | ||
3173 | } | ||
3174 | |||
3175 | /* did the fill delalloc function already unlock and start | ||
3176 | * the IO? | ||
3177 | */ | ||
3178 | if (page_started) { | ||
3179 | /* | ||
3180 | * we've unlocked the page, so we can't update | ||
3181 | * the mapping's writeback index, just update | ||
3182 | * nr_to_write. | ||
3183 | */ | ||
3184 | wbc->nr_to_write -= *nr_written; | ||
3185 | return 1; | ||
3186 | } | ||
3187 | |||
3188 | ret = 0; | ||
3189 | |||
3190 | done: | ||
3191 | return ret; | ||
3192 | } | ||
3193 | |||
3194 | /* | ||
3195 | * helper for __extent_writepage. This calls the writepage start hooks, | ||
3196 | * and does the loop to map the page into extents and bios. | ||
3197 | * | ||
3198 | * We return 1 if the IO is started and the page is unlocked, | ||
3199 | * 0 if all went well (page still locked) | ||
3200 | * < 0 if there were errors (page still locked) | ||
3201 | */ | ||
3202 | static noinline_for_stack int __extent_writepage_io(struct inode *inode, | ||
3203 | struct page *page, | ||
3204 | struct writeback_control *wbc, | ||
3205 | struct extent_page_data *epd, | ||
3206 | loff_t i_size, | ||
3207 | unsigned long nr_written, | ||
3208 | int write_flags, int *nr_ret) | ||
3108 | { | 3209 | { |
3109 | struct inode *inode = page->mapping->host; | ||
3110 | struct extent_page_data *epd = data; | ||
3111 | struct extent_io_tree *tree = epd->tree; | 3210 | struct extent_io_tree *tree = epd->tree; |
3112 | u64 start = page_offset(page); | 3211 | u64 start = page_offset(page); |
3113 | u64 delalloc_start; | ||
3114 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | 3212 | u64 page_end = start + PAGE_CACHE_SIZE - 1; |
3115 | u64 end; | 3213 | u64 end; |
3116 | u64 cur = start; | 3214 | u64 cur = start; |
3117 | u64 extent_offset; | 3215 | u64 extent_offset; |
3118 | u64 last_byte = i_size_read(inode); | ||
3119 | u64 block_start; | 3216 | u64 block_start; |
3120 | u64 iosize; | 3217 | u64 iosize; |
3121 | sector_t sector; | 3218 | sector_t sector; |
3122 | struct extent_state *cached_state = NULL; | 3219 | struct extent_state *cached_state = NULL; |
3123 | struct extent_map *em; | 3220 | struct extent_map *em; |
3124 | struct block_device *bdev; | 3221 | struct block_device *bdev; |
3125 | int ret; | ||
3126 | int nr = 0; | ||
3127 | size_t pg_offset = 0; | 3222 | size_t pg_offset = 0; |
3128 | size_t blocksize; | 3223 | size_t blocksize; |
3129 | loff_t i_size = i_size_read(inode); | 3224 | int ret = 0; |
3130 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | 3225 | int nr = 0; |
3131 | u64 nr_delalloc; | 3226 | bool compressed; |
3132 | u64 delalloc_end; | ||
3133 | int page_started; | ||
3134 | int compressed; | ||
3135 | int write_flags; | ||
3136 | unsigned long nr_written = 0; | ||
3137 | bool fill_delalloc = true; | ||
3138 | |||
3139 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3140 | write_flags = WRITE_SYNC; | ||
3141 | else | ||
3142 | write_flags = WRITE; | ||
3143 | |||
3144 | trace___extent_writepage(page, inode, wbc); | ||
3145 | |||
3146 | WARN_ON(!PageLocked(page)); | ||
3147 | |||
3148 | ClearPageError(page); | ||
3149 | |||
3150 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | ||
3151 | if (page->index > end_index || | ||
3152 | (page->index == end_index && !pg_offset)) { | ||
3153 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | ||
3154 | unlock_page(page); | ||
3155 | return 0; | ||
3156 | } | ||
3157 | |||
3158 | if (page->index == end_index) { | ||
3159 | char *userpage; | ||
3160 | |||
3161 | userpage = kmap_atomic(page); | ||
3162 | memset(userpage + pg_offset, 0, | ||
3163 | PAGE_CACHE_SIZE - pg_offset); | ||
3164 | kunmap_atomic(userpage); | ||
3165 | flush_dcache_page(page); | ||
3166 | } | ||
3167 | pg_offset = 0; | ||
3168 | |||
3169 | set_page_extent_mapped(page); | ||
3170 | |||
3171 | if (!tree->ops || !tree->ops->fill_delalloc) | ||
3172 | fill_delalloc = false; | ||
3173 | |||
3174 | delalloc_start = start; | ||
3175 | delalloc_end = 0; | ||
3176 | page_started = 0; | ||
3177 | if (!epd->extent_locked && fill_delalloc) { | ||
3178 | u64 delalloc_to_write = 0; | ||
3179 | /* | ||
3180 | * make sure the wbc mapping index is at least updated | ||
3181 | * to this page. | ||
3182 | */ | ||
3183 | update_nr_written(page, wbc, 0); | ||
3184 | |||
3185 | while (delalloc_end < page_end) { | ||
3186 | nr_delalloc = find_lock_delalloc_range(inode, tree, | ||
3187 | page, | ||
3188 | &delalloc_start, | ||
3189 | &delalloc_end, | ||
3190 | 128 * 1024 * 1024); | ||
3191 | if (nr_delalloc == 0) { | ||
3192 | delalloc_start = delalloc_end + 1; | ||
3193 | continue; | ||
3194 | } | ||
3195 | ret = tree->ops->fill_delalloc(inode, page, | ||
3196 | delalloc_start, | ||
3197 | delalloc_end, | ||
3198 | &page_started, | ||
3199 | &nr_written); | ||
3200 | /* File system has been set read-only */ | ||
3201 | if (ret) { | ||
3202 | SetPageError(page); | ||
3203 | goto done; | ||
3204 | } | ||
3205 | /* | ||
3206 | * delalloc_end is already one less than the total | ||
3207 | * length, so we don't subtract one from | ||
3208 | * PAGE_CACHE_SIZE | ||
3209 | */ | ||
3210 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
3211 | PAGE_CACHE_SIZE) >> | ||
3212 | PAGE_CACHE_SHIFT; | ||
3213 | delalloc_start = delalloc_end + 1; | ||
3214 | } | ||
3215 | if (wbc->nr_to_write < delalloc_to_write) { | ||
3216 | int thresh = 8192; | ||
3217 | |||
3218 | if (delalloc_to_write < thresh * 2) | ||
3219 | thresh = delalloc_to_write; | ||
3220 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
3221 | thresh); | ||
3222 | } | ||
3223 | 3227 | ||
3224 | /* did the fill delalloc function already unlock and start | ||
3225 | * the IO? | ||
3226 | */ | ||
3227 | if (page_started) { | ||
3228 | ret = 0; | ||
3229 | /* | ||
3230 | * we've unlocked the page, so we can't update | ||
3231 | * the mapping's writeback index, just update | ||
3232 | * nr_to_write. | ||
3233 | */ | ||
3234 | wbc->nr_to_write -= nr_written; | ||
3235 | goto done_unlocked; | ||
3236 | } | ||
3237 | } | ||
3238 | if (tree->ops && tree->ops->writepage_start_hook) { | 3228 | if (tree->ops && tree->ops->writepage_start_hook) { |
3239 | ret = tree->ops->writepage_start_hook(page, start, | 3229 | ret = tree->ops->writepage_start_hook(page, start, |
3240 | page_end); | 3230 | page_end); |
@@ -3244,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3244 | wbc->pages_skipped++; | 3234 | wbc->pages_skipped++; |
3245 | else | 3235 | else |
3246 | redirty_page_for_writepage(wbc, page); | 3236 | redirty_page_for_writepage(wbc, page); |
3237 | |||
3247 | update_nr_written(page, wbc, nr_written); | 3238 | update_nr_written(page, wbc, nr_written); |
3248 | unlock_page(page); | 3239 | unlock_page(page); |
3249 | ret = 0; | 3240 | ret = 1; |
3250 | goto done_unlocked; | 3241 | goto done_unlocked; |
3251 | } | 3242 | } |
3252 | } | 3243 | } |
@@ -3258,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3258 | update_nr_written(page, wbc, nr_written + 1); | 3249 | update_nr_written(page, wbc, nr_written + 1); |
3259 | 3250 | ||
3260 | end = page_end; | 3251 | end = page_end; |
3261 | if (last_byte <= start) { | 3252 | if (i_size <= start) { |
3262 | if (tree->ops && tree->ops->writepage_end_io_hook) | 3253 | if (tree->ops && tree->ops->writepage_end_io_hook) |
3263 | tree->ops->writepage_end_io_hook(page, start, | 3254 | tree->ops->writepage_end_io_hook(page, start, |
3264 | page_end, NULL, 1); | 3255 | page_end, NULL, 1); |
@@ -3268,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3268 | blocksize = inode->i_sb->s_blocksize; | 3259 | blocksize = inode->i_sb->s_blocksize; |
3269 | 3260 | ||
3270 | while (cur <= end) { | 3261 | while (cur <= end) { |
3271 | if (cur >= last_byte) { | 3262 | u64 em_end; |
3263 | if (cur >= i_size) { | ||
3272 | if (tree->ops && tree->ops->writepage_end_io_hook) | 3264 | if (tree->ops && tree->ops->writepage_end_io_hook) |
3273 | tree->ops->writepage_end_io_hook(page, cur, | 3265 | tree->ops->writepage_end_io_hook(page, cur, |
3274 | page_end, NULL, 1); | 3266 | page_end, NULL, 1); |
@@ -3278,13 +3270,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3278 | end - cur + 1, 1); | 3270 | end - cur + 1, 1); |
3279 | if (IS_ERR_OR_NULL(em)) { | 3271 | if (IS_ERR_OR_NULL(em)) { |
3280 | SetPageError(page); | 3272 | SetPageError(page); |
3273 | ret = PTR_ERR_OR_ZERO(em); | ||
3281 | break; | 3274 | break; |
3282 | } | 3275 | } |
3283 | 3276 | ||
3284 | extent_offset = cur - em->start; | 3277 | extent_offset = cur - em->start; |
3285 | BUG_ON(extent_map_end(em) <= cur); | 3278 | em_end = extent_map_end(em); |
3279 | BUG_ON(em_end <= cur); | ||
3286 | BUG_ON(end < cur); | 3280 | BUG_ON(end < cur); |
3287 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 3281 | iosize = min(em_end - cur, end - cur + 1); |
3288 | iosize = ALIGN(iosize, blocksize); | 3282 | iosize = ALIGN(iosize, blocksize); |
3289 | sector = (em->block_start + extent_offset) >> 9; | 3283 | sector = (em->block_start + extent_offset) >> 9; |
3290 | bdev = em->bdev; | 3284 | bdev = em->bdev; |
@@ -3320,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3320 | pg_offset += iosize; | 3314 | pg_offset += iosize; |
3321 | continue; | 3315 | continue; |
3322 | } | 3316 | } |
3323 | /* leave this out until we have a page_mkwrite call */ | ||
3324 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | ||
3325 | EXTENT_DIRTY, 0, NULL)) { | ||
3326 | cur = cur + iosize; | ||
3327 | pg_offset += iosize; | ||
3328 | continue; | ||
3329 | } | ||
3330 | 3317 | ||
3331 | if (tree->ops && tree->ops->writepage_io_hook) { | 3318 | if (tree->ops && tree->ops->writepage_io_hook) { |
3332 | ret = tree->ops->writepage_io_hook(page, cur, | 3319 | ret = tree->ops->writepage_io_hook(page, cur, |
@@ -3337,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3337 | if (ret) { | 3324 | if (ret) { |
3338 | SetPageError(page); | 3325 | SetPageError(page); |
3339 | } else { | 3326 | } else { |
3340 | unsigned long max_nr = end_index + 1; | 3327 | unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1; |
3341 | 3328 | ||
3342 | set_range_writeback(tree, cur, cur + iosize - 1); | 3329 | set_range_writeback(tree, cur, cur + iosize - 1); |
3343 | if (!PageWriteback(page)) { | 3330 | if (!PageWriteback(page)) { |
@@ -3359,17 +3346,94 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
3359 | nr++; | 3346 | nr++; |
3360 | } | 3347 | } |
3361 | done: | 3348 | done: |
3349 | *nr_ret = nr; | ||
3350 | |||
3351 | done_unlocked: | ||
3352 | |||
3353 | /* drop our reference on any cached states */ | ||
3354 | free_extent_state(cached_state); | ||
3355 | return ret; | ||
3356 | } | ||
3357 | |||
3358 | /* | ||
3359 | * the writepage semantics are similar to regular writepage. extent | ||
3360 | * records are inserted to lock ranges in the tree, and as dirty areas | ||
3361 | * are found, they are marked writeback. Then the lock bits are removed | ||
3362 | * and the end_io handler clears the writeback ranges | ||
3363 | */ | ||
3364 | static int __extent_writepage(struct page *page, struct writeback_control *wbc, | ||
3365 | void *data) | ||
3366 | { | ||
3367 | struct inode *inode = page->mapping->host; | ||
3368 | struct extent_page_data *epd = data; | ||
3369 | u64 start = page_offset(page); | ||
3370 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | ||
3371 | int ret; | ||
3372 | int nr = 0; | ||
3373 | size_t pg_offset = 0; | ||
3374 | loff_t i_size = i_size_read(inode); | ||
3375 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | ||
3376 | int write_flags; | ||
3377 | unsigned long nr_written = 0; | ||
3378 | |||
3379 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3380 | write_flags = WRITE_SYNC; | ||
3381 | else | ||
3382 | write_flags = WRITE; | ||
3383 | |||
3384 | trace___extent_writepage(page, inode, wbc); | ||
3385 | |||
3386 | WARN_ON(!PageLocked(page)); | ||
3387 | |||
3388 | ClearPageError(page); | ||
3389 | |||
3390 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | ||
3391 | if (page->index > end_index || | ||
3392 | (page->index == end_index && !pg_offset)) { | ||
3393 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | ||
3394 | unlock_page(page); | ||
3395 | return 0; | ||
3396 | } | ||
3397 | |||
3398 | if (page->index == end_index) { | ||
3399 | char *userpage; | ||
3400 | |||
3401 | userpage = kmap_atomic(page); | ||
3402 | memset(userpage + pg_offset, 0, | ||
3403 | PAGE_CACHE_SIZE - pg_offset); | ||
3404 | kunmap_atomic(userpage); | ||
3405 | flush_dcache_page(page); | ||
3406 | } | ||
3407 | |||
3408 | pg_offset = 0; | ||
3409 | |||
3410 | set_page_extent_mapped(page); | ||
3411 | |||
3412 | ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written); | ||
3413 | if (ret == 1) | ||
3414 | goto done_unlocked; | ||
3415 | if (ret) | ||
3416 | goto done; | ||
3417 | |||
3418 | ret = __extent_writepage_io(inode, page, wbc, epd, | ||
3419 | i_size, nr_written, write_flags, &nr); | ||
3420 | if (ret == 1) | ||
3421 | goto done_unlocked; | ||
3422 | |||
3423 | done: | ||
3362 | if (nr == 0) { | 3424 | if (nr == 0) { |
3363 | /* make sure the mapping tag for page dirty gets cleared */ | 3425 | /* make sure the mapping tag for page dirty gets cleared */ |
3364 | set_page_writeback(page); | 3426 | set_page_writeback(page); |
3365 | end_page_writeback(page); | 3427 | end_page_writeback(page); |
3366 | } | 3428 | } |
3429 | if (PageError(page)) { | ||
3430 | ret = ret < 0 ? ret : -EIO; | ||
3431 | end_extent_writepage(page, ret, start, page_end); | ||
3432 | } | ||
3367 | unlock_page(page); | 3433 | unlock_page(page); |
3434 | return ret; | ||
3368 | 3435 | ||
3369 | done_unlocked: | 3436 | done_unlocked: |
3370 | |||
3371 | /* drop our reference on any cached states */ | ||
3372 | free_extent_state(cached_state); | ||
3373 | return 0; | 3437 | return 0; |
3374 | } | 3438 | } |
3375 | 3439 | ||
@@ -3385,9 +3449,10 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) | |||
3385 | TASK_UNINTERRUPTIBLE); | 3449 | TASK_UNINTERRUPTIBLE); |
3386 | } | 3450 | } |
3387 | 3451 | ||
3388 | static int lock_extent_buffer_for_io(struct extent_buffer *eb, | 3452 | static noinline_for_stack int |
3389 | struct btrfs_fs_info *fs_info, | 3453 | lock_extent_buffer_for_io(struct extent_buffer *eb, |
3390 | struct extent_page_data *epd) | 3454 | struct btrfs_fs_info *fs_info, |
3455 | struct extent_page_data *epd) | ||
3391 | { | 3456 | { |
3392 | unsigned long i, num_pages; | 3457 | unsigned long i, num_pages; |
3393 | int flush = 0; | 3458 | int flush = 0; |
@@ -3458,7 +3523,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
3458 | static void end_extent_buffer_writeback(struct extent_buffer *eb) | 3523 | static void end_extent_buffer_writeback(struct extent_buffer *eb) |
3459 | { | 3524 | { |
3460 | clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); | 3525 | clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); |
3461 | smp_mb__after_clear_bit(); | 3526 | smp_mb__after_atomic(); |
3462 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); | 3527 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); |
3463 | } | 3528 | } |
3464 | 3529 | ||
@@ -3492,7 +3557,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) | |||
3492 | bio_put(bio); | 3557 | bio_put(bio); |
3493 | } | 3558 | } |
3494 | 3559 | ||
3495 | static int write_one_eb(struct extent_buffer *eb, | 3560 | static noinline_for_stack int write_one_eb(struct extent_buffer *eb, |
3496 | struct btrfs_fs_info *fs_info, | 3561 | struct btrfs_fs_info *fs_info, |
3497 | struct writeback_control *wbc, | 3562 | struct writeback_control *wbc, |
3498 | struct extent_page_data *epd) | 3563 | struct extent_page_data *epd) |
@@ -3690,6 +3755,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
3690 | struct inode *inode = mapping->host; | 3755 | struct inode *inode = mapping->host; |
3691 | int ret = 0; | 3756 | int ret = 0; |
3692 | int done = 0; | 3757 | int done = 0; |
3758 | int err = 0; | ||
3693 | int nr_to_write_done = 0; | 3759 | int nr_to_write_done = 0; |
3694 | struct pagevec pvec; | 3760 | struct pagevec pvec; |
3695 | int nr_pages; | 3761 | int nr_pages; |
@@ -3776,8 +3842,8 @@ retry: | |||
3776 | unlock_page(page); | 3842 | unlock_page(page); |
3777 | ret = 0; | 3843 | ret = 0; |
3778 | } | 3844 | } |
3779 | if (ret) | 3845 | if (!err && ret < 0) |
3780 | done = 1; | 3846 | err = ret; |
3781 | 3847 | ||
3782 | /* | 3848 | /* |
3783 | * the filesystem may choose to bump up nr_to_write. | 3849 | * the filesystem may choose to bump up nr_to_write. |
@@ -3789,7 +3855,7 @@ retry: | |||
3789 | pagevec_release(&pvec); | 3855 | pagevec_release(&pvec); |
3790 | cond_resched(); | 3856 | cond_resched(); |
3791 | } | 3857 | } |
3792 | if (!scanned && !done) { | 3858 | if (!scanned && !done && !err) { |
3793 | /* | 3859 | /* |
3794 | * We hit the last page and there is more work to be done: wrap | 3860 | * We hit the last page and there is more work to be done: wrap |
3795 | * back to the start of the file | 3861 | * back to the start of the file |
@@ -3799,7 +3865,7 @@ retry: | |||
3799 | goto retry; | 3865 | goto retry; |
3800 | } | 3866 | } |
3801 | btrfs_add_delayed_iput(inode); | 3867 | btrfs_add_delayed_iput(inode); |
3802 | return ret; | 3868 | return err; |
3803 | } | 3869 | } |
3804 | 3870 | ||
3805 | static void flush_epd_write_bio(struct extent_page_data *epd) | 3871 | static void flush_epd_write_bio(struct extent_page_data *epd) |
@@ -4510,7 +4576,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) | |||
4510 | spin_unlock(&eb->refs_lock); | 4576 | spin_unlock(&eb->refs_lock); |
4511 | } | 4577 | } |
4512 | 4578 | ||
4513 | static void mark_extent_buffer_accessed(struct extent_buffer *eb) | 4579 | static void mark_extent_buffer_accessed(struct extent_buffer *eb, |
4580 | struct page *accessed) | ||
4514 | { | 4581 | { |
4515 | unsigned long num_pages, i; | 4582 | unsigned long num_pages, i; |
4516 | 4583 | ||
@@ -4519,7 +4586,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) | |||
4519 | num_pages = num_extent_pages(eb->start, eb->len); | 4586 | num_pages = num_extent_pages(eb->start, eb->len); |
4520 | for (i = 0; i < num_pages; i++) { | 4587 | for (i = 0; i < num_pages; i++) { |
4521 | struct page *p = extent_buffer_page(eb, i); | 4588 | struct page *p = extent_buffer_page(eb, i); |
4522 | mark_page_accessed(p); | 4589 | if (p != accessed) |
4590 | mark_page_accessed(p); | ||
4523 | } | 4591 | } |
4524 | } | 4592 | } |
4525 | 4593 | ||
@@ -4533,7 +4601,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4533 | start >> PAGE_CACHE_SHIFT); | 4601 | start >> PAGE_CACHE_SHIFT); |
4534 | if (eb && atomic_inc_not_zero(&eb->refs)) { | 4602 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
4535 | rcu_read_unlock(); | 4603 | rcu_read_unlock(); |
4536 | mark_extent_buffer_accessed(eb); | 4604 | mark_extent_buffer_accessed(eb, NULL); |
4537 | return eb; | 4605 | return eb; |
4538 | } | 4606 | } |
4539 | rcu_read_unlock(); | 4607 | rcu_read_unlock(); |
@@ -4541,6 +4609,53 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4541 | return NULL; | 4609 | return NULL; |
4542 | } | 4610 | } |
4543 | 4611 | ||
4612 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
4613 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | ||
4614 | u64 start, unsigned long len) | ||
4615 | { | ||
4616 | struct extent_buffer *eb, *exists = NULL; | ||
4617 | int ret; | ||
4618 | |||
4619 | eb = find_extent_buffer(fs_info, start); | ||
4620 | if (eb) | ||
4621 | return eb; | ||
4622 | eb = alloc_dummy_extent_buffer(start, len); | ||
4623 | if (!eb) | ||
4624 | return NULL; | ||
4625 | eb->fs_info = fs_info; | ||
4626 | again: | ||
4627 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
4628 | if (ret) | ||
4629 | goto free_eb; | ||
4630 | spin_lock(&fs_info->buffer_lock); | ||
4631 | ret = radix_tree_insert(&fs_info->buffer_radix, | ||
4632 | start >> PAGE_CACHE_SHIFT, eb); | ||
4633 | spin_unlock(&fs_info->buffer_lock); | ||
4634 | radix_tree_preload_end(); | ||
4635 | if (ret == -EEXIST) { | ||
4636 | exists = find_extent_buffer(fs_info, start); | ||
4637 | if (exists) | ||
4638 | goto free_eb; | ||
4639 | else | ||
4640 | goto again; | ||
4641 | } | ||
4642 | check_buffer_tree_ref(eb); | ||
4643 | set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); | ||
4644 | |||
4645 | /* | ||
4646 | * We will free dummy extent buffer's if they come into | ||
4647 | * free_extent_buffer with a ref count of 2, but if we are using this we | ||
4648 | * want the buffers to stay in memory until we're done with them, so | ||
4649 | * bump the ref count again. | ||
4650 | */ | ||
4651 | atomic_inc(&eb->refs); | ||
4652 | return eb; | ||
4653 | free_eb: | ||
4654 | btrfs_release_extent_buffer(eb); | ||
4655 | return exists; | ||
4656 | } | ||
4657 | #endif | ||
4658 | |||
4544 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | 4659 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
4545 | u64 start, unsigned long len) | 4660 | u64 start, unsigned long len) |
4546 | { | 4661 | { |
@@ -4581,7 +4696,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4581 | spin_unlock(&mapping->private_lock); | 4696 | spin_unlock(&mapping->private_lock); |
4582 | unlock_page(p); | 4697 | unlock_page(p); |
4583 | page_cache_release(p); | 4698 | page_cache_release(p); |
4584 | mark_extent_buffer_accessed(exists); | 4699 | mark_extent_buffer_accessed(exists, p); |
4585 | goto free_eb; | 4700 | goto free_eb; |
4586 | } | 4701 | } |
4587 | 4702 | ||
@@ -4596,7 +4711,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4596 | attach_extent_buffer_page(eb, p); | 4711 | attach_extent_buffer_page(eb, p); |
4597 | spin_unlock(&mapping->private_lock); | 4712 | spin_unlock(&mapping->private_lock); |
4598 | WARN_ON(PageDirty(p)); | 4713 | WARN_ON(PageDirty(p)); |
4599 | mark_page_accessed(p); | ||
4600 | eb->pages[i] = p; | 4714 | eb->pages[i] = p; |
4601 | if (!PageUptodate(p)) | 4715 | if (!PageUptodate(p)) |
4602 | uptodate = 0; | 4716 | uptodate = 0; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c488b45237bf..8b63f2d46518 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -350,5 +350,7 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
350 | struct extent_io_tree *tree, | 350 | struct extent_io_tree *tree, |
351 | struct page *locked_page, u64 *start, | 351 | struct page *locked_page, u64 *start, |
352 | u64 *end, u64 max_bytes); | 352 | u64 *end, u64 max_bytes); |
353 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | ||
354 | u64 start, unsigned long len); | ||
353 | #endif | 355 | #endif |
354 | #endif | 356 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 127555b29f58..f46cfe45d686 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -281,10 +281,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
281 | found: | 281 | found: |
282 | csum += count * csum_size; | 282 | csum += count * csum_size; |
283 | nblocks -= count; | 283 | nblocks -= count; |
284 | bio_index += count; | ||
284 | while (count--) { | 285 | while (count--) { |
285 | disk_bytenr += bvec->bv_len; | 286 | disk_bytenr += bvec->bv_len; |
286 | offset += bvec->bv_len; | 287 | offset += bvec->bv_len; |
287 | bio_index++; | ||
288 | bvec++; | 288 | bvec++; |
289 | } | 289 | } |
290 | } | 290 | } |
@@ -750,7 +750,7 @@ again: | |||
750 | int slot = path->slots[0] + 1; | 750 | int slot = path->slots[0] + 1; |
751 | /* we didn't find a csum item, insert one */ | 751 | /* we didn't find a csum item, insert one */ |
752 | nritems = btrfs_header_nritems(path->nodes[0]); | 752 | nritems = btrfs_header_nritems(path->nodes[0]); |
753 | if (path->slots[0] >= nritems - 1) { | 753 | if (!nritems || (path->slots[0] >= nritems - 1)) { |
754 | ret = btrfs_next_leaf(root, path); | 754 | ret = btrfs_next_leaf(root, path); |
755 | if (ret == 1) | 755 | if (ret == 1) |
756 | found_next = 1; | 756 | found_next = 1; |
@@ -885,3 +885,79 @@ out: | |||
885 | fail_unlock: | 885 | fail_unlock: |
886 | goto out; | 886 | goto out; |
887 | } | 887 | } |
888 | |||
889 | void btrfs_extent_item_to_extent_map(struct inode *inode, | ||
890 | const struct btrfs_path *path, | ||
891 | struct btrfs_file_extent_item *fi, | ||
892 | const bool new_inline, | ||
893 | struct extent_map *em) | ||
894 | { | ||
895 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
896 | struct extent_buffer *leaf = path->nodes[0]; | ||
897 | const int slot = path->slots[0]; | ||
898 | struct btrfs_key key; | ||
899 | u64 extent_start, extent_end; | ||
900 | u64 bytenr; | ||
901 | u8 type = btrfs_file_extent_type(leaf, fi); | ||
902 | int compress_type = btrfs_file_extent_compression(leaf, fi); | ||
903 | |||
904 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
905 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
906 | extent_start = key.offset; | ||
907 | |||
908 | if (type == BTRFS_FILE_EXTENT_REG || | ||
909 | type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
910 | extent_end = extent_start + | ||
911 | btrfs_file_extent_num_bytes(leaf, fi); | ||
912 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | ||
913 | size_t size; | ||
914 | size = btrfs_file_extent_inline_len(leaf, slot, fi); | ||
915 | extent_end = ALIGN(extent_start + size, root->sectorsize); | ||
916 | } | ||
917 | |||
918 | em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
919 | if (type == BTRFS_FILE_EXTENT_REG || | ||
920 | type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
921 | em->start = extent_start; | ||
922 | em->len = extent_end - extent_start; | ||
923 | em->orig_start = extent_start - | ||
924 | btrfs_file_extent_offset(leaf, fi); | ||
925 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
926 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
927 | if (bytenr == 0) { | ||
928 | em->block_start = EXTENT_MAP_HOLE; | ||
929 | return; | ||
930 | } | ||
931 | if (compress_type != BTRFS_COMPRESS_NONE) { | ||
932 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
933 | em->compress_type = compress_type; | ||
934 | em->block_start = bytenr; | ||
935 | em->block_len = em->orig_block_len; | ||
936 | } else { | ||
937 | bytenr += btrfs_file_extent_offset(leaf, fi); | ||
938 | em->block_start = bytenr; | ||
939 | em->block_len = em->len; | ||
940 | if (type == BTRFS_FILE_EXTENT_PREALLOC) | ||
941 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
942 | } | ||
943 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | ||
944 | em->block_start = EXTENT_MAP_INLINE; | ||
945 | em->start = extent_start; | ||
946 | em->len = extent_end - extent_start; | ||
947 | /* | ||
948 | * Initialize orig_start and block_len with the same values | ||
949 | * as in inode.c:btrfs_get_extent(). | ||
950 | */ | ||
951 | em->orig_start = EXTENT_MAP_HOLE; | ||
952 | em->block_len = (u64)-1; | ||
953 | if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) { | ||
954 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
955 | em->compress_type = compress_type; | ||
956 | } | ||
957 | } else { | ||
958 | btrfs_err(root->fs_info, | ||
959 | "unknown file extent item type %d, inode %llu, offset %llu, root %llu", | ||
960 | type, btrfs_ino(inode), extent_start, | ||
961 | root->root_key.objectid); | ||
962 | } | ||
963 | } | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 17e7393c50f0..1f2b99cb55ea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "locking.h" | 41 | #include "locking.h" |
42 | #include "volumes.h" | 42 | #include "volumes.h" |
43 | #include "qgroup.h" | ||
43 | 44 | ||
44 | static struct kmem_cache *btrfs_inode_defrag_cachep; | 45 | static struct kmem_cache *btrfs_inode_defrag_cachep; |
45 | /* | 46 | /* |
@@ -470,11 +471,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
470 | for (i = 0; i < num_pages; i++) { | 471 | for (i = 0; i < num_pages; i++) { |
471 | /* page checked is some magic around finding pages that | 472 | /* page checked is some magic around finding pages that |
472 | * have been modified without going through btrfs_set_page_dirty | 473 | * have been modified without going through btrfs_set_page_dirty |
473 | * clear it here | 474 | * clear it here. There should be no need to mark the pages |
475 | * accessed as prepare_pages should have marked them accessed | ||
476 | * in prepare_pages via find_or_create_page() | ||
474 | */ | 477 | */ |
475 | ClearPageChecked(pages[i]); | 478 | ClearPageChecked(pages[i]); |
476 | unlock_page(pages[i]); | 479 | unlock_page(pages[i]); |
477 | mark_page_accessed(pages[i]); | ||
478 | page_cache_release(pages[i]); | 480 | page_cache_release(pages[i]); |
479 | } | 481 | } |
480 | } | 482 | } |
@@ -714,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
714 | int recow; | 716 | int recow; |
715 | int ret; | 717 | int ret; |
716 | int modify_tree = -1; | 718 | int modify_tree = -1; |
717 | int update_refs = (root->ref_cows || root == root->fs_info->tree_root); | 719 | int update_refs; |
718 | int found = 0; | 720 | int found = 0; |
719 | int leafs_visited = 0; | 721 | int leafs_visited = 0; |
720 | 722 | ||
@@ -724,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
724 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) | 726 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
725 | modify_tree = 0; | 727 | modify_tree = 0; |
726 | 728 | ||
729 | update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | ||
730 | root == root->fs_info->tree_root); | ||
727 | while (1) { | 731 | while (1) { |
728 | recow = 0; | 732 | recow = 0; |
729 | ret = btrfs_lookup_file_extent(trans, root, path, ino, | 733 | ret = btrfs_lookup_file_extent(trans, root, path, ino, |
@@ -780,6 +784,18 @@ next_slot: | |||
780 | extent_end = search_start; | 784 | extent_end = search_start; |
781 | } | 785 | } |
782 | 786 | ||
787 | /* | ||
788 | * Don't skip extent items representing 0 byte lengths. They | ||
789 | * used to be created (bug) if while punching holes we hit | ||
790 | * -ENOSPC condition. So if we find one here, just ensure we | ||
791 | * delete it, otherwise we would insert a new file extent item | ||
792 | * with the same key (offset) as that 0 bytes length file | ||
793 | * extent item in the call to setup_items_for_insert() later | ||
794 | * in this function. | ||
795 | */ | ||
796 | if (extent_end == key.offset && extent_end >= search_start) | ||
797 | goto delete_extent_item; | ||
798 | |||
783 | if (extent_end <= search_start) { | 799 | if (extent_end <= search_start) { |
784 | path->slots[0]++; | 800 | path->slots[0]++; |
785 | goto next_slot; | 801 | goto next_slot; |
@@ -835,7 +851,7 @@ next_slot: | |||
835 | disk_bytenr, num_bytes, 0, | 851 | disk_bytenr, num_bytes, 0, |
836 | root->root_key.objectid, | 852 | root->root_key.objectid, |
837 | new_key.objectid, | 853 | new_key.objectid, |
838 | start - extent_offset, 0); | 854 | start - extent_offset, 1); |
839 | BUG_ON(ret); /* -ENOMEM */ | 855 | BUG_ON(ret); /* -ENOMEM */ |
840 | } | 856 | } |
841 | key.offset = start; | 857 | key.offset = start; |
@@ -893,6 +909,7 @@ next_slot: | |||
893 | * | ------ extent ------ | | 909 | * | ------ extent ------ | |
894 | */ | 910 | */ |
895 | if (start <= key.offset && end >= extent_end) { | 911 | if (start <= key.offset && end >= extent_end) { |
912 | delete_extent_item: | ||
896 | if (del_nr == 0) { | 913 | if (del_nr == 0) { |
897 | del_slot = path->slots[0]; | 914 | del_slot = path->slots[0]; |
898 | del_nr = 1; | 915 | del_nr = 1; |
@@ -1191,7 +1208,7 @@ again: | |||
1191 | 1208 | ||
1192 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, | 1209 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
1193 | root->root_key.objectid, | 1210 | root->root_key.objectid, |
1194 | ino, orig_offset, 0); | 1211 | ino, orig_offset, 1); |
1195 | BUG_ON(ret); /* -ENOMEM */ | 1212 | BUG_ON(ret); /* -ENOMEM */ |
1196 | 1213 | ||
1197 | if (split == start) { | 1214 | if (split == start) { |
@@ -1994,8 +2011,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1994 | if (!full_sync) { | 2011 | if (!full_sync) { |
1995 | ret = btrfs_wait_ordered_range(inode, start, | 2012 | ret = btrfs_wait_ordered_range(inode, start, |
1996 | end - start + 1); | 2013 | end - start + 1); |
1997 | if (ret) | 2014 | if (ret) { |
2015 | btrfs_end_transaction(trans, root); | ||
1998 | goto out; | 2016 | goto out; |
2017 | } | ||
1999 | } | 2018 | } |
2000 | ret = btrfs_commit_transaction(trans, root); | 2019 | ret = btrfs_commit_transaction(trans, root); |
2001 | } else { | 2020 | } else { |
@@ -2153,6 +2172,37 @@ out: | |||
2153 | return 0; | 2172 | return 0; |
2154 | } | 2173 | } |
2155 | 2174 | ||
2175 | /* | ||
2176 | * Find a hole extent on given inode and change start/len to the end of hole | ||
2177 | * extent. (hole/vacuum extent whose em->start <= start && | ||
2178 | * em->start + em->len > start) | ||
2179 | * When a hole extent is found, return 1 and modify start/len. | ||
2180 | */ | ||
2181 | static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) | ||
2182 | { | ||
2183 | struct extent_map *em; | ||
2184 | int ret = 0; | ||
2185 | |||
2186 | em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0); | ||
2187 | if (IS_ERR_OR_NULL(em)) { | ||
2188 | if (!em) | ||
2189 | ret = -ENOMEM; | ||
2190 | else | ||
2191 | ret = PTR_ERR(em); | ||
2192 | return ret; | ||
2193 | } | ||
2194 | |||
2195 | /* Hole or vacuum extent(only exists in no-hole mode) */ | ||
2196 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
2197 | ret = 1; | ||
2198 | *len = em->start + em->len > *start + *len ? | ||
2199 | 0 : *start + *len - em->start - em->len; | ||
2200 | *start = em->start + em->len; | ||
2201 | } | ||
2202 | free_extent_map(em); | ||
2203 | return ret; | ||
2204 | } | ||
2205 | |||
2156 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | 2206 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) |
2157 | { | 2207 | { |
2158 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2208 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -2160,25 +2210,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2160 | struct btrfs_path *path; | 2210 | struct btrfs_path *path; |
2161 | struct btrfs_block_rsv *rsv; | 2211 | struct btrfs_block_rsv *rsv; |
2162 | struct btrfs_trans_handle *trans; | 2212 | struct btrfs_trans_handle *trans; |
2163 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | 2213 | u64 lockstart; |
2164 | u64 lockend = round_down(offset + len, | 2214 | u64 lockend; |
2165 | BTRFS_I(inode)->root->sectorsize) - 1; | 2215 | u64 tail_start; |
2166 | u64 cur_offset = lockstart; | 2216 | u64 tail_len; |
2217 | u64 orig_start = offset; | ||
2218 | u64 cur_offset; | ||
2167 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 2219 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
2168 | u64 drop_end; | 2220 | u64 drop_end; |
2169 | int ret = 0; | 2221 | int ret = 0; |
2170 | int err = 0; | 2222 | int err = 0; |
2171 | int rsv_count; | 2223 | int rsv_count; |
2172 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2224 | bool same_page; |
2173 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
2174 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2225 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2175 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | 2226 | u64 ino_size; |
2176 | 2227 | ||
2177 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2228 | ret = btrfs_wait_ordered_range(inode, offset, len); |
2178 | if (ret) | 2229 | if (ret) |
2179 | return ret; | 2230 | return ret; |
2180 | 2231 | ||
2181 | mutex_lock(&inode->i_mutex); | 2232 | mutex_lock(&inode->i_mutex); |
2233 | ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
2234 | ret = find_first_non_hole(inode, &offset, &len); | ||
2235 | if (ret < 0) | ||
2236 | goto out_only_mutex; | ||
2237 | if (ret && !len) { | ||
2238 | /* Already in a large hole */ | ||
2239 | ret = 0; | ||
2240 | goto out_only_mutex; | ||
2241 | } | ||
2242 | |||
2243 | lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); | ||
2244 | lockend = round_down(offset + len, | ||
2245 | BTRFS_I(inode)->root->sectorsize) - 1; | ||
2246 | same_page = ((offset >> PAGE_CACHE_SHIFT) == | ||
2247 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
2248 | |||
2182 | /* | 2249 | /* |
2183 | * We needn't truncate any page which is beyond the end of the file | 2250 | * We needn't truncate any page which is beyond the end of the file |
2184 | * because we are sure there is no data there. | 2251 | * because we are sure there is no data there. |
@@ -2190,8 +2257,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2190 | if (same_page && len < PAGE_CACHE_SIZE) { | 2257 | if (same_page && len < PAGE_CACHE_SIZE) { |
2191 | if (offset < ino_size) | 2258 | if (offset < ino_size) |
2192 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2259 | ret = btrfs_truncate_page(inode, offset, len, 0); |
2193 | mutex_unlock(&inode->i_mutex); | 2260 | goto out_only_mutex; |
2194 | return ret; | ||
2195 | } | 2261 | } |
2196 | 2262 | ||
2197 | /* zero back part of the first page */ | 2263 | /* zero back part of the first page */ |
@@ -2203,12 +2269,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2203 | } | 2269 | } |
2204 | } | 2270 | } |
2205 | 2271 | ||
2206 | /* zero the front end of the last page */ | 2272 | /* Check the aligned pages after the first unaligned page, |
2207 | if (offset + len < ino_size) { | 2273 | * if offset != orig_start, which means the first unaligned page |
2208 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2274 | * including several following pages are already in holes, |
2209 | if (ret) { | 2275 | * the extra check can be skipped */ |
2210 | mutex_unlock(&inode->i_mutex); | 2276 | if (offset == orig_start) { |
2211 | return ret; | 2277 | /* after truncate page, check hole again */ |
2278 | len = offset + len - lockstart; | ||
2279 | offset = lockstart; | ||
2280 | ret = find_first_non_hole(inode, &offset, &len); | ||
2281 | if (ret < 0) | ||
2282 | goto out_only_mutex; | ||
2283 | if (ret && !len) { | ||
2284 | ret = 0; | ||
2285 | goto out_only_mutex; | ||
2286 | } | ||
2287 | lockstart = offset; | ||
2288 | } | ||
2289 | |||
2290 | /* Check the tail unaligned part is in a hole */ | ||
2291 | tail_start = lockend + 1; | ||
2292 | tail_len = offset + len - tail_start; | ||
2293 | if (tail_len) { | ||
2294 | ret = find_first_non_hole(inode, &tail_start, &tail_len); | ||
2295 | if (unlikely(ret < 0)) | ||
2296 | goto out_only_mutex; | ||
2297 | if (!ret) { | ||
2298 | /* zero the front end of the last page */ | ||
2299 | if (tail_start + tail_len < ino_size) { | ||
2300 | ret = btrfs_truncate_page(inode, | ||
2301 | tail_start + tail_len, 0, 1); | ||
2302 | if (ret) | ||
2303 | goto out_only_mutex; | ||
2304 | } | ||
2212 | } | 2305 | } |
2213 | } | 2306 | } |
2214 | 2307 | ||
@@ -2234,9 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2234 | if ((!ordered || | 2327 | if ((!ordered || |
2235 | (ordered->file_offset + ordered->len <= lockstart || | 2328 | (ordered->file_offset + ordered->len <= lockstart || |
2236 | ordered->file_offset > lockend)) && | 2329 | ordered->file_offset > lockend)) && |
2237 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | 2330 | !btrfs_page_exists_in_range(inode, lockstart, lockend)) { |
2238 | lockend, EXTENT_UPTODATE, 0, | ||
2239 | cached_state)) { | ||
2240 | if (ordered) | 2331 | if (ordered) |
2241 | btrfs_put_ordered_extent(ordered); | 2332 | btrfs_put_ordered_extent(ordered); |
2242 | break; | 2333 | break; |
@@ -2284,6 +2375,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2284 | BUG_ON(ret); | 2375 | BUG_ON(ret); |
2285 | trans->block_rsv = rsv; | 2376 | trans->block_rsv = rsv; |
2286 | 2377 | ||
2378 | cur_offset = lockstart; | ||
2379 | len = lockend - cur_offset; | ||
2287 | while (cur_offset < lockend) { | 2380 | while (cur_offset < lockend) { |
2288 | ret = __btrfs_drop_extents(trans, root, inode, path, | 2381 | ret = __btrfs_drop_extents(trans, root, inode, path, |
2289 | cur_offset, lockend + 1, | 2382 | cur_offset, lockend + 1, |
@@ -2324,6 +2417,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2324 | rsv, min_size); | 2417 | rsv, min_size); |
2325 | BUG_ON(ret); /* shouldn't happen */ | 2418 | BUG_ON(ret); /* shouldn't happen */ |
2326 | trans->block_rsv = rsv; | 2419 | trans->block_rsv = rsv; |
2420 | |||
2421 | ret = find_first_non_hole(inode, &cur_offset, &len); | ||
2422 | if (unlikely(ret < 0)) | ||
2423 | break; | ||
2424 | if (ret && !len) { | ||
2425 | ret = 0; | ||
2426 | break; | ||
2427 | } | ||
2327 | } | 2428 | } |
2328 | 2429 | ||
2329 | if (ret) { | 2430 | if (ret) { |
@@ -2332,7 +2433,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2332 | } | 2433 | } |
2333 | 2434 | ||
2334 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2435 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2335 | if (cur_offset < ino_size) { | 2436 | /* |
2437 | * Don't insert file hole extent item if it's for a range beyond eof | ||
2438 | * (because it's useless) or if it represents a 0 bytes range (when | ||
2439 | * cur_offset == drop_end). | ||
2440 | */ | ||
2441 | if (cur_offset < ino_size && cur_offset < drop_end) { | ||
2336 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2442 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
2337 | if (ret) { | 2443 | if (ret) { |
2338 | err = ret; | 2444 | err = ret; |
@@ -2357,6 +2463,7 @@ out_free: | |||
2357 | out: | 2463 | out: |
2358 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2464 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
2359 | &cached_state, GFP_NOFS); | 2465 | &cached_state, GFP_NOFS); |
2466 | out_only_mutex: | ||
2360 | mutex_unlock(&inode->i_mutex); | 2467 | mutex_unlock(&inode->i_mutex); |
2361 | if (ret && !err) | 2468 | if (ret && !err) |
2362 | err = ret; | 2469 | err = ret; |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 73f3de7a083c..372b05ff1943 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -831,7 +831,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
831 | 831 | ||
832 | if (!matched) { | 832 | if (!matched) { |
833 | __btrfs_remove_free_space_cache(ctl); | 833 | __btrfs_remove_free_space_cache(ctl); |
834 | btrfs_err(fs_info, "block group %llu has wrong amount of free space", | 834 | btrfs_warn(fs_info, "block group %llu has wrong amount of free space", |
835 | block_group->key.objectid); | 835 | block_group->key.objectid); |
836 | ret = -1; | 836 | ret = -1; |
837 | } | 837 | } |
@@ -843,7 +843,7 @@ out: | |||
843 | spin_unlock(&block_group->lock); | 843 | spin_unlock(&block_group->lock); |
844 | ret = 0; | 844 | ret = 0; |
845 | 845 | ||
846 | btrfs_err(fs_info, "failed to load free space cache for block group %llu", | 846 | btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", |
847 | block_group->key.objectid); | 847 | block_group->key.objectid); |
848 | } | 848 | } |
849 | 849 | ||
@@ -851,90 +851,44 @@ out: | |||
851 | return ret; | 851 | return ret; |
852 | } | 852 | } |
853 | 853 | ||
854 | /** | 854 | static noinline_for_stack |
855 | * __btrfs_write_out_cache - write out cached info to an inode | 855 | int write_cache_extent_entries(struct io_ctl *io_ctl, |
856 | * @root - the root the inode belongs to | 856 | struct btrfs_free_space_ctl *ctl, |
857 | * @ctl - the free space cache we are going to write out | 857 | struct btrfs_block_group_cache *block_group, |
858 | * @block_group - the block_group for this cache if it belongs to a block_group | 858 | int *entries, int *bitmaps, |
859 | * @trans - the trans handle | 859 | struct list_head *bitmap_list) |
860 | * @path - the path to use | ||
861 | * @offset - the offset for the key we'll insert | ||
862 | * | ||
863 | * This function writes out a free space cache struct to disk for quick recovery | ||
864 | * on mount. This will return 0 if it was successfull in writing the cache out, | ||
865 | * and -1 if it was not. | ||
866 | */ | ||
867 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | ||
868 | struct btrfs_free_space_ctl *ctl, | ||
869 | struct btrfs_block_group_cache *block_group, | ||
870 | struct btrfs_trans_handle *trans, | ||
871 | struct btrfs_path *path, u64 offset) | ||
872 | { | 860 | { |
873 | struct btrfs_free_space_header *header; | ||
874 | struct extent_buffer *leaf; | ||
875 | struct rb_node *node; | ||
876 | struct list_head *pos, *n; | ||
877 | struct extent_state *cached_state = NULL; | ||
878 | struct btrfs_free_cluster *cluster = NULL; | ||
879 | struct extent_io_tree *unpin = NULL; | ||
880 | struct io_ctl io_ctl; | ||
881 | struct list_head bitmap_list; | ||
882 | struct btrfs_key key; | ||
883 | u64 start, extent_start, extent_end, len; | ||
884 | int entries = 0; | ||
885 | int bitmaps = 0; | ||
886 | int ret; | 861 | int ret; |
887 | int err = -1; | 862 | struct btrfs_free_cluster *cluster = NULL; |
888 | 863 | struct rb_node *node = rb_first(&ctl->free_space_offset); | |
889 | INIT_LIST_HEAD(&bitmap_list); | ||
890 | |||
891 | if (!i_size_read(inode)) | ||
892 | return -1; | ||
893 | |||
894 | ret = io_ctl_init(&io_ctl, inode, root); | ||
895 | if (ret) | ||
896 | return -1; | ||
897 | 864 | ||
898 | /* Get the cluster for this block_group if it exists */ | 865 | /* Get the cluster for this block_group if it exists */ |
899 | if (block_group && !list_empty(&block_group->cluster_list)) | 866 | if (block_group && !list_empty(&block_group->cluster_list)) { |
900 | cluster = list_entry(block_group->cluster_list.next, | 867 | cluster = list_entry(block_group->cluster_list.next, |
901 | struct btrfs_free_cluster, | 868 | struct btrfs_free_cluster, |
902 | block_group_list); | 869 | block_group_list); |
870 | } | ||
903 | 871 | ||
904 | /* Lock all pages first so we can lock the extent safely. */ | ||
905 | io_ctl_prepare_pages(&io_ctl, inode, 0); | ||
906 | |||
907 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | ||
908 | 0, &cached_state); | ||
909 | |||
910 | node = rb_first(&ctl->free_space_offset); | ||
911 | if (!node && cluster) { | 872 | if (!node && cluster) { |
912 | node = rb_first(&cluster->root); | 873 | node = rb_first(&cluster->root); |
913 | cluster = NULL; | 874 | cluster = NULL; |
914 | } | 875 | } |
915 | 876 | ||
916 | /* Make sure we can fit our crcs into the first page */ | ||
917 | if (io_ctl.check_crcs && | ||
918 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | ||
919 | goto out_nospc; | ||
920 | |||
921 | io_ctl_set_generation(&io_ctl, trans->transid); | ||
922 | |||
923 | /* Write out the extent entries */ | 877 | /* Write out the extent entries */ |
924 | while (node) { | 878 | while (node) { |
925 | struct btrfs_free_space *e; | 879 | struct btrfs_free_space *e; |
926 | 880 | ||
927 | e = rb_entry(node, struct btrfs_free_space, offset_index); | 881 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
928 | entries++; | 882 | *entries += 1; |
929 | 883 | ||
930 | ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, | 884 | ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, |
931 | e->bitmap); | 885 | e->bitmap); |
932 | if (ret) | 886 | if (ret) |
933 | goto out_nospc; | 887 | goto fail; |
934 | 888 | ||
935 | if (e->bitmap) { | 889 | if (e->bitmap) { |
936 | list_add_tail(&e->list, &bitmap_list); | 890 | list_add_tail(&e->list, bitmap_list); |
937 | bitmaps++; | 891 | *bitmaps += 1; |
938 | } | 892 | } |
939 | node = rb_next(node); | 893 | node = rb_next(node); |
940 | if (!node && cluster) { | 894 | if (!node && cluster) { |
@@ -942,13 +896,84 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
942 | cluster = NULL; | 896 | cluster = NULL; |
943 | } | 897 | } |
944 | } | 898 | } |
899 | return 0; | ||
900 | fail: | ||
901 | return -ENOSPC; | ||
902 | } | ||
903 | |||
904 | static noinline_for_stack int | ||
905 | update_cache_item(struct btrfs_trans_handle *trans, | ||
906 | struct btrfs_root *root, | ||
907 | struct inode *inode, | ||
908 | struct btrfs_path *path, u64 offset, | ||
909 | int entries, int bitmaps) | ||
910 | { | ||
911 | struct btrfs_key key; | ||
912 | struct btrfs_free_space_header *header; | ||
913 | struct extent_buffer *leaf; | ||
914 | int ret; | ||
915 | |||
916 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
917 | key.offset = offset; | ||
918 | key.type = 0; | ||
919 | |||
920 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
921 | if (ret < 0) { | ||
922 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | ||
923 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | ||
924 | GFP_NOFS); | ||
925 | goto fail; | ||
926 | } | ||
927 | leaf = path->nodes[0]; | ||
928 | if (ret > 0) { | ||
929 | struct btrfs_key found_key; | ||
930 | ASSERT(path->slots[0]); | ||
931 | path->slots[0]--; | ||
932 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
933 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | ||
934 | found_key.offset != offset) { | ||
935 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, | ||
936 | inode->i_size - 1, | ||
937 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, | ||
938 | NULL, GFP_NOFS); | ||
939 | btrfs_release_path(path); | ||
940 | goto fail; | ||
941 | } | ||
942 | } | ||
943 | |||
944 | BTRFS_I(inode)->generation = trans->transid; | ||
945 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
946 | struct btrfs_free_space_header); | ||
947 | btrfs_set_free_space_entries(leaf, header, entries); | ||
948 | btrfs_set_free_space_bitmaps(leaf, header, bitmaps); | ||
949 | btrfs_set_free_space_generation(leaf, header, trans->transid); | ||
950 | btrfs_mark_buffer_dirty(leaf); | ||
951 | btrfs_release_path(path); | ||
952 | |||
953 | return 0; | ||
954 | |||
955 | fail: | ||
956 | return -1; | ||
957 | } | ||
958 | |||
959 | static noinline_for_stack int | ||
960 | add_ioctl_entries(struct btrfs_root *root, | ||
961 | struct inode *inode, | ||
962 | struct btrfs_block_group_cache *block_group, | ||
963 | struct io_ctl *io_ctl, | ||
964 | struct extent_state **cached_state, | ||
965 | struct list_head *bitmap_list, | ||
966 | int *entries) | ||
967 | { | ||
968 | u64 start, extent_start, extent_end, len; | ||
969 | struct list_head *pos, *n; | ||
970 | struct extent_io_tree *unpin = NULL; | ||
971 | int ret; | ||
945 | 972 | ||
946 | /* | 973 | /* |
947 | * We want to add any pinned extents to our free space cache | 974 | * We want to add any pinned extents to our free space cache |
948 | * so we don't leak the space | 975 | * so we don't leak the space |
949 | */ | 976 | * |
950 | |||
951 | /* | ||
952 | * We shouldn't have switched the pinned extents yet so this is the | 977 | * We shouldn't have switched the pinned extents yet so this is the |
953 | * right one | 978 | * right one |
954 | */ | 979 | */ |
@@ -977,8 +1002,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
977 | block_group->key.offset, extent_end + 1); | 1002 | block_group->key.offset, extent_end + 1); |
978 | len = extent_end - extent_start; | 1003 | len = extent_end - extent_start; |
979 | 1004 | ||
980 | entries++; | 1005 | *entries += 1; |
981 | ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL); | 1006 | ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); |
982 | if (ret) | 1007 | if (ret) |
983 | goto out_nospc; | 1008 | goto out_nospc; |
984 | 1009 | ||
@@ -986,74 +1011,129 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
986 | } | 1011 | } |
987 | 1012 | ||
988 | /* Write out the bitmaps */ | 1013 | /* Write out the bitmaps */ |
989 | list_for_each_safe(pos, n, &bitmap_list) { | 1014 | list_for_each_safe(pos, n, bitmap_list) { |
990 | struct btrfs_free_space *entry = | 1015 | struct btrfs_free_space *entry = |
991 | list_entry(pos, struct btrfs_free_space, list); | 1016 | list_entry(pos, struct btrfs_free_space, list); |
992 | 1017 | ||
993 | ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); | 1018 | ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); |
994 | if (ret) | 1019 | if (ret) |
995 | goto out_nospc; | 1020 | goto out_nospc; |
996 | list_del_init(&entry->list); | 1021 | list_del_init(&entry->list); |
997 | } | 1022 | } |
998 | 1023 | ||
999 | /* Zero out the rest of the pages just to make sure */ | 1024 | /* Zero out the rest of the pages just to make sure */ |
1000 | io_ctl_zero_remaining_pages(&io_ctl); | 1025 | io_ctl_zero_remaining_pages(io_ctl); |
1001 | 1026 | ||
1002 | ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, | 1027 | ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, |
1003 | 0, i_size_read(inode), &cached_state); | 1028 | 0, i_size_read(inode), cached_state); |
1004 | io_ctl_drop_pages(&io_ctl); | 1029 | io_ctl_drop_pages(io_ctl); |
1005 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 1030 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
1006 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 1031 | i_size_read(inode) - 1, cached_state, GFP_NOFS); |
1007 | 1032 | ||
1008 | if (ret) | 1033 | if (ret) |
1009 | goto out; | 1034 | goto fail; |
1010 | 1035 | ||
1011 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1036 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); |
1012 | if (ret) { | 1037 | if (ret) { |
1013 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | 1038 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, |
1014 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | 1039 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, |
1015 | GFP_NOFS); | 1040 | GFP_NOFS); |
1016 | goto out; | 1041 | goto fail; |
1017 | } | 1042 | } |
1043 | return 0; | ||
1018 | 1044 | ||
1019 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 1045 | fail: |
1020 | key.offset = offset; | 1046 | return -1; |
1021 | key.type = 0; | ||
1022 | 1047 | ||
1023 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 1048 | out_nospc: |
1024 | if (ret < 0) { | 1049 | return -ENOSPC; |
1025 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | 1050 | } |
1026 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | 1051 | |
1027 | GFP_NOFS); | 1052 | static void noinline_for_stack |
1028 | goto out; | 1053 | cleanup_write_cache_enospc(struct inode *inode, |
1029 | } | 1054 | struct io_ctl *io_ctl, |
1030 | leaf = path->nodes[0]; | 1055 | struct extent_state **cached_state, |
1031 | if (ret > 0) { | 1056 | struct list_head *bitmap_list) |
1032 | struct btrfs_key found_key; | 1057 | { |
1033 | ASSERT(path->slots[0]); | 1058 | struct list_head *pos, *n; |
1034 | path->slots[0]--; | 1059 | list_for_each_safe(pos, n, bitmap_list) { |
1035 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 1060 | struct btrfs_free_space *entry = |
1036 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | 1061 | list_entry(pos, struct btrfs_free_space, list); |
1037 | found_key.offset != offset) { | 1062 | list_del_init(&entry->list); |
1038 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, | ||
1039 | inode->i_size - 1, | ||
1040 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, | ||
1041 | NULL, GFP_NOFS); | ||
1042 | btrfs_release_path(path); | ||
1043 | goto out; | ||
1044 | } | ||
1045 | } | 1063 | } |
1064 | io_ctl_drop_pages(io_ctl); | ||
1065 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
1066 | i_size_read(inode) - 1, cached_state, | ||
1067 | GFP_NOFS); | ||
1068 | } | ||
1046 | 1069 | ||
1047 | BTRFS_I(inode)->generation = trans->transid; | 1070 | /** |
1048 | header = btrfs_item_ptr(leaf, path->slots[0], | 1071 | * __btrfs_write_out_cache - write out cached info to an inode |
1049 | struct btrfs_free_space_header); | 1072 | * @root - the root the inode belongs to |
1050 | btrfs_set_free_space_entries(leaf, header, entries); | 1073 | * @ctl - the free space cache we are going to write out |
1051 | btrfs_set_free_space_bitmaps(leaf, header, bitmaps); | 1074 | * @block_group - the block_group for this cache if it belongs to a block_group |
1052 | btrfs_set_free_space_generation(leaf, header, trans->transid); | 1075 | * @trans - the trans handle |
1053 | btrfs_mark_buffer_dirty(leaf); | 1076 | * @path - the path to use |
1054 | btrfs_release_path(path); | 1077 | * @offset - the offset for the key we'll insert |
1078 | * | ||
1079 | * This function writes out a free space cache struct to disk for quick recovery | ||
1080 | * on mount. This will return 0 if it was successfull in writing the cache out, | ||
1081 | * and -1 if it was not. | ||
1082 | */ | ||
1083 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | ||
1084 | struct btrfs_free_space_ctl *ctl, | ||
1085 | struct btrfs_block_group_cache *block_group, | ||
1086 | struct btrfs_trans_handle *trans, | ||
1087 | struct btrfs_path *path, u64 offset) | ||
1088 | { | ||
1089 | struct extent_state *cached_state = NULL; | ||
1090 | struct io_ctl io_ctl; | ||
1091 | struct list_head bitmap_list; | ||
1092 | int entries = 0; | ||
1093 | int bitmaps = 0; | ||
1094 | int ret; | ||
1095 | int err = -1; | ||
1096 | |||
1097 | INIT_LIST_HEAD(&bitmap_list); | ||
1098 | |||
1099 | if (!i_size_read(inode)) | ||
1100 | return -1; | ||
1101 | |||
1102 | ret = io_ctl_init(&io_ctl, inode, root); | ||
1103 | if (ret) | ||
1104 | return -1; | ||
1105 | |||
1106 | /* Lock all pages first so we can lock the extent safely. */ | ||
1107 | io_ctl_prepare_pages(&io_ctl, inode, 0); | ||
1108 | |||
1109 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | ||
1110 | 0, &cached_state); | ||
1111 | |||
1112 | |||
1113 | /* Make sure we can fit our crcs into the first page */ | ||
1114 | if (io_ctl.check_crcs && | ||
1115 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | ||
1116 | goto out_nospc; | ||
1117 | |||
1118 | io_ctl_set_generation(&io_ctl, trans->transid); | ||
1119 | |||
1120 | ret = write_cache_extent_entries(&io_ctl, ctl, | ||
1121 | block_group, &entries, &bitmaps, | ||
1122 | &bitmap_list); | ||
1123 | if (ret) | ||
1124 | goto out_nospc; | ||
1125 | |||
1126 | ret = add_ioctl_entries(root, inode, block_group, &io_ctl, | ||
1127 | &cached_state, &bitmap_list, &entries); | ||
1128 | |||
1129 | if (ret == -ENOSPC) | ||
1130 | goto out_nospc; | ||
1131 | else if (ret) | ||
1132 | goto out; | ||
1133 | |||
1134 | err = update_cache_item(trans, root, inode, path, offset, | ||
1135 | entries, bitmaps); | ||
1055 | 1136 | ||
1056 | err = 0; | ||
1057 | out: | 1137 | out: |
1058 | io_ctl_free(&io_ctl); | 1138 | io_ctl_free(&io_ctl); |
1059 | if (err) { | 1139 | if (err) { |
@@ -1064,14 +1144,8 @@ out: | |||
1064 | return err; | 1144 | return err; |
1065 | 1145 | ||
1066 | out_nospc: | 1146 | out_nospc: |
1067 | list_for_each_safe(pos, n, &bitmap_list) { | 1147 | |
1068 | struct btrfs_free_space *entry = | 1148 | cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); |
1069 | list_entry(pos, struct btrfs_free_space, list); | ||
1070 | list_del_init(&entry->list); | ||
1071 | } | ||
1072 | io_ctl_drop_pages(&io_ctl); | ||
1073 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
1074 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | ||
1075 | goto out; | 1149 | goto out; |
1076 | } | 1150 | } |
1077 | 1151 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 86935f5ae291..888fbe19079f 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -174,7 +174,7 @@ static void start_caching(struct btrfs_root *root) | |||
174 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); | 174 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); |
175 | } | 175 | } |
176 | 176 | ||
177 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", | 177 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu", |
178 | root->root_key.objectid); | 178 | root->root_key.objectid); |
179 | if (IS_ERR(tsk)) { | 179 | if (IS_ERR(tsk)) { |
180 | btrfs_warn(root->fs_info, "failed to start inode caching task"); | 180 | btrfs_warn(root->fs_info, "failed to start inode caching task"); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c8386f1961f0..8925f66a1411 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -125,7 +125,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | |||
125 | * the btree. The caller should have done a btrfs_drop_extents so that | 125 | * the btree. The caller should have done a btrfs_drop_extents so that |
126 | * no overlapping inline items exist in the btree | 126 | * no overlapping inline items exist in the btree |
127 | */ | 127 | */ |
128 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | 128 | static int insert_inline_extent(struct btrfs_trans_handle *trans, |
129 | struct btrfs_path *path, int extent_inserted, | 129 | struct btrfs_path *path, int extent_inserted, |
130 | struct btrfs_root *root, struct inode *inode, | 130 | struct btrfs_root *root, struct inode *inode, |
131 | u64 start, size_t size, size_t compressed_size, | 131 | u64 start, size_t size, size_t compressed_size, |
@@ -2678,6 +2678,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
2678 | trans = NULL; | 2678 | trans = NULL; |
2679 | goto out_unlock; | 2679 | goto out_unlock; |
2680 | } | 2680 | } |
2681 | |||
2681 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 2682 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
2682 | 2683 | ||
2683 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 2684 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
@@ -2947,14 +2948,15 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | |||
2947 | root->orphan_block_rsv = NULL; | 2948 | root->orphan_block_rsv = NULL; |
2948 | spin_unlock(&root->orphan_lock); | 2949 | spin_unlock(&root->orphan_lock); |
2949 | 2950 | ||
2950 | if (root->orphan_item_inserted && | 2951 | if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) && |
2951 | btrfs_root_refs(&root->root_item) > 0) { | 2952 | btrfs_root_refs(&root->root_item) > 0) { |
2952 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | 2953 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, |
2953 | root->root_key.objectid); | 2954 | root->root_key.objectid); |
2954 | if (ret) | 2955 | if (ret) |
2955 | btrfs_abort_transaction(trans, root, ret); | 2956 | btrfs_abort_transaction(trans, root, ret); |
2956 | else | 2957 | else |
2957 | root->orphan_item_inserted = 0; | 2958 | clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, |
2959 | &root->state); | ||
2958 | } | 2960 | } |
2959 | 2961 | ||
2960 | if (block_rsv) { | 2962 | if (block_rsv) { |
@@ -3271,7 +3273,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3271 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | 3273 | btrfs_block_rsv_release(root, root->orphan_block_rsv, |
3272 | (u64)-1); | 3274 | (u64)-1); |
3273 | 3275 | ||
3274 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 3276 | if (root->orphan_block_rsv || |
3277 | test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) { | ||
3275 | trans = btrfs_join_transaction(root); | 3278 | trans = btrfs_join_transaction(root); |
3276 | if (!IS_ERR(trans)) | 3279 | if (!IS_ERR(trans)) |
3277 | btrfs_end_transaction(trans, root); | 3280 | btrfs_end_transaction(trans, root); |
@@ -3473,7 +3476,7 @@ cache_acl: | |||
3473 | ret = btrfs_load_inode_props(inode, path); | 3476 | ret = btrfs_load_inode_props(inode, path); |
3474 | if (ret) | 3477 | if (ret) |
3475 | btrfs_err(root->fs_info, | 3478 | btrfs_err(root->fs_info, |
3476 | "error loading props for ino %llu (root %llu): %d\n", | 3479 | "error loading props for ino %llu (root %llu): %d", |
3477 | btrfs_ino(inode), | 3480 | btrfs_ino(inode), |
3478 | root->root_key.objectid, ret); | 3481 | root->root_key.objectid, ret); |
3479 | } | 3482 | } |
@@ -3998,7 +4001,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3998 | * not block aligned since we will be keeping the last block of the | 4001 | * not block aligned since we will be keeping the last block of the |
3999 | * extent just the way it is. | 4002 | * extent just the way it is. |
4000 | */ | 4003 | */ |
4001 | if (root->ref_cows || root == root->fs_info->tree_root) | 4004 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
4005 | root == root->fs_info->tree_root) | ||
4002 | btrfs_drop_extent_cache(inode, ALIGN(new_size, | 4006 | btrfs_drop_extent_cache(inode, ALIGN(new_size, |
4003 | root->sectorsize), (u64)-1, 0); | 4007 | root->sectorsize), (u64)-1, 0); |
4004 | 4008 | ||
@@ -4091,7 +4095,9 @@ search_again: | |||
4091 | extent_num_bytes); | 4095 | extent_num_bytes); |
4092 | num_dec = (orig_num_bytes - | 4096 | num_dec = (orig_num_bytes - |
4093 | extent_num_bytes); | 4097 | extent_num_bytes); |
4094 | if (root->ref_cows && extent_start != 0) | 4098 | if (test_bit(BTRFS_ROOT_REF_COWS, |
4099 | &root->state) && | ||
4100 | extent_start != 0) | ||
4095 | inode_sub_bytes(inode, num_dec); | 4101 | inode_sub_bytes(inode, num_dec); |
4096 | btrfs_mark_buffer_dirty(leaf); | 4102 | btrfs_mark_buffer_dirty(leaf); |
4097 | } else { | 4103 | } else { |
@@ -4105,7 +4111,8 @@ search_again: | |||
4105 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); | 4111 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); |
4106 | if (extent_start != 0) { | 4112 | if (extent_start != 0) { |
4107 | found_extent = 1; | 4113 | found_extent = 1; |
4108 | if (root->ref_cows) | 4114 | if (test_bit(BTRFS_ROOT_REF_COWS, |
4115 | &root->state)) | ||
4109 | inode_sub_bytes(inode, num_dec); | 4116 | inode_sub_bytes(inode, num_dec); |
4110 | } | 4117 | } |
4111 | } | 4118 | } |
@@ -4120,10 +4127,9 @@ search_again: | |||
4120 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { | 4127 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { |
4121 | u32 size = new_size - found_key.offset; | 4128 | u32 size = new_size - found_key.offset; |
4122 | 4129 | ||
4123 | if (root->ref_cows) { | 4130 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
4124 | inode_sub_bytes(inode, item_end + 1 - | 4131 | inode_sub_bytes(inode, item_end + 1 - |
4125 | new_size); | 4132 | new_size); |
4126 | } | ||
4127 | 4133 | ||
4128 | /* | 4134 | /* |
4129 | * update the ram bytes to properly reflect | 4135 | * update the ram bytes to properly reflect |
@@ -4133,7 +4139,8 @@ search_again: | |||
4133 | size = | 4139 | size = |
4134 | btrfs_file_extent_calc_inline_size(size); | 4140 | btrfs_file_extent_calc_inline_size(size); |
4135 | btrfs_truncate_item(root, path, size, 1); | 4141 | btrfs_truncate_item(root, path, size, 1); |
4136 | } else if (root->ref_cows) { | 4142 | } else if (test_bit(BTRFS_ROOT_REF_COWS, |
4143 | &root->state)) { | ||
4137 | inode_sub_bytes(inode, item_end + 1 - | 4144 | inode_sub_bytes(inode, item_end + 1 - |
4138 | found_key.offset); | 4145 | found_key.offset); |
4139 | } | 4146 | } |
@@ -4155,8 +4162,9 @@ delete: | |||
4155 | } else { | 4162 | } else { |
4156 | break; | 4163 | break; |
4157 | } | 4164 | } |
4158 | if (found_extent && (root->ref_cows || | 4165 | if (found_extent && |
4159 | root == root->fs_info->tree_root)) { | 4166 | (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
4167 | root == root->fs_info->tree_root)) { | ||
4160 | btrfs_set_path_blocking(path); | 4168 | btrfs_set_path_blocking(path); |
4161 | ret = btrfs_free_extent(trans, root, extent_start, | 4169 | ret = btrfs_free_extent(trans, root, extent_start, |
4162 | extent_num_bytes, 0, | 4170 | extent_num_bytes, 0, |
@@ -5168,8 +5176,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry) | |||
5168 | 5176 | ||
5169 | static void btrfs_dentry_release(struct dentry *dentry) | 5177 | static void btrfs_dentry_release(struct dentry *dentry) |
5170 | { | 5178 | { |
5171 | if (dentry->d_fsdata) | 5179 | kfree(dentry->d_fsdata); |
5172 | kfree(dentry->d_fsdata); | ||
5173 | } | 5180 | } |
5174 | 5181 | ||
5175 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 5182 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
@@ -5553,6 +5560,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5553 | struct btrfs_inode_ref *ref; | 5560 | struct btrfs_inode_ref *ref; |
5554 | struct btrfs_key key[2]; | 5561 | struct btrfs_key key[2]; |
5555 | u32 sizes[2]; | 5562 | u32 sizes[2]; |
5563 | int nitems = name ? 2 : 1; | ||
5556 | unsigned long ptr; | 5564 | unsigned long ptr; |
5557 | int ret; | 5565 | int ret; |
5558 | 5566 | ||
@@ -5572,7 +5580,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5572 | */ | 5580 | */ |
5573 | inode->i_ino = objectid; | 5581 | inode->i_ino = objectid; |
5574 | 5582 | ||
5575 | if (dir) { | 5583 | if (dir && name) { |
5576 | trace_btrfs_inode_request(dir); | 5584 | trace_btrfs_inode_request(dir); |
5577 | 5585 | ||
5578 | ret = btrfs_set_inode_index(dir, index); | 5586 | ret = btrfs_set_inode_index(dir, index); |
@@ -5581,6 +5589,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5581 | iput(inode); | 5589 | iput(inode); |
5582 | return ERR_PTR(ret); | 5590 | return ERR_PTR(ret); |
5583 | } | 5591 | } |
5592 | } else if (dir) { | ||
5593 | *index = 0; | ||
5584 | } | 5594 | } |
5585 | /* | 5595 | /* |
5586 | * index_cnt is ignored for everything but a dir, | 5596 | * index_cnt is ignored for everything but a dir, |
@@ -5605,21 +5615,24 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5605 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 5615 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
5606 | key[0].offset = 0; | 5616 | key[0].offset = 0; |
5607 | 5617 | ||
5608 | /* | ||
5609 | * Start new inodes with an inode_ref. This is slightly more | ||
5610 | * efficient for small numbers of hard links since they will | ||
5611 | * be packed into one item. Extended refs will kick in if we | ||
5612 | * add more hard links than can fit in the ref item. | ||
5613 | */ | ||
5614 | key[1].objectid = objectid; | ||
5615 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | ||
5616 | key[1].offset = ref_objectid; | ||
5617 | |||
5618 | sizes[0] = sizeof(struct btrfs_inode_item); | 5618 | sizes[0] = sizeof(struct btrfs_inode_item); |
5619 | sizes[1] = name_len + sizeof(*ref); | 5619 | |
5620 | if (name) { | ||
5621 | /* | ||
5622 | * Start new inodes with an inode_ref. This is slightly more | ||
5623 | * efficient for small numbers of hard links since they will | ||
5624 | * be packed into one item. Extended refs will kick in if we | ||
5625 | * add more hard links than can fit in the ref item. | ||
5626 | */ | ||
5627 | key[1].objectid = objectid; | ||
5628 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | ||
5629 | key[1].offset = ref_objectid; | ||
5630 | |||
5631 | sizes[1] = name_len + sizeof(*ref); | ||
5632 | } | ||
5620 | 5633 | ||
5621 | path->leave_spinning = 1; | 5634 | path->leave_spinning = 1; |
5622 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); | 5635 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); |
5623 | if (ret != 0) | 5636 | if (ret != 0) |
5624 | goto fail; | 5637 | goto fail; |
5625 | 5638 | ||
@@ -5632,12 +5645,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5632 | sizeof(*inode_item)); | 5645 | sizeof(*inode_item)); |
5633 | fill_inode_item(trans, path->nodes[0], inode_item, inode); | 5646 | fill_inode_item(trans, path->nodes[0], inode_item, inode); |
5634 | 5647 | ||
5635 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 5648 | if (name) { |
5636 | struct btrfs_inode_ref); | 5649 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, |
5637 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); | 5650 | struct btrfs_inode_ref); |
5638 | btrfs_set_inode_ref_index(path->nodes[0], ref, *index); | 5651 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); |
5639 | ptr = (unsigned long)(ref + 1); | 5652 | btrfs_set_inode_ref_index(path->nodes[0], ref, *index); |
5640 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | 5653 | ptr = (unsigned long)(ref + 1); |
5654 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
5655 | } | ||
5641 | 5656 | ||
5642 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5657 | btrfs_mark_buffer_dirty(path->nodes[0]); |
5643 | btrfs_free_path(path); | 5658 | btrfs_free_path(path); |
@@ -5673,7 +5688,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5673 | 5688 | ||
5674 | return inode; | 5689 | return inode; |
5675 | fail: | 5690 | fail: |
5676 | if (dir) | 5691 | if (dir && name) |
5677 | BTRFS_I(dir)->index_cnt--; | 5692 | BTRFS_I(dir)->index_cnt--; |
5678 | btrfs_free_path(path); | 5693 | btrfs_free_path(path); |
5679 | iput(inode); | 5694 | iput(inode); |
@@ -5958,6 +5973,15 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
5958 | err = btrfs_update_inode(trans, root, inode); | 5973 | err = btrfs_update_inode(trans, root, inode); |
5959 | if (err) | 5974 | if (err) |
5960 | goto fail; | 5975 | goto fail; |
5976 | if (inode->i_nlink == 1) { | ||
5977 | /* | ||
5978 | * If new hard link count is 1, it's a file created | ||
5979 | * with open(2) O_TMPFILE flag. | ||
5980 | */ | ||
5981 | err = btrfs_orphan_del(trans, inode); | ||
5982 | if (err) | ||
5983 | goto fail; | ||
5984 | } | ||
5961 | d_instantiate(dentry, inode); | 5985 | d_instantiate(dentry, inode); |
5962 | btrfs_log_new_name(trans, inode, NULL, parent); | 5986 | btrfs_log_new_name(trans, inode, NULL, parent); |
5963 | } | 5987 | } |
@@ -6086,16 +6110,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
6086 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 6110 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
6087 | ret = btrfs_decompress(compress_type, tmp, page, | 6111 | ret = btrfs_decompress(compress_type, tmp, page, |
6088 | extent_offset, inline_size, max_size); | 6112 | extent_offset, inline_size, max_size); |
6089 | if (ret) { | ||
6090 | char *kaddr = kmap_atomic(page); | ||
6091 | unsigned long copy_size = min_t(u64, | ||
6092 | PAGE_CACHE_SIZE - pg_offset, | ||
6093 | max_size - extent_offset); | ||
6094 | memset(kaddr + pg_offset, 0, copy_size); | ||
6095 | kunmap_atomic(kaddr); | ||
6096 | } | ||
6097 | kfree(tmp); | 6113 | kfree(tmp); |
6098 | return 0; | 6114 | return ret; |
6099 | } | 6115 | } |
6100 | 6116 | ||
6101 | /* | 6117 | /* |
@@ -6113,7 +6129,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
6113 | { | 6129 | { |
6114 | int ret; | 6130 | int ret; |
6115 | int err = 0; | 6131 | int err = 0; |
6116 | u64 bytenr; | ||
6117 | u64 extent_start = 0; | 6132 | u64 extent_start = 0; |
6118 | u64 extent_end = 0; | 6133 | u64 extent_end = 0; |
6119 | u64 objectid = btrfs_ino(inode); | 6134 | u64 objectid = btrfs_ino(inode); |
@@ -6127,7 +6142,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
6127 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 6142 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
6128 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 6143 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
6129 | struct btrfs_trans_handle *trans = NULL; | 6144 | struct btrfs_trans_handle *trans = NULL; |
6130 | int compress_type; | 6145 | const bool new_inline = !page || create; |
6131 | 6146 | ||
6132 | again: | 6147 | again: |
6133 | read_lock(&em_tree->lock); | 6148 | read_lock(&em_tree->lock); |
@@ -6201,7 +6216,6 @@ again: | |||
6201 | 6216 | ||
6202 | found_type = btrfs_file_extent_type(leaf, item); | 6217 | found_type = btrfs_file_extent_type(leaf, item); |
6203 | extent_start = found_key.offset; | 6218 | extent_start = found_key.offset; |
6204 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
6205 | if (found_type == BTRFS_FILE_EXTENT_REG || | 6219 | if (found_type == BTRFS_FILE_EXTENT_REG || |
6206 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 6220 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
6207 | extent_end = extent_start + | 6221 | extent_end = extent_start + |
@@ -6236,32 +6250,10 @@ next: | |||
6236 | goto not_found_em; | 6250 | goto not_found_em; |
6237 | } | 6251 | } |
6238 | 6252 | ||
6239 | em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item); | 6253 | btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); |
6254 | |||
6240 | if (found_type == BTRFS_FILE_EXTENT_REG || | 6255 | if (found_type == BTRFS_FILE_EXTENT_REG || |
6241 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 6256 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
6242 | em->start = extent_start; | ||
6243 | em->len = extent_end - extent_start; | ||
6244 | em->orig_start = extent_start - | ||
6245 | btrfs_file_extent_offset(leaf, item); | ||
6246 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, | ||
6247 | item); | ||
6248 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); | ||
6249 | if (bytenr == 0) { | ||
6250 | em->block_start = EXTENT_MAP_HOLE; | ||
6251 | goto insert; | ||
6252 | } | ||
6253 | if (compress_type != BTRFS_COMPRESS_NONE) { | ||
6254 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
6255 | em->compress_type = compress_type; | ||
6256 | em->block_start = bytenr; | ||
6257 | em->block_len = em->orig_block_len; | ||
6258 | } else { | ||
6259 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
6260 | em->block_start = bytenr; | ||
6261 | em->block_len = em->len; | ||
6262 | if (found_type == BTRFS_FILE_EXTENT_PREALLOC) | ||
6263 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
6264 | } | ||
6265 | goto insert; | 6257 | goto insert; |
6266 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 6258 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
6267 | unsigned long ptr; | 6259 | unsigned long ptr; |
@@ -6270,12 +6262,8 @@ next: | |||
6270 | size_t extent_offset; | 6262 | size_t extent_offset; |
6271 | size_t copy_size; | 6263 | size_t copy_size; |
6272 | 6264 | ||
6273 | em->block_start = EXTENT_MAP_INLINE; | 6265 | if (new_inline) |
6274 | if (!page || create) { | ||
6275 | em->start = extent_start; | ||
6276 | em->len = extent_end - extent_start; | ||
6277 | goto out; | 6266 | goto out; |
6278 | } | ||
6279 | 6267 | ||
6280 | size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); | 6268 | size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); |
6281 | extent_offset = page_offset(page) + pg_offset - extent_start; | 6269 | extent_offset = page_offset(page) + pg_offset - extent_start; |
@@ -6285,10 +6273,6 @@ next: | |||
6285 | em->len = ALIGN(copy_size, root->sectorsize); | 6273 | em->len = ALIGN(copy_size, root->sectorsize); |
6286 | em->orig_block_len = em->len; | 6274 | em->orig_block_len = em->len; |
6287 | em->orig_start = em->start; | 6275 | em->orig_start = em->start; |
6288 | if (compress_type) { | ||
6289 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
6290 | em->compress_type = compress_type; | ||
6291 | } | ||
6292 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 6276 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
6293 | if (create == 0 && !PageUptodate(page)) { | 6277 | if (create == 0 && !PageUptodate(page)) { |
6294 | if (btrfs_file_extent_compression(leaf, item) != | 6278 | if (btrfs_file_extent_compression(leaf, item) != |
@@ -6296,7 +6280,10 @@ next: | |||
6296 | ret = uncompress_inline(path, inode, page, | 6280 | ret = uncompress_inline(path, inode, page, |
6297 | pg_offset, | 6281 | pg_offset, |
6298 | extent_offset, item); | 6282 | extent_offset, item); |
6299 | BUG_ON(ret); /* -ENOMEM */ | 6283 | if (ret) { |
6284 | err = ret; | ||
6285 | goto out; | ||
6286 | } | ||
6300 | } else { | 6287 | } else { |
6301 | map = kmap(page); | 6288 | map = kmap(page); |
6302 | read_extent_buffer(leaf, map + pg_offset, ptr, | 6289 | read_extent_buffer(leaf, map + pg_offset, ptr, |
@@ -6332,8 +6319,6 @@ next: | |||
6332 | set_extent_uptodate(io_tree, em->start, | 6319 | set_extent_uptodate(io_tree, em->start, |
6333 | extent_map_end(em) - 1, NULL, GFP_NOFS); | 6320 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
6334 | goto insert; | 6321 | goto insert; |
6335 | } else { | ||
6336 | WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type); | ||
6337 | } | 6322 | } |
6338 | not_found: | 6323 | not_found: |
6339 | em->start = start; | 6324 | em->start = start; |
@@ -6717,6 +6702,76 @@ out: | |||
6717 | return ret; | 6702 | return ret; |
6718 | } | 6703 | } |
6719 | 6704 | ||
6705 | bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) | ||
6706 | { | ||
6707 | struct radix_tree_root *root = &inode->i_mapping->page_tree; | ||
6708 | int found = false; | ||
6709 | void **pagep = NULL; | ||
6710 | struct page *page = NULL; | ||
6711 | int start_idx; | ||
6712 | int end_idx; | ||
6713 | |||
6714 | start_idx = start >> PAGE_CACHE_SHIFT; | ||
6715 | |||
6716 | /* | ||
6717 | * end is the last byte in the last page. end == start is legal | ||
6718 | */ | ||
6719 | end_idx = end >> PAGE_CACHE_SHIFT; | ||
6720 | |||
6721 | rcu_read_lock(); | ||
6722 | |||
6723 | /* Most of the code in this while loop is lifted from | ||
6724 | * find_get_page. It's been modified to begin searching from a | ||
6725 | * page and return just the first page found in that range. If the | ||
6726 | * found idx is less than or equal to the end idx then we know that | ||
6727 | * a page exists. If no pages are found or if those pages are | ||
6728 | * outside of the range then we're fine (yay!) */ | ||
6729 | while (page == NULL && | ||
6730 | radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) { | ||
6731 | page = radix_tree_deref_slot(pagep); | ||
6732 | if (unlikely(!page)) | ||
6733 | break; | ||
6734 | |||
6735 | if (radix_tree_exception(page)) { | ||
6736 | if (radix_tree_deref_retry(page)) { | ||
6737 | page = NULL; | ||
6738 | continue; | ||
6739 | } | ||
6740 | /* | ||
6741 | * Otherwise, shmem/tmpfs must be storing a swap entry | ||
6742 | * here as an exceptional entry: so return it without | ||
6743 | * attempting to raise page count. | ||
6744 | */ | ||
6745 | page = NULL; | ||
6746 | break; /* TODO: Is this relevant for this use case? */ | ||
6747 | } | ||
6748 | |||
6749 | if (!page_cache_get_speculative(page)) { | ||
6750 | page = NULL; | ||
6751 | continue; | ||
6752 | } | ||
6753 | |||
6754 | /* | ||
6755 | * Has the page moved? | ||
6756 | * This is part of the lockless pagecache protocol. See | ||
6757 | * include/linux/pagemap.h for details. | ||
6758 | */ | ||
6759 | if (unlikely(page != *pagep)) { | ||
6760 | page_cache_release(page); | ||
6761 | page = NULL; | ||
6762 | } | ||
6763 | } | ||
6764 | |||
6765 | if (page) { | ||
6766 | if (page->index <= end_idx) | ||
6767 | found = true; | ||
6768 | page_cache_release(page); | ||
6769 | } | ||
6770 | |||
6771 | rcu_read_unlock(); | ||
6772 | return found; | ||
6773 | } | ||
6774 | |||
6720 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | 6775 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, |
6721 | struct extent_state **cached_state, int writing) | 6776 | struct extent_state **cached_state, int writing) |
6722 | { | 6777 | { |
@@ -6741,10 +6796,9 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
6741 | * invalidate needs to happen so that reads after a write do not | 6796 | * invalidate needs to happen so that reads after a write do not |
6742 | * get stale data. | 6797 | * get stale data. |
6743 | */ | 6798 | */ |
6744 | if (!ordered && (!writing || | 6799 | if (!ordered && |
6745 | !test_range_bit(&BTRFS_I(inode)->io_tree, | 6800 | (!writing || |
6746 | lockstart, lockend, EXTENT_UPTODATE, 0, | 6801 | !btrfs_page_exists_in_range(inode, lockstart, lockend))) |
6747 | *cached_state))) | ||
6748 | break; | 6802 | break; |
6749 | 6803 | ||
6750 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 6804 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
@@ -7126,7 +7180,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) | |||
7126 | * before atomic variable goto zero, we must make sure | 7180 | * before atomic variable goto zero, we must make sure |
7127 | * dip->errors is perceived to be set. | 7181 | * dip->errors is perceived to be set. |
7128 | */ | 7182 | */ |
7129 | smp_mb__before_atomic_dec(); | 7183 | smp_mb__before_atomic(); |
7130 | } | 7184 | } |
7131 | 7185 | ||
7132 | /* if there are more bios still pending for this dio, just exit */ | 7186 | /* if there are more bios still pending for this dio, just exit */ |
@@ -7306,7 +7360,7 @@ out_err: | |||
7306 | * before atomic variable goto zero, we must | 7360 | * before atomic variable goto zero, we must |
7307 | * make sure dip->errors is perceived to be set. | 7361 | * make sure dip->errors is perceived to be set. |
7308 | */ | 7362 | */ |
7309 | smp_mb__before_atomic_dec(); | 7363 | smp_mb__before_atomic(); |
7310 | if (atomic_dec_and_test(&dip->pending_bios)) | 7364 | if (atomic_dec_and_test(&dip->pending_bios)) |
7311 | bio_io_error(dip->orig_bio); | 7365 | bio_io_error(dip->orig_bio); |
7312 | 7366 | ||
@@ -7438,7 +7492,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7438 | return 0; | 7492 | return 0; |
7439 | 7493 | ||
7440 | atomic_inc(&inode->i_dio_count); | 7494 | atomic_inc(&inode->i_dio_count); |
7441 | smp_mb__after_atomic_inc(); | 7495 | smp_mb__after_atomic(); |
7442 | 7496 | ||
7443 | /* | 7497 | /* |
7444 | * The generic stuff only does filemap_write_and_wait_range, which | 7498 | * The generic stuff only does filemap_write_and_wait_range, which |
@@ -7981,7 +8035,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
7981 | err = btrfs_subvol_inherit_props(trans, new_root, parent_root); | 8035 | err = btrfs_subvol_inherit_props(trans, new_root, parent_root); |
7982 | if (err) | 8036 | if (err) |
7983 | btrfs_err(new_root->fs_info, | 8037 | btrfs_err(new_root->fs_info, |
7984 | "error inheriting subvolume %llu properties: %d\n", | 8038 | "error inheriting subvolume %llu properties: %d", |
7985 | new_root->root_key.objectid, err); | 8039 | new_root->root_key.objectid, err); |
7986 | 8040 | ||
7987 | err = btrfs_update_inode(trans, new_root, inode); | 8041 | err = btrfs_update_inode(trans, new_root, inode); |
@@ -8300,7 +8354,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
8300 | BTRFS_I(old_inode)->dir_index = 0ULL; | 8354 | BTRFS_I(old_inode)->dir_index = 0ULL; |
8301 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { | 8355 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
8302 | /* force full log commit if subvolume involved. */ | 8356 | /* force full log commit if subvolume involved. */ |
8303 | root->fs_info->last_trans_log_full_commit = trans->transid; | 8357 | btrfs_set_log_full_commit(root->fs_info, trans); |
8304 | } else { | 8358 | } else { |
8305 | ret = btrfs_insert_inode_ref(trans, dest, | 8359 | ret = btrfs_insert_inode_ref(trans, dest, |
8306 | new_dentry->d_name.name, | 8360 | new_dentry->d_name.name, |
@@ -8878,6 +8932,66 @@ static int btrfs_permission(struct inode *inode, int mask) | |||
8878 | return generic_permission(inode, mask); | 8932 | return generic_permission(inode, mask); |
8879 | } | 8933 | } |
8880 | 8934 | ||
8935 | static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
8936 | { | ||
8937 | struct btrfs_trans_handle *trans; | ||
8938 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
8939 | struct inode *inode = NULL; | ||
8940 | u64 objectid; | ||
8941 | u64 index; | ||
8942 | int ret = 0; | ||
8943 | |||
8944 | /* | ||
8945 | * 5 units required for adding orphan entry | ||
8946 | */ | ||
8947 | trans = btrfs_start_transaction(root, 5); | ||
8948 | if (IS_ERR(trans)) | ||
8949 | return PTR_ERR(trans); | ||
8950 | |||
8951 | ret = btrfs_find_free_ino(root, &objectid); | ||
8952 | if (ret) | ||
8953 | goto out; | ||
8954 | |||
8955 | inode = btrfs_new_inode(trans, root, dir, NULL, 0, | ||
8956 | btrfs_ino(dir), objectid, mode, &index); | ||
8957 | if (IS_ERR(inode)) { | ||
8958 | ret = PTR_ERR(inode); | ||
8959 | inode = NULL; | ||
8960 | goto out; | ||
8961 | } | ||
8962 | |||
8963 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); | ||
8964 | if (ret) | ||
8965 | goto out; | ||
8966 | |||
8967 | ret = btrfs_update_inode(trans, root, inode); | ||
8968 | if (ret) | ||
8969 | goto out; | ||
8970 | |||
8971 | inode->i_fop = &btrfs_file_operations; | ||
8972 | inode->i_op = &btrfs_file_inode_operations; | ||
8973 | |||
8974 | inode->i_mapping->a_ops = &btrfs_aops; | ||
8975 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
8976 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
8977 | |||
8978 | ret = btrfs_orphan_add(trans, inode); | ||
8979 | if (ret) | ||
8980 | goto out; | ||
8981 | |||
8982 | d_tmpfile(dentry, inode); | ||
8983 | mark_inode_dirty(inode); | ||
8984 | |||
8985 | out: | ||
8986 | btrfs_end_transaction(trans, root); | ||
8987 | if (ret) | ||
8988 | iput(inode); | ||
8989 | btrfs_balance_delayed_items(root); | ||
8990 | btrfs_btree_balance_dirty(root); | ||
8991 | |||
8992 | return ret; | ||
8993 | } | ||
8994 | |||
8881 | static const struct inode_operations btrfs_dir_inode_operations = { | 8995 | static const struct inode_operations btrfs_dir_inode_operations = { |
8882 | .getattr = btrfs_getattr, | 8996 | .getattr = btrfs_getattr, |
8883 | .lookup = btrfs_lookup, | 8997 | .lookup = btrfs_lookup, |
@@ -8898,6 +9012,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { | |||
8898 | .get_acl = btrfs_get_acl, | 9012 | .get_acl = btrfs_get_acl, |
8899 | .set_acl = btrfs_set_acl, | 9013 | .set_acl = btrfs_set_acl, |
8900 | .update_time = btrfs_update_time, | 9014 | .update_time = btrfs_update_time, |
9015 | .tmpfile = btrfs_tmpfile, | ||
8901 | }; | 9016 | }; |
8902 | static const struct inode_operations btrfs_dir_ro_inode_operations = { | 9017 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
8903 | .lookup = btrfs_lookup, | 9018 | .lookup = btrfs_lookup, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2f6d7b13b5bd..82c18ba12e3f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include "dev-replace.h" | 58 | #include "dev-replace.h" |
59 | #include "props.h" | 59 | #include "props.h" |
60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
61 | #include "qgroup.h" | ||
61 | 62 | ||
62 | #ifdef CONFIG_64BIT | 63 | #ifdef CONFIG_64BIT |
63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | 64 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI |
@@ -638,11 +639,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
638 | struct btrfs_trans_handle *trans; | 639 | struct btrfs_trans_handle *trans; |
639 | int ret; | 640 | int ret; |
640 | 641 | ||
641 | if (!root->ref_cows) | 642 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
642 | return -EINVAL; | 643 | return -EINVAL; |
643 | 644 | ||
644 | atomic_inc(&root->will_be_snapshoted); | 645 | atomic_inc(&root->will_be_snapshoted); |
645 | smp_mb__after_atomic_inc(); | 646 | smp_mb__after_atomic(); |
646 | btrfs_wait_nocow_write(root); | 647 | btrfs_wait_nocow_write(root); |
647 | 648 | ||
648 | ret = btrfs_start_delalloc_inodes(root, 0); | 649 | ret = btrfs_start_delalloc_inodes(root, 0); |
@@ -711,6 +712,35 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
711 | if (ret) | 712 | if (ret) |
712 | goto fail; | 713 | goto fail; |
713 | 714 | ||
715 | /* | ||
716 | * If orphan cleanup did remove any orphans, it means the tree was | ||
717 | * modified and therefore the commit root is not the same as the | ||
718 | * current root anymore. This is a problem, because send uses the | ||
719 | * commit root and therefore can see inode items that don't exist | ||
720 | * in the current root anymore, and for example make calls to | ||
721 | * btrfs_iget, which will do tree lookups based on the current root | ||
722 | * and not on the commit root. Those lookups will fail, returning a | ||
723 | * -ESTALE error, and making send fail with that error. So make sure | ||
724 | * a send does not see any orphans we have just removed, and that it | ||
725 | * will see the same inodes regardless of whether a transaction | ||
726 | * commit happened before it started (meaning that the commit root | ||
727 | * will be the same as the current root) or not. | ||
728 | */ | ||
729 | if (readonly && pending_snapshot->snap->node != | ||
730 | pending_snapshot->snap->commit_root) { | ||
731 | trans = btrfs_join_transaction(pending_snapshot->snap); | ||
732 | if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { | ||
733 | ret = PTR_ERR(trans); | ||
734 | goto fail; | ||
735 | } | ||
736 | if (!IS_ERR(trans)) { | ||
737 | ret = btrfs_commit_transaction(trans, | ||
738 | pending_snapshot->snap); | ||
739 | if (ret) | ||
740 | goto fail; | ||
741 | } | ||
742 | } | ||
743 | |||
714 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 744 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
715 | if (IS_ERR(inode)) { | 745 | if (IS_ERR(inode)) { |
716 | ret = PTR_ERR(inode); | 746 | ret = PTR_ERR(inode); |
@@ -1502,11 +1532,12 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1502 | sizestr = vol_args->name; | 1532 | sizestr = vol_args->name; |
1503 | devstr = strchr(sizestr, ':'); | 1533 | devstr = strchr(sizestr, ':'); |
1504 | if (devstr) { | 1534 | if (devstr) { |
1505 | char *end; | ||
1506 | sizestr = devstr + 1; | 1535 | sizestr = devstr + 1; |
1507 | *devstr = '\0'; | 1536 | *devstr = '\0'; |
1508 | devstr = vol_args->name; | 1537 | devstr = vol_args->name; |
1509 | devid = simple_strtoull(devstr, &end, 10); | 1538 | ret = kstrtoull(devstr, 10, &devid); |
1539 | if (ret) | ||
1540 | goto out_free; | ||
1510 | if (!devid) { | 1541 | if (!devid) { |
1511 | ret = -EINVAL; | 1542 | ret = -EINVAL; |
1512 | goto out_free; | 1543 | goto out_free; |
@@ -1562,7 +1593,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1562 | new_size = old_size - new_size; | 1593 | new_size = old_size - new_size; |
1563 | } else if (mod > 0) { | 1594 | } else if (mod > 0) { |
1564 | if (new_size > ULLONG_MAX - old_size) { | 1595 | if (new_size > ULLONG_MAX - old_size) { |
1565 | ret = -EINVAL; | 1596 | ret = -ERANGE; |
1566 | goto out_free; | 1597 | goto out_free; |
1567 | } | 1598 | } |
1568 | new_size = old_size + new_size; | 1599 | new_size = old_size + new_size; |
@@ -2219,6 +2250,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2219 | struct btrfs_ioctl_vol_args *vol_args; | 2250 | struct btrfs_ioctl_vol_args *vol_args; |
2220 | struct btrfs_trans_handle *trans; | 2251 | struct btrfs_trans_handle *trans; |
2221 | struct btrfs_block_rsv block_rsv; | 2252 | struct btrfs_block_rsv block_rsv; |
2253 | u64 root_flags; | ||
2222 | u64 qgroup_reserved; | 2254 | u64 qgroup_reserved; |
2223 | int namelen; | 2255 | int namelen; |
2224 | int ret; | 2256 | int ret; |
@@ -2240,6 +2272,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2240 | if (err) | 2272 | if (err) |
2241 | goto out; | 2273 | goto out; |
2242 | 2274 | ||
2275 | |||
2243 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); | 2276 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); |
2244 | if (err == -EINTR) | 2277 | if (err == -EINTR) |
2245 | goto out_drop_write; | 2278 | goto out_drop_write; |
@@ -2301,6 +2334,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2301 | } | 2334 | } |
2302 | 2335 | ||
2303 | mutex_lock(&inode->i_mutex); | 2336 | mutex_lock(&inode->i_mutex); |
2337 | |||
2338 | /* | ||
2339 | * Don't allow to delete a subvolume with send in progress. This is | ||
2340 | * inside the i_mutex so the error handling that has to drop the bit | ||
2341 | * again is not run concurrently. | ||
2342 | */ | ||
2343 | spin_lock(&dest->root_item_lock); | ||
2344 | root_flags = btrfs_root_flags(&dest->root_item); | ||
2345 | if (dest->send_in_progress == 0) { | ||
2346 | btrfs_set_root_flags(&dest->root_item, | ||
2347 | root_flags | BTRFS_ROOT_SUBVOL_DEAD); | ||
2348 | spin_unlock(&dest->root_item_lock); | ||
2349 | } else { | ||
2350 | spin_unlock(&dest->root_item_lock); | ||
2351 | btrfs_warn(root->fs_info, | ||
2352 | "Attempt to delete subvolume %llu during send", | ||
2353 | dest->root_key.objectid); | ||
2354 | err = -EPERM; | ||
2355 | goto out_dput; | ||
2356 | } | ||
2357 | |||
2304 | err = d_invalidate(dentry); | 2358 | err = d_invalidate(dentry); |
2305 | if (err) | 2359 | if (err) |
2306 | goto out_unlock; | 2360 | goto out_unlock; |
@@ -2346,7 +2400,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
2346 | dest->root_item.drop_level = 0; | 2400 | dest->root_item.drop_level = 0; |
2347 | btrfs_set_root_refs(&dest->root_item, 0); | 2401 | btrfs_set_root_refs(&dest->root_item, 0); |
2348 | 2402 | ||
2349 | if (!xchg(&dest->orphan_item_inserted, 1)) { | 2403 | if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) { |
2350 | ret = btrfs_insert_orphan_item(trans, | 2404 | ret = btrfs_insert_orphan_item(trans, |
2351 | root->fs_info->tree_root, | 2405 | root->fs_info->tree_root, |
2352 | dest->root_key.objectid); | 2406 | dest->root_key.objectid); |
@@ -2389,11 +2443,19 @@ out_release: | |||
2389 | out_up_write: | 2443 | out_up_write: |
2390 | up_write(&root->fs_info->subvol_sem); | 2444 | up_write(&root->fs_info->subvol_sem); |
2391 | out_unlock: | 2445 | out_unlock: |
2446 | if (err) { | ||
2447 | spin_lock(&dest->root_item_lock); | ||
2448 | root_flags = btrfs_root_flags(&dest->root_item); | ||
2449 | btrfs_set_root_flags(&dest->root_item, | ||
2450 | root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); | ||
2451 | spin_unlock(&dest->root_item_lock); | ||
2452 | } | ||
2392 | mutex_unlock(&inode->i_mutex); | 2453 | mutex_unlock(&inode->i_mutex); |
2393 | if (!err) { | 2454 | if (!err) { |
2394 | shrink_dcache_sb(root->fs_info->sb); | 2455 | shrink_dcache_sb(root->fs_info->sb); |
2395 | btrfs_invalidate_inodes(dest); | 2456 | btrfs_invalidate_inodes(dest); |
2396 | d_delete(dentry); | 2457 | d_delete(dentry); |
2458 | ASSERT(dest->send_in_progress == 0); | ||
2397 | 2459 | ||
2398 | /* the last ref */ | 2460 | /* the last ref */ |
2399 | if (dest->cache_inode) { | 2461 | if (dest->cache_inode) { |
@@ -2557,9 +2619,6 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | |||
2557 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 2619 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
2558 | int ret = 0; | 2620 | int ret = 0; |
2559 | 2621 | ||
2560 | if (!capable(CAP_SYS_ADMIN)) | ||
2561 | return -EPERM; | ||
2562 | |||
2563 | fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); | 2622 | fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); |
2564 | if (!fi_args) | 2623 | if (!fi_args) |
2565 | return -ENOMEM; | 2624 | return -ENOMEM; |
@@ -2574,6 +2633,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | |||
2574 | } | 2633 | } |
2575 | mutex_unlock(&fs_devices->device_list_mutex); | 2634 | mutex_unlock(&fs_devices->device_list_mutex); |
2576 | 2635 | ||
2636 | fi_args->nodesize = root->fs_info->super_copy->nodesize; | ||
2637 | fi_args->sectorsize = root->fs_info->super_copy->sectorsize; | ||
2638 | fi_args->clone_alignment = root->fs_info->super_copy->sectorsize; | ||
2639 | |||
2577 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) | 2640 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) |
2578 | ret = -EFAULT; | 2641 | ret = -EFAULT; |
2579 | 2642 | ||
@@ -2589,9 +2652,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
2589 | int ret = 0; | 2652 | int ret = 0; |
2590 | char *s_uuid = NULL; | 2653 | char *s_uuid = NULL; |
2591 | 2654 | ||
2592 | if (!capable(CAP_SYS_ADMIN)) | ||
2593 | return -EPERM; | ||
2594 | |||
2595 | di_args = memdup_user(arg, sizeof(*di_args)); | 2655 | di_args = memdup_user(arg, sizeof(*di_args)); |
2596 | if (IS_ERR(di_args)) | 2656 | if (IS_ERR(di_args)) |
2597 | return PTR_ERR(di_args); | 2657 | return PTR_ERR(di_args); |
@@ -2669,10 +2729,15 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
2669 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2729 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
2670 | ordered = btrfs_lookup_first_ordered_extent(inode, | 2730 | ordered = btrfs_lookup_first_ordered_extent(inode, |
2671 | off + len - 1); | 2731 | off + len - 1); |
2672 | if (!ordered && | 2732 | if ((!ordered || |
2733 | ordered->file_offset + ordered->len <= off || | ||
2734 | ordered->file_offset >= off + len) && | ||
2673 | !test_range_bit(&BTRFS_I(inode)->io_tree, off, | 2735 | !test_range_bit(&BTRFS_I(inode)->io_tree, off, |
2674 | off + len - 1, EXTENT_DELALLOC, 0, NULL)) | 2736 | off + len - 1, EXTENT_DELALLOC, 0, NULL)) { |
2737 | if (ordered) | ||
2738 | btrfs_put_ordered_extent(ordered); | ||
2675 | break; | 2739 | break; |
2740 | } | ||
2676 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2741 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
2677 | if (ordered) | 2742 | if (ordered) |
2678 | btrfs_put_ordered_extent(ordered); | 2743 | btrfs_put_ordered_extent(ordered); |
@@ -2912,6 +2977,126 @@ out: | |||
2912 | return ret; | 2977 | return ret; |
2913 | } | 2978 | } |
2914 | 2979 | ||
2980 | /* Helper to check and see if this root currently has a ref on the given disk | ||
2981 | * bytenr. If it does then we need to update the quota for this root. This | ||
2982 | * doesn't do anything if quotas aren't enabled. | ||
2983 | */ | ||
2984 | static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
2985 | u64 disko) | ||
2986 | { | ||
2987 | struct seq_list tree_mod_seq_elem = {}; | ||
2988 | struct ulist *roots; | ||
2989 | struct ulist_iterator uiter; | ||
2990 | struct ulist_node *root_node = NULL; | ||
2991 | int ret; | ||
2992 | |||
2993 | if (!root->fs_info->quota_enabled) | ||
2994 | return 1; | ||
2995 | |||
2996 | btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); | ||
2997 | ret = btrfs_find_all_roots(trans, root->fs_info, disko, | ||
2998 | tree_mod_seq_elem.seq, &roots); | ||
2999 | if (ret < 0) | ||
3000 | goto out; | ||
3001 | ret = 0; | ||
3002 | ULIST_ITER_INIT(&uiter); | ||
3003 | while ((root_node = ulist_next(roots, &uiter))) { | ||
3004 | if (root_node->val == root->objectid) { | ||
3005 | ret = 1; | ||
3006 | break; | ||
3007 | } | ||
3008 | } | ||
3009 | ulist_free(roots); | ||
3010 | out: | ||
3011 | btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); | ||
3012 | return ret; | ||
3013 | } | ||
3014 | |||
3015 | static int clone_finish_inode_update(struct btrfs_trans_handle *trans, | ||
3016 | struct inode *inode, | ||
3017 | u64 endoff, | ||
3018 | const u64 destoff, | ||
3019 | const u64 olen) | ||
3020 | { | ||
3021 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3022 | int ret; | ||
3023 | |||
3024 | inode_inc_iversion(inode); | ||
3025 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
3026 | /* | ||
3027 | * We round up to the block size at eof when determining which | ||
3028 | * extents to clone above, but shouldn't round up the file size. | ||
3029 | */ | ||
3030 | if (endoff > destoff + olen) | ||
3031 | endoff = destoff + olen; | ||
3032 | if (endoff > inode->i_size) | ||
3033 | btrfs_i_size_write(inode, endoff); | ||
3034 | |||
3035 | ret = btrfs_update_inode(trans, root, inode); | ||
3036 | if (ret) { | ||
3037 | btrfs_abort_transaction(trans, root, ret); | ||
3038 | btrfs_end_transaction(trans, root); | ||
3039 | goto out; | ||
3040 | } | ||
3041 | ret = btrfs_end_transaction(trans, root); | ||
3042 | out: | ||
3043 | return ret; | ||
3044 | } | ||
3045 | |||
3046 | static void clone_update_extent_map(struct inode *inode, | ||
3047 | const struct btrfs_trans_handle *trans, | ||
3048 | const struct btrfs_path *path, | ||
3049 | struct btrfs_file_extent_item *fi, | ||
3050 | const u64 hole_offset, | ||
3051 | const u64 hole_len) | ||
3052 | { | ||
3053 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
3054 | struct extent_map *em; | ||
3055 | int ret; | ||
3056 | |||
3057 | em = alloc_extent_map(); | ||
3058 | if (!em) { | ||
3059 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
3060 | &BTRFS_I(inode)->runtime_flags); | ||
3061 | return; | ||
3062 | } | ||
3063 | |||
3064 | if (fi) { | ||
3065 | btrfs_extent_item_to_extent_map(inode, path, fi, false, em); | ||
3066 | em->generation = -1; | ||
3067 | if (btrfs_file_extent_type(path->nodes[0], fi) == | ||
3068 | BTRFS_FILE_EXTENT_INLINE) | ||
3069 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
3070 | &BTRFS_I(inode)->runtime_flags); | ||
3071 | } else { | ||
3072 | em->start = hole_offset; | ||
3073 | em->len = hole_len; | ||
3074 | em->ram_bytes = em->len; | ||
3075 | em->orig_start = hole_offset; | ||
3076 | em->block_start = EXTENT_MAP_HOLE; | ||
3077 | em->block_len = 0; | ||
3078 | em->orig_block_len = 0; | ||
3079 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
3080 | em->generation = trans->transid; | ||
3081 | } | ||
3082 | |||
3083 | while (1) { | ||
3084 | write_lock(&em_tree->lock); | ||
3085 | ret = add_extent_mapping(em_tree, em, 1); | ||
3086 | write_unlock(&em_tree->lock); | ||
3087 | if (ret != -EEXIST) { | ||
3088 | free_extent_map(em); | ||
3089 | break; | ||
3090 | } | ||
3091 | btrfs_drop_extent_cache(inode, em->start, | ||
3092 | em->start + em->len - 1, 0); | ||
3093 | } | ||
3094 | |||
3095 | if (unlikely(ret)) | ||
3096 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
3097 | &BTRFS_I(inode)->runtime_flags); | ||
3098 | } | ||
3099 | |||
2915 | /** | 3100 | /** |
2916 | * btrfs_clone() - clone a range from inode file to another | 3101 | * btrfs_clone() - clone a range from inode file to another |
2917 | * | 3102 | * |
@@ -2924,7 +3109,8 @@ out: | |||
2924 | * @destoff: Offset within @inode to start clone | 3109 | * @destoff: Offset within @inode to start clone |
2925 | */ | 3110 | */ |
2926 | static int btrfs_clone(struct inode *src, struct inode *inode, | 3111 | static int btrfs_clone(struct inode *src, struct inode *inode, |
2927 | u64 off, u64 olen, u64 olen_aligned, u64 destoff) | 3112 | const u64 off, const u64 olen, const u64 olen_aligned, |
3113 | const u64 destoff) | ||
2928 | { | 3114 | { |
2929 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3115 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2930 | struct btrfs_path *path = NULL; | 3116 | struct btrfs_path *path = NULL; |
@@ -2935,7 +3121,10 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
2935 | u32 nritems; | 3121 | u32 nritems; |
2936 | int slot; | 3122 | int slot; |
2937 | int ret; | 3123 | int ret; |
2938 | u64 len = olen_aligned; | 3124 | int no_quota; |
3125 | const u64 len = olen_aligned; | ||
3126 | u64 last_disko = 0; | ||
3127 | u64 last_dest_end = destoff; | ||
2939 | 3128 | ||
2940 | ret = -ENOMEM; | 3129 | ret = -ENOMEM; |
2941 | buf = vmalloc(btrfs_level_size(root, 0)); | 3130 | buf = vmalloc(btrfs_level_size(root, 0)); |
@@ -2952,7 +3141,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
2952 | /* clone data */ | 3141 | /* clone data */ |
2953 | key.objectid = btrfs_ino(src); | 3142 | key.objectid = btrfs_ino(src); |
2954 | key.type = BTRFS_EXTENT_DATA_KEY; | 3143 | key.type = BTRFS_EXTENT_DATA_KEY; |
2955 | key.offset = 0; | 3144 | key.offset = off; |
2956 | 3145 | ||
2957 | while (1) { | 3146 | while (1) { |
2958 | /* | 3147 | /* |
@@ -2964,9 +3153,21 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
2964 | 0, 0); | 3153 | 0, 0); |
2965 | if (ret < 0) | 3154 | if (ret < 0) |
2966 | goto out; | 3155 | goto out; |
3156 | /* | ||
3157 | * First search, if no extent item that starts at offset off was | ||
3158 | * found but the previous item is an extent item, it's possible | ||
3159 | * it might overlap our target range, therefore process it. | ||
3160 | */ | ||
3161 | if (key.offset == off && ret > 0 && path->slots[0] > 0) { | ||
3162 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
3163 | path->slots[0] - 1); | ||
3164 | if (key.type == BTRFS_EXTENT_DATA_KEY) | ||
3165 | path->slots[0]--; | ||
3166 | } | ||
2967 | 3167 | ||
2968 | nritems = btrfs_header_nritems(path->nodes[0]); | 3168 | nritems = btrfs_header_nritems(path->nodes[0]); |
2969 | process_slot: | 3169 | process_slot: |
3170 | no_quota = 1; | ||
2970 | if (path->slots[0] >= nritems) { | 3171 | if (path->slots[0] >= nritems) { |
2971 | ret = btrfs_next_leaf(BTRFS_I(src)->root, path); | 3172 | ret = btrfs_next_leaf(BTRFS_I(src)->root, path); |
2972 | if (ret < 0) | 3173 | if (ret < 0) |
@@ -2991,7 +3192,7 @@ process_slot: | |||
2991 | u64 disko = 0, diskl = 0; | 3192 | u64 disko = 0, diskl = 0; |
2992 | u64 datao = 0, datal = 0; | 3193 | u64 datao = 0, datal = 0; |
2993 | u8 comp; | 3194 | u8 comp; |
2994 | u64 endoff; | 3195 | u64 drop_start; |
2995 | 3196 | ||
2996 | extent = btrfs_item_ptr(leaf, slot, | 3197 | extent = btrfs_item_ptr(leaf, slot, |
2997 | struct btrfs_file_extent_item); | 3198 | struct btrfs_file_extent_item); |
@@ -3012,10 +3213,16 @@ process_slot: | |||
3012 | extent); | 3213 | extent); |
3013 | } | 3214 | } |
3014 | 3215 | ||
3015 | if (key.offset + datal <= off || | 3216 | /* |
3016 | key.offset >= off + len - 1) { | 3217 | * The first search might have left us at an extent |
3218 | * item that ends before our target range's start, can | ||
3219 | * happen if we have holes and NO_HOLES feature enabled. | ||
3220 | */ | ||
3221 | if (key.offset + datal <= off) { | ||
3017 | path->slots[0]++; | 3222 | path->slots[0]++; |
3018 | goto process_slot; | 3223 | goto process_slot; |
3224 | } else if (key.offset >= off + len) { | ||
3225 | break; | ||
3019 | } | 3226 | } |
3020 | 3227 | ||
3021 | size = btrfs_item_size_nr(leaf, slot); | 3228 | size = btrfs_item_size_nr(leaf, slot); |
@@ -3034,6 +3241,18 @@ process_slot: | |||
3034 | new_key.offset = destoff; | 3241 | new_key.offset = destoff; |
3035 | 3242 | ||
3036 | /* | 3243 | /* |
3244 | * Deal with a hole that doesn't have an extent item | ||
3245 | * that represents it (NO_HOLES feature enabled). | ||
3246 | * This hole is either in the middle of the cloning | ||
3247 | * range or at the beginning (fully overlaps it or | ||
3248 | * partially overlaps it). | ||
3249 | */ | ||
3250 | if (new_key.offset != last_dest_end) | ||
3251 | drop_start = last_dest_end; | ||
3252 | else | ||
3253 | drop_start = new_key.offset; | ||
3254 | |||
3255 | /* | ||
3037 | * 1 - adjusting old extent (we may have to split it) | 3256 | * 1 - adjusting old extent (we may have to split it) |
3038 | * 1 - add new extent | 3257 | * 1 - add new extent |
3039 | * 1 - inode update | 3258 | * 1 - inode update |
@@ -3051,18 +3270,18 @@ process_slot: | |||
3051 | * | ------------- extent ------------- | | 3270 | * | ------------- extent ------------- | |
3052 | */ | 3271 | */ |
3053 | 3272 | ||
3054 | /* substract range b */ | 3273 | /* subtract range b */ |
3055 | if (key.offset + datal > off + len) | 3274 | if (key.offset + datal > off + len) |
3056 | datal = off + len - key.offset; | 3275 | datal = off + len - key.offset; |
3057 | 3276 | ||
3058 | /* substract range a */ | 3277 | /* subtract range a */ |
3059 | if (off > key.offset) { | 3278 | if (off > key.offset) { |
3060 | datao += off - key.offset; | 3279 | datao += off - key.offset; |
3061 | datal -= off - key.offset; | 3280 | datal -= off - key.offset; |
3062 | } | 3281 | } |
3063 | 3282 | ||
3064 | ret = btrfs_drop_extents(trans, root, inode, | 3283 | ret = btrfs_drop_extents(trans, root, inode, |
3065 | new_key.offset, | 3284 | drop_start, |
3066 | new_key.offset + datal, | 3285 | new_key.offset + datal, |
3067 | 1); | 3286 | 1); |
3068 | if (ret) { | 3287 | if (ret) { |
@@ -3099,6 +3318,28 @@ process_slot: | |||
3099 | datao); | 3318 | datao); |
3100 | btrfs_set_file_extent_num_bytes(leaf, extent, | 3319 | btrfs_set_file_extent_num_bytes(leaf, extent, |
3101 | datal); | 3320 | datal); |
3321 | |||
3322 | /* | ||
3323 | * We need to look up the roots that point at | ||
3324 | * this bytenr and see if the new root does. If | ||
3325 | * it does not we need to make sure we update | ||
3326 | * quotas appropriately. | ||
3327 | */ | ||
3328 | if (disko && root != BTRFS_I(src)->root && | ||
3329 | disko != last_disko) { | ||
3330 | no_quota = check_ref(trans, root, | ||
3331 | disko); | ||
3332 | if (no_quota < 0) { | ||
3333 | btrfs_abort_transaction(trans, | ||
3334 | root, | ||
3335 | ret); | ||
3336 | btrfs_end_transaction(trans, | ||
3337 | root); | ||
3338 | ret = no_quota; | ||
3339 | goto out; | ||
3340 | } | ||
3341 | } | ||
3342 | |||
3102 | if (disko) { | 3343 | if (disko) { |
3103 | inode_add_bytes(inode, datal); | 3344 | inode_add_bytes(inode, datal); |
3104 | ret = btrfs_inc_extent_ref(trans, root, | 3345 | ret = btrfs_inc_extent_ref(trans, root, |
@@ -3106,7 +3347,7 @@ process_slot: | |||
3106 | root->root_key.objectid, | 3347 | root->root_key.objectid, |
3107 | btrfs_ino(inode), | 3348 | btrfs_ino(inode), |
3108 | new_key.offset - datao, | 3349 | new_key.offset - datao, |
3109 | 0); | 3350 | no_quota); |
3110 | if (ret) { | 3351 | if (ret) { |
3111 | btrfs_abort_transaction(trans, | 3352 | btrfs_abort_transaction(trans, |
3112 | root, | 3353 | root, |
@@ -3141,7 +3382,7 @@ process_slot: | |||
3141 | aligned_end = ALIGN(new_key.offset + datal, | 3382 | aligned_end = ALIGN(new_key.offset + datal, |
3142 | root->sectorsize); | 3383 | root->sectorsize); |
3143 | ret = btrfs_drop_extents(trans, root, inode, | 3384 | ret = btrfs_drop_extents(trans, root, inode, |
3144 | new_key.offset, | 3385 | drop_start, |
3145 | aligned_end, | 3386 | aligned_end, |
3146 | 1); | 3387 | 1); |
3147 | if (ret) { | 3388 | if (ret) { |
@@ -3174,40 +3415,69 @@ process_slot: | |||
3174 | btrfs_item_ptr_offset(leaf, slot), | 3415 | btrfs_item_ptr_offset(leaf, slot), |
3175 | size); | 3416 | size); |
3176 | inode_add_bytes(inode, datal); | 3417 | inode_add_bytes(inode, datal); |
3418 | extent = btrfs_item_ptr(leaf, slot, | ||
3419 | struct btrfs_file_extent_item); | ||
3177 | } | 3420 | } |
3178 | 3421 | ||
3422 | /* If we have an implicit hole (NO_HOLES feature). */ | ||
3423 | if (drop_start < new_key.offset) | ||
3424 | clone_update_extent_map(inode, trans, | ||
3425 | path, NULL, drop_start, | ||
3426 | new_key.offset - drop_start); | ||
3427 | |||
3428 | clone_update_extent_map(inode, trans, path, | ||
3429 | extent, 0, 0); | ||
3430 | |||
3179 | btrfs_mark_buffer_dirty(leaf); | 3431 | btrfs_mark_buffer_dirty(leaf); |
3180 | btrfs_release_path(path); | 3432 | btrfs_release_path(path); |
3181 | 3433 | ||
3182 | inode_inc_iversion(inode); | 3434 | last_dest_end = new_key.offset + datal; |
3183 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 3435 | ret = clone_finish_inode_update(trans, inode, |
3184 | 3436 | last_dest_end, | |
3185 | /* | 3437 | destoff, olen); |
3186 | * we round up to the block size at eof when | 3438 | if (ret) |
3187 | * determining which extents to clone above, | ||
3188 | * but shouldn't round up the file size | ||
3189 | */ | ||
3190 | endoff = new_key.offset + datal; | ||
3191 | if (endoff > destoff+olen) | ||
3192 | endoff = destoff+olen; | ||
3193 | if (endoff > inode->i_size) | ||
3194 | btrfs_i_size_write(inode, endoff); | ||
3195 | |||
3196 | ret = btrfs_update_inode(trans, root, inode); | ||
3197 | if (ret) { | ||
3198 | btrfs_abort_transaction(trans, root, ret); | ||
3199 | btrfs_end_transaction(trans, root); | ||
3200 | goto out; | 3439 | goto out; |
3201 | } | 3440 | if (new_key.offset + datal >= destoff + len) |
3202 | ret = btrfs_end_transaction(trans, root); | 3441 | break; |
3203 | } | 3442 | } |
3204 | btrfs_release_path(path); | 3443 | btrfs_release_path(path); |
3205 | key.offset++; | 3444 | key.offset++; |
3206 | } | 3445 | } |
3207 | ret = 0; | 3446 | ret = 0; |
3208 | 3447 | ||
3448 | if (last_dest_end < destoff + len) { | ||
3449 | /* | ||
3450 | * We have an implicit hole (NO_HOLES feature is enabled) that | ||
3451 | * fully or partially overlaps our cloning range at its end. | ||
3452 | */ | ||
3453 | btrfs_release_path(path); | ||
3454 | |||
3455 | /* | ||
3456 | * 1 - remove extent(s) | ||
3457 | * 1 - inode update | ||
3458 | */ | ||
3459 | trans = btrfs_start_transaction(root, 2); | ||
3460 | if (IS_ERR(trans)) { | ||
3461 | ret = PTR_ERR(trans); | ||
3462 | goto out; | ||
3463 | } | ||
3464 | ret = btrfs_drop_extents(trans, root, inode, | ||
3465 | last_dest_end, destoff + len, 1); | ||
3466 | if (ret) { | ||
3467 | if (ret != -EOPNOTSUPP) | ||
3468 | btrfs_abort_transaction(trans, root, ret); | ||
3469 | btrfs_end_transaction(trans, root); | ||
3470 | goto out; | ||
3471 | } | ||
3472 | ret = clone_finish_inode_update(trans, inode, destoff + len, | ||
3473 | destoff, olen); | ||
3474 | if (ret) | ||
3475 | goto out; | ||
3476 | clone_update_extent_map(inode, trans, path, NULL, last_dest_end, | ||
3477 | destoff + len - last_dest_end); | ||
3478 | } | ||
3479 | |||
3209 | out: | 3480 | out: |
3210 | btrfs_release_path(path); | ||
3211 | btrfs_free_path(path); | 3481 | btrfs_free_path(path); |
3212 | vfree(buf); | 3482 | vfree(buf); |
3213 | return ret; | 3483 | return ret; |
@@ -3319,15 +3589,41 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
3319 | goto out_unlock; | 3589 | goto out_unlock; |
3320 | } | 3590 | } |
3321 | 3591 | ||
3322 | /* truncate page cache pages from target inode range */ | 3592 | /* |
3323 | truncate_inode_pages_range(&inode->i_data, destoff, | 3593 | * Lock the target range too. Right after we replace the file extent |
3324 | PAGE_CACHE_ALIGN(destoff + len) - 1); | 3594 | * items in the fs tree (which now point to the cloned data), we might |
3595 | * have a worker replace them with extent items relative to a write | ||
3596 | * operation that was issued before this clone operation (i.e. confront | ||
3597 | * with inode.c:btrfs_finish_ordered_io). | ||
3598 | */ | ||
3599 | if (same_inode) { | ||
3600 | u64 lock_start = min_t(u64, off, destoff); | ||
3601 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; | ||
3325 | 3602 | ||
3326 | lock_extent_range(src, off, len); | 3603 | lock_extent_range(src, lock_start, lock_len); |
3604 | } else { | ||
3605 | lock_extent_range(src, off, len); | ||
3606 | lock_extent_range(inode, destoff, len); | ||
3607 | } | ||
3327 | 3608 | ||
3328 | ret = btrfs_clone(src, inode, off, olen, len, destoff); | 3609 | ret = btrfs_clone(src, inode, off, olen, len, destoff); |
3329 | 3610 | ||
3330 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); | 3611 | if (same_inode) { |
3612 | u64 lock_start = min_t(u64, off, destoff); | ||
3613 | u64 lock_end = max_t(u64, off, destoff) + len - 1; | ||
3614 | |||
3615 | unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end); | ||
3616 | } else { | ||
3617 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); | ||
3618 | unlock_extent(&BTRFS_I(inode)->io_tree, destoff, | ||
3619 | destoff + len - 1); | ||
3620 | } | ||
3621 | /* | ||
3622 | * Truncate page cache pages so that future reads will see the cloned | ||
3623 | * data immediately and not the previous data. | ||
3624 | */ | ||
3625 | truncate_inode_pages_range(&inode->i_data, destoff, | ||
3626 | PAGE_CACHE_ALIGN(destoff + len) - 1); | ||
3331 | out_unlock: | 3627 | out_unlock: |
3332 | if (!same_inode) { | 3628 | if (!same_inode) { |
3333 | if (inode < src) { | 3629 | if (inode < src) { |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index b47f669aca75..dfad8514f0da 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -143,7 +143,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
143 | if (ret != LZO_E_OK) { | 143 | if (ret != LZO_E_OK) { |
144 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", | 144 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", |
145 | ret); | 145 | ret); |
146 | ret = -1; | 146 | ret = -EIO; |
147 | goto out; | 147 | goto out; |
148 | } | 148 | } |
149 | 149 | ||
@@ -189,7 +189,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
189 | kunmap(out_page); | 189 | kunmap(out_page); |
190 | if (nr_pages == nr_dest_pages) { | 190 | if (nr_pages == nr_dest_pages) { |
191 | out_page = NULL; | 191 | out_page = NULL; |
192 | ret = -1; | 192 | ret = -E2BIG; |
193 | goto out; | 193 | goto out; |
194 | } | 194 | } |
195 | 195 | ||
@@ -208,7 +208,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
208 | 208 | ||
209 | /* we're making it bigger, give up */ | 209 | /* we're making it bigger, give up */ |
210 | if (tot_in > 8192 && tot_in < tot_out) { | 210 | if (tot_in > 8192 && tot_in < tot_out) { |
211 | ret = -1; | 211 | ret = -E2BIG; |
212 | goto out; | 212 | goto out; |
213 | } | 213 | } |
214 | 214 | ||
@@ -335,7 +335,7 @@ cont: | |||
335 | break; | 335 | break; |
336 | 336 | ||
337 | if (page_in_index + 1 >= total_pages_in) { | 337 | if (page_in_index + 1 >= total_pages_in) { |
338 | ret = -1; | 338 | ret = -EIO; |
339 | goto done; | 339 | goto done; |
340 | } | 340 | } |
341 | 341 | ||
@@ -358,7 +358,7 @@ cont: | |||
358 | kunmap(pages_in[page_in_index - 1]); | 358 | kunmap(pages_in[page_in_index - 1]); |
359 | if (ret != LZO_E_OK) { | 359 | if (ret != LZO_E_OK) { |
360 | printk(KERN_WARNING "BTRFS: decompress failed\n"); | 360 | printk(KERN_WARNING "BTRFS: decompress failed\n"); |
361 | ret = -1; | 361 | ret = -EIO; |
362 | break; | 362 | break; |
363 | } | 363 | } |
364 | 364 | ||
@@ -402,12 +402,12 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | |||
402 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | 402 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); |
403 | if (ret != LZO_E_OK) { | 403 | if (ret != LZO_E_OK) { |
404 | printk(KERN_WARNING "BTRFS: decompress failed!\n"); | 404 | printk(KERN_WARNING "BTRFS: decompress failed!\n"); |
405 | ret = -1; | 405 | ret = -EIO; |
406 | goto out; | 406 | goto out; |
407 | } | 407 | } |
408 | 408 | ||
409 | if (out_len < start_byte) { | 409 | if (out_len < start_byte) { |
410 | ret = -1; | 410 | ret = -EIO; |
411 | goto out; | 411 | goto out; |
412 | } | 412 | } |
413 | 413 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a94b05f72869..e12441c7cf1d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno, | |||
67 | { | 67 | { |
68 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 68 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
69 | btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " | 69 | btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " |
70 | "%llu\n", offset); | 70 | "%llu", offset); |
71 | } | 71 | } |
72 | 72 | ||
73 | /* | 73 | /* |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2cf905877aaf..cf5aead95a7f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include "ulist.h" | 32 | #include "ulist.h" |
33 | #include "backref.h" | 33 | #include "backref.h" |
34 | #include "extent_io.h" | 34 | #include "extent_io.h" |
35 | #include "qgroup.h" | ||
35 | 36 | ||
36 | /* TODO XXX FIXME | 37 | /* TODO XXX FIXME |
37 | * - subvol delete -> delete when ref goes to 0? delete limits also? | 38 | * - subvol delete -> delete when ref goes to 0? delete limits also? |
@@ -84,8 +85,8 @@ struct btrfs_qgroup { | |||
84 | /* | 85 | /* |
85 | * temp variables for accounting operations | 86 | * temp variables for accounting operations |
86 | */ | 87 | */ |
87 | u64 tag; | 88 | u64 old_refcnt; |
88 | u64 refcnt; | 89 | u64 new_refcnt; |
89 | }; | 90 | }; |
90 | 91 | ||
91 | /* | 92 | /* |
@@ -98,6 +99,9 @@ struct btrfs_qgroup_list { | |||
98 | struct btrfs_qgroup *member; | 99 | struct btrfs_qgroup *member; |
99 | }; | 100 | }; |
100 | 101 | ||
102 | #define ptr_to_u64(x) ((u64)(uintptr_t)x) | ||
103 | #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) | ||
104 | |||
101 | static int | 105 | static int |
102 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | 106 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, |
103 | int init_flags); | 107 | int init_flags); |
@@ -242,6 +246,21 @@ static int del_relation_rb(struct btrfs_fs_info *fs_info, | |||
242 | return -ENOENT; | 246 | return -ENOENT; |
243 | } | 247 | } |
244 | 248 | ||
249 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
250 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
251 | u64 rfer, u64 excl) | ||
252 | { | ||
253 | struct btrfs_qgroup *qgroup; | ||
254 | |||
255 | qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
256 | if (!qgroup) | ||
257 | return -EINVAL; | ||
258 | if (qgroup->rfer != rfer || qgroup->excl != excl) | ||
259 | return -EINVAL; | ||
260 | return 0; | ||
261 | } | ||
262 | #endif | ||
263 | |||
245 | /* | 264 | /* |
246 | * The full config is read in one go, only called from open_ctree() | 265 | * The full config is read in one go, only called from open_ctree() |
247 | * It doesn't use any locking, as at this point we're still single-threaded | 266 | * It doesn't use any locking, as at this point we're still single-threaded |
@@ -520,6 +539,10 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
520 | struct extent_buffer *leaf; | 539 | struct extent_buffer *leaf; |
521 | struct btrfs_key key; | 540 | struct btrfs_key key; |
522 | 541 | ||
542 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
543 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &quota_root->state))) | ||
544 | return 0; | ||
545 | #endif | ||
523 | path = btrfs_alloc_path(); | 546 | path = btrfs_alloc_path(); |
524 | if (!path) | 547 | if (!path) |
525 | return -ENOMEM; | 548 | return -ENOMEM; |
@@ -669,6 +692,10 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, | |||
669 | int ret; | 692 | int ret; |
670 | int slot; | 693 | int slot; |
671 | 694 | ||
695 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
696 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
697 | return 0; | ||
698 | #endif | ||
672 | key.objectid = 0; | 699 | key.objectid = 0; |
673 | key.type = BTRFS_QGROUP_INFO_KEY; | 700 | key.type = BTRFS_QGROUP_INFO_KEY; |
674 | key.offset = qgroup->qgroupid; | 701 | key.offset = qgroup->qgroupid; |
@@ -1174,33 +1201,198 @@ out: | |||
1174 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 1201 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
1175 | return ret; | 1202 | return ret; |
1176 | } | 1203 | } |
1204 | static int comp_oper(struct btrfs_qgroup_operation *oper1, | ||
1205 | struct btrfs_qgroup_operation *oper2) | ||
1206 | { | ||
1207 | if (oper1->bytenr < oper2->bytenr) | ||
1208 | return -1; | ||
1209 | if (oper1->bytenr > oper2->bytenr) | ||
1210 | return 1; | ||
1211 | if (oper1->seq < oper2->seq) | ||
1212 | return -1; | ||
1213 | if (oper1->seq > oper2->seq) | ||
1214 | return -1; | ||
1215 | if (oper1->ref_root < oper2->ref_root) | ||
1216 | return -1; | ||
1217 | if (oper1->ref_root > oper2->ref_root) | ||
1218 | return 1; | ||
1219 | if (oper1->type < oper2->type) | ||
1220 | return -1; | ||
1221 | if (oper1->type > oper2->type) | ||
1222 | return 1; | ||
1223 | return 0; | ||
1224 | } | ||
1225 | |||
1226 | static int insert_qgroup_oper(struct btrfs_fs_info *fs_info, | ||
1227 | struct btrfs_qgroup_operation *oper) | ||
1228 | { | ||
1229 | struct rb_node **p; | ||
1230 | struct rb_node *parent = NULL; | ||
1231 | struct btrfs_qgroup_operation *cur; | ||
1232 | int cmp; | ||
1233 | |||
1234 | spin_lock(&fs_info->qgroup_op_lock); | ||
1235 | p = &fs_info->qgroup_op_tree.rb_node; | ||
1236 | while (*p) { | ||
1237 | parent = *p; | ||
1238 | cur = rb_entry(parent, struct btrfs_qgroup_operation, n); | ||
1239 | cmp = comp_oper(cur, oper); | ||
1240 | if (cmp < 0) { | ||
1241 | p = &(*p)->rb_right; | ||
1242 | } else if (cmp) { | ||
1243 | p = &(*p)->rb_left; | ||
1244 | } else { | ||
1245 | spin_unlock(&fs_info->qgroup_op_lock); | ||
1246 | return -EEXIST; | ||
1247 | } | ||
1248 | } | ||
1249 | rb_link_node(&oper->n, parent, p); | ||
1250 | rb_insert_color(&oper->n, &fs_info->qgroup_op_tree); | ||
1251 | spin_unlock(&fs_info->qgroup_op_lock); | ||
1252 | return 0; | ||
1253 | } | ||
1177 | 1254 | ||
1178 | /* | 1255 | /* |
1179 | * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts | 1256 | * Record a quota operation for processing later on. |
1180 | * the modification into a list that's later used by btrfs_end_transaction to | 1257 | * @trans: the transaction we are adding the delayed op to. |
1181 | * pass the recorded modifications on to btrfs_qgroup_account_ref. | 1258 | * @fs_info: the fs_info for this fs. |
1259 | * @ref_root: the root of the reference we are acting on, | ||
1260 | * @bytenr: the bytenr we are acting on. | ||
1261 | * @num_bytes: the number of bytes in the reference. | ||
1262 | * @type: the type of operation this is. | ||
1263 | * @mod_seq: do we need to get a sequence number for looking up roots. | ||
1264 | * | ||
1265 | * We just add it to our trans qgroup_ref_list and carry on and process these | ||
1266 | * operations in order at some later point. If the reference root isn't a fs | ||
1267 | * root then we don't bother with doing anything. | ||
1268 | * | ||
1269 | * MUST BE HOLDING THE REF LOCK. | ||
1182 | */ | 1270 | */ |
1183 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | 1271 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, |
1184 | struct btrfs_delayed_ref_node *node, | 1272 | struct btrfs_fs_info *fs_info, u64 ref_root, |
1185 | struct btrfs_delayed_extent_op *extent_op) | 1273 | u64 bytenr, u64 num_bytes, |
1274 | enum btrfs_qgroup_operation_type type, int mod_seq) | ||
1186 | { | 1275 | { |
1187 | struct qgroup_update *u; | 1276 | struct btrfs_qgroup_operation *oper; |
1277 | int ret; | ||
1278 | |||
1279 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
1280 | return 0; | ||
1188 | 1281 | ||
1189 | BUG_ON(!trans->delayed_ref_elem.seq); | 1282 | oper = kmalloc(sizeof(*oper), GFP_NOFS); |
1190 | u = kmalloc(sizeof(*u), GFP_NOFS); | 1283 | if (!oper) |
1191 | if (!u) | ||
1192 | return -ENOMEM; | 1284 | return -ENOMEM; |
1193 | 1285 | ||
1194 | u->node = node; | 1286 | oper->ref_root = ref_root; |
1195 | u->extent_op = extent_op; | 1287 | oper->bytenr = bytenr; |
1196 | list_add_tail(&u->list, &trans->qgroup_ref_list); | 1288 | oper->num_bytes = num_bytes; |
1289 | oper->type = type; | ||
1290 | oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); | ||
1291 | INIT_LIST_HEAD(&oper->elem.list); | ||
1292 | oper->elem.seq = 0; | ||
1293 | ret = insert_qgroup_oper(fs_info, oper); | ||
1294 | if (ret) { | ||
1295 | /* Shouldn't happen so have an assert for developers */ | ||
1296 | ASSERT(0); | ||
1297 | kfree(oper); | ||
1298 | return ret; | ||
1299 | } | ||
1300 | list_add_tail(&oper->list, &trans->qgroup_ref_list); | ||
1301 | |||
1302 | if (mod_seq) | ||
1303 | btrfs_get_tree_mod_seq(fs_info, &oper->elem); | ||
1197 | 1304 | ||
1198 | return 0; | 1305 | return 0; |
1199 | } | 1306 | } |
1200 | 1307 | ||
1201 | static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, | 1308 | /* |
1202 | struct ulist *roots, struct ulist *tmp, | 1309 | * The easy accounting, if we are adding/removing the only ref for an extent |
1203 | u64 seq) | 1310 | * then this qgroup and all of the parent qgroups get their refrence and |
1311 | * exclusive counts adjusted. | ||
1312 | */ | ||
1313 | static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | ||
1314 | struct btrfs_qgroup_operation *oper) | ||
1315 | { | ||
1316 | struct btrfs_qgroup *qgroup; | ||
1317 | struct ulist *tmp; | ||
1318 | struct btrfs_qgroup_list *glist; | ||
1319 | struct ulist_node *unode; | ||
1320 | struct ulist_iterator uiter; | ||
1321 | int sign = 0; | ||
1322 | int ret = 0; | ||
1323 | |||
1324 | tmp = ulist_alloc(GFP_NOFS); | ||
1325 | if (!tmp) | ||
1326 | return -ENOMEM; | ||
1327 | |||
1328 | spin_lock(&fs_info->qgroup_lock); | ||
1329 | if (!fs_info->quota_root) | ||
1330 | goto out; | ||
1331 | qgroup = find_qgroup_rb(fs_info, oper->ref_root); | ||
1332 | if (!qgroup) | ||
1333 | goto out; | ||
1334 | switch (oper->type) { | ||
1335 | case BTRFS_QGROUP_OPER_ADD_EXCL: | ||
1336 | sign = 1; | ||
1337 | break; | ||
1338 | case BTRFS_QGROUP_OPER_SUB_EXCL: | ||
1339 | sign = -1; | ||
1340 | break; | ||
1341 | default: | ||
1342 | ASSERT(0); | ||
1343 | } | ||
1344 | qgroup->rfer += sign * oper->num_bytes; | ||
1345 | qgroup->rfer_cmpr += sign * oper->num_bytes; | ||
1346 | |||
1347 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | ||
1348 | qgroup->excl += sign * oper->num_bytes; | ||
1349 | qgroup->excl_cmpr += sign * oper->num_bytes; | ||
1350 | |||
1351 | qgroup_dirty(fs_info, qgroup); | ||
1352 | |||
1353 | /* Get all of the parent groups that contain this qgroup */ | ||
1354 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
1355 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1356 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1357 | if (ret < 0) | ||
1358 | goto out; | ||
1359 | } | ||
1360 | |||
1361 | /* Iterate all of the parents and adjust their reference counts */ | ||
1362 | ULIST_ITER_INIT(&uiter); | ||
1363 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1364 | qgroup = u64_to_ptr(unode->aux); | ||
1365 | qgroup->rfer += sign * oper->num_bytes; | ||
1366 | qgroup->rfer_cmpr += sign * oper->num_bytes; | ||
1367 | qgroup->excl += sign * oper->num_bytes; | ||
1368 | if (sign < 0) | ||
1369 | WARN_ON(qgroup->excl < oper->num_bytes); | ||
1370 | qgroup->excl_cmpr += sign * oper->num_bytes; | ||
1371 | qgroup_dirty(fs_info, qgroup); | ||
1372 | |||
1373 | /* Add any parents of the parents */ | ||
1374 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
1375 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1376 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1377 | if (ret < 0) | ||
1378 | goto out; | ||
1379 | } | ||
1380 | } | ||
1381 | ret = 0; | ||
1382 | out: | ||
1383 | spin_unlock(&fs_info->qgroup_lock); | ||
1384 | ulist_free(tmp); | ||
1385 | return ret; | ||
1386 | } | ||
1387 | |||
1388 | /* | ||
1389 | * Walk all of the roots that pointed to our bytenr and adjust their refcnts | ||
1390 | * properly. | ||
1391 | */ | ||
1392 | static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, | ||
1393 | u64 root_to_skip, struct ulist *tmp, | ||
1394 | struct ulist *roots, struct ulist *qgroups, | ||
1395 | u64 seq, int *old_roots, int rescan) | ||
1204 | { | 1396 | { |
1205 | struct ulist_node *unode; | 1397 | struct ulist_node *unode; |
1206 | struct ulist_iterator uiter; | 1398 | struct ulist_iterator uiter; |
@@ -1211,256 +1403,549 @@ static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, | |||
1211 | 1403 | ||
1212 | ULIST_ITER_INIT(&uiter); | 1404 | ULIST_ITER_INIT(&uiter); |
1213 | while ((unode = ulist_next(roots, &uiter))) { | 1405 | while ((unode = ulist_next(roots, &uiter))) { |
1406 | /* We don't count our current root here */ | ||
1407 | if (unode->val == root_to_skip) | ||
1408 | continue; | ||
1214 | qg = find_qgroup_rb(fs_info, unode->val); | 1409 | qg = find_qgroup_rb(fs_info, unode->val); |
1215 | if (!qg) | 1410 | if (!qg) |
1216 | continue; | 1411 | continue; |
1412 | /* | ||
1413 | * We could have a pending removal of this same ref so we may | ||
1414 | * not have actually found our ref root when doing | ||
1415 | * btrfs_find_all_roots, so we need to keep track of how many | ||
1416 | * old roots we find in case we removed ours and added a | ||
1417 | * different one at the same time. I don't think this could | ||
1418 | * happen in practice but that sort of thinking leads to pain | ||
1419 | * and suffering and to the dark side. | ||
1420 | */ | ||
1421 | (*old_roots)++; | ||
1217 | 1422 | ||
1218 | ulist_reinit(tmp); | 1423 | ulist_reinit(tmp); |
1219 | /* XXX id not needed */ | 1424 | ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), |
1220 | ret = ulist_add(tmp, qg->qgroupid, | 1425 | GFP_ATOMIC); |
1221 | (u64)(uintptr_t)qg, GFP_ATOMIC); | 1426 | if (ret < 0) |
1427 | return ret; | ||
1428 | ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); | ||
1222 | if (ret < 0) | 1429 | if (ret < 0) |
1223 | return ret; | 1430 | return ret; |
1224 | ULIST_ITER_INIT(&tmp_uiter); | 1431 | ULIST_ITER_INIT(&tmp_uiter); |
1225 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1432 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
1226 | struct btrfs_qgroup_list *glist; | 1433 | struct btrfs_qgroup_list *glist; |
1227 | 1434 | ||
1228 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; | 1435 | qg = u64_to_ptr(tmp_unode->aux); |
1229 | if (qg->refcnt < seq) | 1436 | /* |
1230 | qg->refcnt = seq + 1; | 1437 | * We use this sequence number to keep from having to |
1438 | * run the whole list and 0 out the refcnt every time. | ||
1440 | * We basically use sequence as the known 0 count and | ||
1441 | * then add 1 every time we see a qgroup. This is how we | ||
1441 | * get how many of the roots actually point up to the | ||
1442 | * upper level qgroups in order to determine exclusive | ||
1443 | * counts. | ||
1444 | * | ||
1445 | * For rescan we want to set old_refcnt to seq so our | ||
1446 | * exclusive calculations end up correct. | ||
1447 | */ | ||
1448 | if (rescan) | ||
1449 | qg->old_refcnt = seq; | ||
1450 | else if (qg->old_refcnt < seq) | ||
1451 | qg->old_refcnt = seq + 1; | ||
1231 | else | 1452 | else |
1232 | ++qg->refcnt; | 1453 | qg->old_refcnt++; |
1233 | 1454 | ||
1455 | if (qg->new_refcnt < seq) | ||
1456 | qg->new_refcnt = seq + 1; | ||
1457 | else | ||
1458 | qg->new_refcnt++; | ||
1234 | list_for_each_entry(glist, &qg->groups, next_group) { | 1459 | list_for_each_entry(glist, &qg->groups, next_group) { |
1460 | ret = ulist_add(qgroups, glist->group->qgroupid, | ||
1461 | ptr_to_u64(glist->group), | ||
1462 | GFP_ATOMIC); | ||
1463 | if (ret < 0) | ||
1464 | return ret; | ||
1235 | ret = ulist_add(tmp, glist->group->qgroupid, | 1465 | ret = ulist_add(tmp, glist->group->qgroupid, |
1236 | (u64)(uintptr_t)glist->group, | 1466 | ptr_to_u64(glist->group), |
1237 | GFP_ATOMIC); | 1467 | GFP_ATOMIC); |
1238 | if (ret < 0) | 1468 | if (ret < 0) |
1239 | return ret; | 1469 | return ret; |
1240 | } | 1470 | } |
1241 | } | 1471 | } |
1242 | } | 1472 | } |
1473 | return 0; | ||
1474 | } | ||
1475 | |||
1476 | /* | ||
1477 | * We need to walk forward in our operation tree and account for any roots that | ||
1478 | * were deleted after we made this operation. | ||
1479 | */ | ||
1480 | static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, | ||
1481 | struct btrfs_qgroup_operation *oper, | ||
1482 | struct ulist *tmp, | ||
1483 | struct ulist *qgroups, u64 seq, | ||
1484 | int *old_roots) | ||
1485 | { | ||
1486 | struct ulist_node *unode; | ||
1487 | struct ulist_iterator uiter; | ||
1488 | struct btrfs_qgroup *qg; | ||
1489 | struct btrfs_qgroup_operation *tmp_oper; | ||
1490 | struct rb_node *n; | ||
1491 | int ret; | ||
1492 | |||
1493 | ulist_reinit(tmp); | ||
1243 | 1494 | ||
1495 | /* | ||
1496 | * We only walk forward in the tree since we're only interested in | ||
1497 | * removals that happened _after_ our operation. | ||
1498 | */ | ||
1499 | spin_lock(&fs_info->qgroup_op_lock); | ||
1500 | n = rb_next(&oper->n); | ||
1501 | spin_unlock(&fs_info->qgroup_op_lock); | ||
1502 | if (!n) | ||
1503 | return 0; | ||
1504 | tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); | ||
1505 | while (tmp_oper->bytenr == oper->bytenr) { | ||
1506 | /* | ||
1507 | * If it's not a removal we don't care, additions work out | ||
1508 | * properly with our refcnt tracking. | ||
1509 | */ | ||
1510 | if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && | ||
1511 | tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) | ||
1512 | goto next; | ||
1513 | qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); | ||
1514 | if (!qg) | ||
1515 | goto next; | ||
1516 | ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), | ||
1517 | GFP_ATOMIC); | ||
1518 | if (ret) { | ||
1519 | if (ret < 0) | ||
1520 | return ret; | ||
1521 | /* | ||
1522 | * We only want to increase old_roots if this qgroup is | ||
1523 | * not already in the list of qgroups. If it is already | ||
1524 | * there then that means it must have been re-added or | ||
1525 | * the delete will be discarded because we had an | ||
1526 | * existing ref that we haven't looked up yet. In this | ||
1527 | * case we don't want to increase old_roots. So if ret | ||
1528 | * == 1 then we know that this is the first time we've | ||
1529 | * seen this qgroup and we can bump the old_roots. | ||
1530 | */ | ||
1531 | (*old_roots)++; | ||
1532 | ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), | ||
1533 | GFP_ATOMIC); | ||
1534 | if (ret < 0) | ||
1535 | return ret; | ||
1536 | } | ||
1537 | next: | ||
1538 | spin_lock(&fs_info->qgroup_op_lock); | ||
1539 | n = rb_next(&tmp_oper->n); | ||
1540 | spin_unlock(&fs_info->qgroup_op_lock); | ||
1541 | if (!n) | ||
1542 | break; | ||
1543 | tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); | ||
1544 | } | ||
1545 | |||
1546 | /* Ok now process the qgroups we found */ | ||
1547 | ULIST_ITER_INIT(&uiter); | ||
1548 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1549 | struct btrfs_qgroup_list *glist; | ||
1550 | |||
1551 | qg = u64_to_ptr(unode->aux); | ||
1552 | if (qg->old_refcnt < seq) | ||
1553 | qg->old_refcnt = seq + 1; | ||
1554 | else | ||
1555 | qg->old_refcnt++; | ||
1556 | if (qg->new_refcnt < seq) | ||
1557 | qg->new_refcnt = seq + 1; | ||
1558 | else | ||
1559 | qg->new_refcnt++; | ||
1560 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1561 | ret = ulist_add(qgroups, glist->group->qgroupid, | ||
1562 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1563 | if (ret < 0) | ||
1564 | return ret; | ||
1565 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1566 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1567 | if (ret < 0) | ||
1568 | return ret; | ||
1569 | } | ||
1570 | } | ||
1244 | return 0; | 1571 | return 0; |
1245 | } | 1572 | } |
1246 | 1573 | ||
1247 | static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, | 1574 | /* Add refcnt for the newly added reference. */ |
1248 | struct ulist *roots, struct ulist *tmp, | 1575 | static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info, |
1249 | u64 seq, int sgn, u64 num_bytes, | 1576 | struct btrfs_qgroup_operation *oper, |
1250 | struct btrfs_qgroup *qgroup) | 1577 | struct btrfs_qgroup *qgroup, |
1578 | struct ulist *tmp, struct ulist *qgroups, | ||
1579 | u64 seq) | ||
1251 | { | 1580 | { |
1252 | struct ulist_node *unode; | 1581 | struct ulist_node *unode; |
1253 | struct ulist_iterator uiter; | 1582 | struct ulist_iterator uiter; |
1254 | struct btrfs_qgroup *qg; | 1583 | struct btrfs_qgroup *qg; |
1255 | struct btrfs_qgroup_list *glist; | ||
1256 | int ret; | 1584 | int ret; |
1257 | 1585 | ||
1258 | ulist_reinit(tmp); | 1586 | ulist_reinit(tmp); |
1259 | ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | 1587 | ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup), |
1588 | GFP_ATOMIC); | ||
1589 | if (ret < 0) | ||
1590 | return ret; | ||
1591 | ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup), | ||
1592 | GFP_ATOMIC); | ||
1260 | if (ret < 0) | 1593 | if (ret < 0) |
1261 | return ret; | 1594 | return ret; |
1262 | |||
1263 | ULIST_ITER_INIT(&uiter); | 1595 | ULIST_ITER_INIT(&uiter); |
1264 | while ((unode = ulist_next(tmp, &uiter))) { | 1596 | while ((unode = ulist_next(tmp, &uiter))) { |
1265 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 1597 | struct btrfs_qgroup_list *glist; |
1266 | if (qg->refcnt < seq) { | ||
1267 | /* not visited by step 1 */ | ||
1268 | qg->rfer += sgn * num_bytes; | ||
1269 | qg->rfer_cmpr += sgn * num_bytes; | ||
1270 | if (roots->nnodes == 0) { | ||
1271 | qg->excl += sgn * num_bytes; | ||
1272 | qg->excl_cmpr += sgn * num_bytes; | ||
1273 | } | ||
1274 | qgroup_dirty(fs_info, qg); | ||
1275 | } | ||
1276 | WARN_ON(qg->tag >= seq); | ||
1277 | qg->tag = seq; | ||
1278 | 1598 | ||
1599 | qg = u64_to_ptr(unode->aux); | ||
1600 | if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { | ||
1601 | if (qg->new_refcnt < seq) | ||
1602 | qg->new_refcnt = seq + 1; | ||
1603 | else | ||
1604 | qg->new_refcnt++; | ||
1605 | } else { | ||
1606 | if (qg->old_refcnt < seq) | ||
1607 | qg->old_refcnt = seq + 1; | ||
1608 | else | ||
1609 | qg->old_refcnt++; | ||
1610 | } | ||
1279 | list_for_each_entry(glist, &qg->groups, next_group) { | 1611 | list_for_each_entry(glist, &qg->groups, next_group) { |
1280 | ret = ulist_add(tmp, glist->group->qgroupid, | 1612 | ret = ulist_add(tmp, glist->group->qgroupid, |
1281 | (uintptr_t)glist->group, GFP_ATOMIC); | 1613 | ptr_to_u64(glist->group), GFP_ATOMIC); |
1614 | if (ret < 0) | ||
1615 | return ret; | ||
1616 | ret = ulist_add(qgroups, glist->group->qgroupid, | ||
1617 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1282 | if (ret < 0) | 1618 | if (ret < 0) |
1283 | return ret; | 1619 | return ret; |
1284 | } | 1620 | } |
1285 | } | 1621 | } |
1286 | |||
1287 | return 0; | 1622 | return 0; |
1288 | } | 1623 | } |
1289 | 1624 | ||
1290 | static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, | 1625 | /* |
1291 | struct ulist *roots, struct ulist *tmp, | 1626 | * This adjusts the counters for all referenced qgroups if need be. |
1292 | u64 seq, int sgn, u64 num_bytes) | 1627 | */ |
1628 | static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, | ||
1629 | u64 root_to_skip, u64 num_bytes, | ||
1630 | struct ulist *qgroups, u64 seq, | ||
1631 | int old_roots, int new_roots, int rescan) | ||
1293 | { | 1632 | { |
1294 | struct ulist_node *unode; | 1633 | struct ulist_node *unode; |
1295 | struct ulist_iterator uiter; | 1634 | struct ulist_iterator uiter; |
1296 | struct btrfs_qgroup *qg; | 1635 | struct btrfs_qgroup *qg; |
1297 | struct ulist_node *tmp_unode; | 1636 | u64 cur_new_count, cur_old_count; |
1298 | struct ulist_iterator tmp_uiter; | ||
1299 | int ret; | ||
1300 | 1637 | ||
1301 | ULIST_ITER_INIT(&uiter); | 1638 | ULIST_ITER_INIT(&uiter); |
1302 | while ((unode = ulist_next(roots, &uiter))) { | 1639 | while ((unode = ulist_next(qgroups, &uiter))) { |
1303 | qg = find_qgroup_rb(fs_info, unode->val); | 1640 | bool dirty = false; |
1304 | if (!qg) | ||
1305 | continue; | ||
1306 | 1641 | ||
1307 | ulist_reinit(tmp); | 1642 | qg = u64_to_ptr(unode->aux); |
1308 | ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); | 1643 | /* |
1309 | if (ret < 0) | 1644 | * Wasn't referenced before but is now, add to the reference |
1310 | return ret; | 1645 | * counters. |
1646 | */ | ||
1647 | if (qg->old_refcnt <= seq && qg->new_refcnt > seq) { | ||
1648 | qg->rfer += num_bytes; | ||
1649 | qg->rfer_cmpr += num_bytes; | ||
1650 | dirty = true; | ||
1651 | } | ||
1311 | 1652 | ||
1312 | ULIST_ITER_INIT(&tmp_uiter); | 1653 | /* |
1313 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1654 | * Was referenced before but isn't now, subtract from the |
1314 | struct btrfs_qgroup_list *glist; | 1655 | * reference counters. |
1656 | */ | ||
1657 | if (qg->old_refcnt > seq && qg->new_refcnt <= seq) { | ||
1658 | qg->rfer -= num_bytes; | ||
1659 | qg->rfer_cmpr -= num_bytes; | ||
1660 | dirty = true; | ||
1661 | } | ||
1315 | 1662 | ||
1316 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; | 1663 | if (qg->old_refcnt < seq) |
1317 | if (qg->tag == seq) | 1664 | cur_old_count = 0; |
1318 | continue; | 1665 | else |
1666 | cur_old_count = qg->old_refcnt - seq; | ||
1667 | if (qg->new_refcnt < seq) | ||
1668 | cur_new_count = 0; | ||
1669 | else | ||
1670 | cur_new_count = qg->new_refcnt - seq; | ||
1319 | 1671 | ||
1320 | if (qg->refcnt - seq == roots->nnodes) { | 1672 | /* |
1321 | qg->excl -= sgn * num_bytes; | 1673 | * If our refcount was the same as the roots previously but our |
1322 | qg->excl_cmpr -= sgn * num_bytes; | 1674 | * new count isn't the same as the number of roots now then we |
1323 | qgroup_dirty(fs_info, qg); | 1675 | * went from having an exclusive reference on this range to not. |
1324 | } | 1676 | */ |
1677 | if (old_roots && cur_old_count == old_roots && | ||
1678 | (cur_new_count != new_roots || new_roots == 0)) { | ||
1679 | WARN_ON(cur_new_count != new_roots && new_roots == 0); | ||
1680 | qg->excl -= num_bytes; | ||
1681 | qg->excl_cmpr -= num_bytes; | ||
1682 | dirty = true; | ||
1683 | } | ||
1325 | 1684 | ||
1326 | list_for_each_entry(glist, &qg->groups, next_group) { | 1685 | /* |
1327 | ret = ulist_add(tmp, glist->group->qgroupid, | 1686 | * If we didn't reference all the roots before but now we do we |
1328 | (uintptr_t)glist->group, | 1687 | * have an exclusive reference to this range. |
1329 | GFP_ATOMIC); | 1688 | */ |
1330 | if (ret < 0) | 1689 | if ((!old_roots || (old_roots && cur_old_count != old_roots)) |
1331 | return ret; | 1690 | && cur_new_count == new_roots) { |
1332 | } | 1691 | qg->excl += num_bytes; |
1692 | qg->excl_cmpr += num_bytes; | ||
1693 | dirty = true; | ||
1333 | } | 1694 | } |
1334 | } | ||
1335 | 1695 | ||
1696 | if (dirty) | ||
1697 | qgroup_dirty(fs_info, qg); | ||
1698 | } | ||
1336 | return 0; | 1699 | return 0; |
1337 | } | 1700 | } |
1338 | 1701 | ||
1339 | /* | 1702 | /* |
1340 | * btrfs_qgroup_account_ref is called for every ref that is added to or deleted | 1703 | * If we removed a data extent and there were other references for that bytenr |
1341 | * from the fs. First, all roots referencing the extent are searched, and | 1704 | * then we need to lookup all referenced roots to make sure we still don't |
1342 | * then the space is accounted accordingly to the different roots. The | 1705 | * reference this bytenr. If we do then we can just discard this operation. |
1343 | * accounting algorithm works in 3 steps documented inline. | ||
1344 | */ | 1706 | */ |
1345 | int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | 1707 | static int check_existing_refs(struct btrfs_trans_handle *trans, |
1346 | struct btrfs_fs_info *fs_info, | 1708 | struct btrfs_fs_info *fs_info, |
1347 | struct btrfs_delayed_ref_node *node, | 1709 | struct btrfs_qgroup_operation *oper) |
1348 | struct btrfs_delayed_extent_op *extent_op) | ||
1349 | { | 1710 | { |
1350 | struct btrfs_root *quota_root; | ||
1351 | u64 ref_root; | ||
1352 | struct btrfs_qgroup *qgroup; | ||
1353 | struct ulist *roots = NULL; | 1711 | struct ulist *roots = NULL; |
1354 | u64 seq; | 1712 | struct ulist_node *unode; |
1713 | struct ulist_iterator uiter; | ||
1355 | int ret = 0; | 1714 | int ret = 0; |
1356 | int sgn; | ||
1357 | 1715 | ||
1358 | if (!fs_info->quota_enabled) | 1716 | ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, |
1359 | return 0; | 1717 | oper->elem.seq, &roots); |
1360 | 1718 | if (ret < 0) | |
1361 | BUG_ON(!fs_info->quota_root); | 1719 | return ret; |
1720 | ret = 0; | ||
1362 | 1721 | ||
1363 | if (node->type == BTRFS_TREE_BLOCK_REF_KEY || | 1722 | ULIST_ITER_INIT(&uiter); |
1364 | node->type == BTRFS_SHARED_BLOCK_REF_KEY) { | 1723 | while ((unode = ulist_next(roots, &uiter))) { |
1365 | struct btrfs_delayed_tree_ref *ref; | 1724 | if (unode->val == oper->ref_root) { |
1366 | ref = btrfs_delayed_node_to_tree_ref(node); | 1725 | ret = 1; |
1367 | ref_root = ref->root; | 1726 | break; |
1368 | } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || | 1727 | } |
1369 | node->type == BTRFS_SHARED_DATA_REF_KEY) { | ||
1370 | struct btrfs_delayed_data_ref *ref; | ||
1371 | ref = btrfs_delayed_node_to_data_ref(node); | ||
1372 | ref_root = ref->root; | ||
1373 | } else { | ||
1374 | BUG(); | ||
1375 | } | 1728 | } |
1729 | ulist_free(roots); | ||
1730 | btrfs_put_tree_mod_seq(fs_info, &oper->elem); | ||
1376 | 1731 | ||
1377 | if (!is_fstree(ref_root)) { | 1732 | return ret; |
1378 | /* | 1733 | } |
1379 | * non-fs-trees are not being accounted | ||
1380 | */ | ||
1381 | return 0; | ||
1382 | } | ||
1383 | 1734 | ||
1384 | switch (node->action) { | 1735 | /* |
1385 | case BTRFS_ADD_DELAYED_REF: | 1736 | * If we share a reference across multiple roots then we may need to adjust |
1386 | case BTRFS_ADD_DELAYED_EXTENT: | 1737 | * various qgroups referenced and exclusive counters. The basic premise is this |
1387 | sgn = 1; | 1738 | * |
1388 | seq = btrfs_tree_mod_seq_prev(node->seq); | 1739 | * 1) We have seq to represent a 0 count. Instead of looping through all of the |
1389 | break; | 1740 | * qgroups and resetting their refcount to 0 we just constantly bump this |
1390 | case BTRFS_DROP_DELAYED_REF: | 1741 | * sequence number to act as the base reference count. This means that if |
1391 | sgn = -1; | 1742 | * anybody is equal to or below this sequence they were never referenced. We |
1392 | seq = node->seq; | 1743 | * jack this sequence up by the number of roots we found each time in order to |
1393 | break; | 1744 | * make sure we don't have any overlap. |
1394 | case BTRFS_UPDATE_DELAYED_HEAD: | 1745 | * |
1395 | return 0; | 1746 | * 2) We first search all the roots that reference the area _except_ the root |
1396 | default: | 1747 | * we're acting on currently. This makes up the old_refcnt of all the qgroups |
1397 | BUG(); | 1748 | * before. |
1398 | } | 1749 | * |
1750 | * 3) We walk all of the qgroups referenced by the root we are currently acting | ||
1751 | * on, and will either adjust old_refcnt in the case of a removal or the | ||
1752 | * new_refcnt in the case of an addition. | ||
1753 | * | ||
1754 | * 4) Finally we walk all the qgroups that are referenced by this range | ||
1755 | * including the root we are acting on currently. We will adjust the counters | ||
1756 | * based on the number of roots we had and will have after this operation. | ||
1757 | * | ||
1758 | * Take this example as an illustration | ||
1759 | * | ||
1760 | * [qgroup 1/0] | ||
1761 | * / | \ | ||
1762 | * [qg 0/0] [qg 0/1] [qg 0/2] | ||
1763 | * \ | / | ||
1764 | * [ extent ] | ||
1765 | * | ||
1766 | * Say we are adding a reference that is covered by qg 0/0. The first step | ||
1767 | * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with | ||
1768 | * old_roots being 2. Because it is adding new_roots will be 1. We then go | ||
1769 | * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's | ||
1770 | * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we | ||
1771 | * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a | ||
1772 | * reference and thus must add the size to the referenced bytes. Everything | ||
1773 | * else is the same so nothing else changes. | ||
1774 | */ | ||
1775 | static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, | ||
1776 | struct btrfs_fs_info *fs_info, | ||
1777 | struct btrfs_qgroup_operation *oper) | ||
1778 | { | ||
1779 | struct ulist *roots = NULL; | ||
1780 | struct ulist *qgroups, *tmp; | ||
1781 | struct btrfs_qgroup *qgroup; | ||
1782 | struct seq_list elem = {}; | ||
1783 | u64 seq; | ||
1784 | int old_roots = 0; | ||
1785 | int new_roots = 0; | ||
1786 | int ret = 0; | ||
1399 | 1787 | ||
1400 | mutex_lock(&fs_info->qgroup_rescan_lock); | 1788 | if (oper->elem.seq) { |
1401 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | 1789 | ret = check_existing_refs(trans, fs_info, oper); |
1402 | if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { | 1790 | if (ret < 0) |
1403 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 1791 | return ret; |
1792 | if (ret) | ||
1404 | return 0; | 1793 | return 0; |
1405 | } | ||
1406 | } | 1794 | } |
1407 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1408 | 1795 | ||
1409 | /* | 1796 | qgroups = ulist_alloc(GFP_NOFS); |
1410 | * the delayed ref sequence number we pass depends on the direction of | 1797 | if (!qgroups) |
1411 | * the operation. for add operations, we pass | 1798 | return -ENOMEM; |
1412 | * tree_mod_log_prev_seq(node->seq) to skip | ||
1413 | * the delayed ref's current sequence number, because we need the state | ||
1414 | * of the tree before the add operation. for delete operations, we pass | ||
1415 | * (node->seq) to include the delayed ref's current sequence number, | ||
1416 | * because we need the state of the tree after the delete operation. | ||
1417 | */ | ||
1418 | ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots); | ||
1419 | if (ret < 0) | ||
1420 | return ret; | ||
1421 | |||
1422 | spin_lock(&fs_info->qgroup_lock); | ||
1423 | 1799 | ||
1424 | quota_root = fs_info->quota_root; | 1800 | tmp = ulist_alloc(GFP_NOFS); |
1425 | if (!quota_root) | 1801 | if (!tmp) |
1426 | goto unlock; | 1802 | return -ENOMEM; |
1427 | 1803 | ||
1428 | qgroup = find_qgroup_rb(fs_info, ref_root); | 1804 | btrfs_get_tree_mod_seq(fs_info, &elem); |
1805 | ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, | ||
1806 | &roots); | ||
1807 | btrfs_put_tree_mod_seq(fs_info, &elem); | ||
1808 | if (ret < 0) { | ||
1809 | ulist_free(qgroups); | ||
1810 | ulist_free(tmp); | ||
1811 | return ret; | ||
1812 | } | ||
1813 | spin_lock(&fs_info->qgroup_lock); | ||
1814 | qgroup = find_qgroup_rb(fs_info, oper->ref_root); | ||
1429 | if (!qgroup) | 1815 | if (!qgroup) |
1430 | goto unlock; | 1816 | goto out; |
1817 | seq = fs_info->qgroup_seq; | ||
1431 | 1818 | ||
1432 | /* | 1819 | /* |
1433 | * step 1: for each old ref, visit all nodes once and inc refcnt | 1820 | * So roots is the list of all the roots currently pointing at the |
1821 | * bytenr, including the ref we are adding if we are adding, or not if | ||
1822 | * we are removing a ref. So we pass in the ref_root to skip that root | ||
1823 | * in our calculations. We set old_refcnt and new_refcnt cause who the | ||
1824 | * hell knows what everything looked like before, and it doesn't matter | ||
1825 | * except... | ||
1434 | */ | 1826 | */ |
1435 | ulist_reinit(fs_info->qgroup_ulist); | 1827 | ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups, |
1436 | seq = fs_info->qgroup_seq; | 1828 | seq, &old_roots, 0); |
1437 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | 1829 | if (ret < 0) |
1830 | goto out; | ||
1438 | 1831 | ||
1439 | ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, | 1832 | /* |
1440 | seq); | 1833 | * Now adjust the refcounts of the qgroups that care about this |
1441 | if (ret) | 1834 | * reference, either the old_count in the case of removal or new_count |
1442 | goto unlock; | 1835 | * in the case of an addition. |
1836 | */ | ||
1837 | ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups, | ||
1838 | seq); | ||
1839 | if (ret < 0) | ||
1840 | goto out; | ||
1443 | 1841 | ||
1444 | /* | 1842 | /* |
1445 | * step 2: walk from the new root | 1843 | * ...in the case of removals. If we had a removal before we got around |
1844 | * to processing this operation then we need to find that guy and count | ||
1845 | * his references as if they really existed so we don't end up screwing | ||
1846 | * up the exclusive counts. Then whenever we go to process the delete | ||
1847 | * everything will be grand and we can account for whatever exclusive | ||
1848 | * changes need to be made there. We also have to pass in old_roots so | ||
1849 | * we have an accurate count of the roots as it pertains to this | ||
1850 | * operation's view of the world. | ||
1446 | */ | 1851 | */ |
1447 | ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, | 1852 | ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, |
1448 | seq, sgn, node->num_bytes, qgroup); | 1853 | &old_roots); |
1449 | if (ret) | 1854 | if (ret < 0) |
1450 | goto unlock; | 1855 | goto out; |
1451 | 1856 | ||
1452 | /* | 1857 | /* |
1453 | * step 3: walk again from old refs | 1858 | * We are adding our root, need to adjust up the number of roots, |
1859 | * otherwise old_roots is the number of roots we want. | ||
1454 | */ | 1860 | */ |
1455 | ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, | 1861 | if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { |
1456 | seq, sgn, node->num_bytes); | 1862 | new_roots = old_roots + 1; |
1457 | if (ret) | 1863 | } else { |
1458 | goto unlock; | 1864 | new_roots = old_roots; |
1865 | old_roots++; | ||
1866 | } | ||
1867 | fs_info->qgroup_seq += old_roots + 1; | ||
1459 | 1868 | ||
1460 | unlock: | 1869 | |
1870 | /* | ||
1871 | * And now the magic happens, bless Arne for having a pretty elegant | ||
1872 | * solution for this. | ||
1873 | */ | ||
1874 | qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, | ||
1875 | qgroups, seq, old_roots, new_roots, 0); | ||
1876 | out: | ||
1461 | spin_unlock(&fs_info->qgroup_lock); | 1877 | spin_unlock(&fs_info->qgroup_lock); |
1878 | ulist_free(qgroups); | ||
1462 | ulist_free(roots); | 1879 | ulist_free(roots); |
1880 | ulist_free(tmp); | ||
1881 | return ret; | ||
1882 | } | ||
1883 | |||
1884 | /* | ||
1885 | * btrfs_qgroup_account is called for every queued qgroup operation. Unless a | ||
1886 | * running rescan has not yet passed the operation's bytenr, the operation is | ||
1887 | * dispatched by type: exclusive adds/removals go to qgroup_excl_accounting, | ||
1888 | * shared adds/removals go to qgroup_shared_accounting. | ||
1889 | */ | ||
1890 | static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, | ||
1891 | struct btrfs_fs_info *fs_info, | ||
1892 | struct btrfs_qgroup_operation *oper) | ||
1893 | { | ||
1894 | int ret = 0; | ||
1895 | |||
1896 | if (!fs_info->quota_enabled) | ||
1897 | return 0; | ||
1898 | |||
1899 | BUG_ON(!fs_info->quota_root); | ||
1900 | |||
1901 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
1902 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
1903 | if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { | ||
1904 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1905 | return 0; | ||
1906 | } | ||
1907 | } | ||
1908 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1909 | |||
1910 | ASSERT(is_fstree(oper->ref_root)); | ||
1911 | |||
1912 | switch (oper->type) { | ||
1913 | case BTRFS_QGROUP_OPER_ADD_EXCL: | ||
1914 | case BTRFS_QGROUP_OPER_SUB_EXCL: | ||
1915 | ret = qgroup_excl_accounting(fs_info, oper); | ||
1916 | break; | ||
1917 | case BTRFS_QGROUP_OPER_ADD_SHARED: | ||
1918 | case BTRFS_QGROUP_OPER_SUB_SHARED: | ||
1919 | ret = qgroup_shared_accounting(trans, fs_info, oper); | ||
1920 | break; | ||
1921 | default: | ||
1922 | ASSERT(0); | ||
1923 | } | ||
1924 | return ret; | ||
1925 | } | ||
1926 | |||
1927 | /* | ||
1928 | * Needs to be called every time we run delayed refs, even if there is an error | ||
1929 | * in order to clean up outstanding operations. | ||
1930 | */ | ||
1931 | int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
1932 | struct btrfs_fs_info *fs_info) | ||
1933 | { | ||
1934 | struct btrfs_qgroup_operation *oper; | ||
1935 | int ret = 0; | ||
1463 | 1936 | ||
1937 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
1938 | oper = list_first_entry(&trans->qgroup_ref_list, | ||
1939 | struct btrfs_qgroup_operation, list); | ||
1940 | list_del_init(&oper->list); | ||
1941 | if (!ret || !trans->aborted) | ||
1942 | ret = btrfs_qgroup_account(trans, fs_info, oper); | ||
1943 | spin_lock(&fs_info->qgroup_op_lock); | ||
1944 | rb_erase(&oper->n, &fs_info->qgroup_op_tree); | ||
1945 | spin_unlock(&fs_info->qgroup_op_lock); | ||
1946 | btrfs_put_tree_mod_seq(fs_info, &oper->elem); | ||
1947 | kfree(oper); | ||
1948 | } | ||
1464 | return ret; | 1949 | return ret; |
1465 | } | 1950 | } |
1466 | 1951 | ||
@@ -1629,8 +2114,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
1629 | srcgroup = find_qgroup_rb(fs_info, srcid); | 2114 | srcgroup = find_qgroup_rb(fs_info, srcid); |
1630 | if (!srcgroup) | 2115 | if (!srcgroup) |
1631 | goto unlock; | 2116 | goto unlock; |
1632 | dstgroup->rfer = srcgroup->rfer - level_size; | 2117 | |
1633 | dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; | 2118 | /* |
2119 | * We call inherit after we clone the root in order to make sure | ||
2120 | * our counts don't go crazy, so at this point the only | ||
2121 | * difference between the two roots should be the root node. | ||
2122 | */ | ||
2123 | dstgroup->rfer = srcgroup->rfer; | ||
2124 | dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; | ||
2125 | dstgroup->excl = level_size; | ||
2126 | dstgroup->excl_cmpr = level_size; | ||
1634 | srcgroup->excl = level_size; | 2127 | srcgroup->excl = level_size; |
1635 | srcgroup->excl_cmpr = level_size; | 2128 | srcgroup->excl_cmpr = level_size; |
1636 | qgroup_dirty(fs_info, dstgroup); | 2129 | qgroup_dirty(fs_info, dstgroup); |
@@ -1734,7 +2227,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1734 | struct btrfs_qgroup *qg; | 2227 | struct btrfs_qgroup *qg; |
1735 | struct btrfs_qgroup_list *glist; | 2228 | struct btrfs_qgroup_list *glist; |
1736 | 2229 | ||
1737 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 2230 | qg = u64_to_ptr(unode->aux); |
1738 | 2231 | ||
1739 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | 2232 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
1740 | qg->reserved + (s64)qg->rfer + num_bytes > | 2233 | qg->reserved + (s64)qg->rfer + num_bytes > |
@@ -1766,7 +2259,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1766 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { | 2259 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
1767 | struct btrfs_qgroup *qg; | 2260 | struct btrfs_qgroup *qg; |
1768 | 2261 | ||
1769 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 2262 | qg = u64_to_ptr(unode->aux); |
1770 | 2263 | ||
1771 | qg->reserved += num_bytes; | 2264 | qg->reserved += num_bytes; |
1772 | } | 2265 | } |
@@ -1812,7 +2305,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
1812 | struct btrfs_qgroup *qg; | 2305 | struct btrfs_qgroup *qg; |
1813 | struct btrfs_qgroup_list *glist; | 2306 | struct btrfs_qgroup_list *glist; |
1814 | 2307 | ||
1815 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 2308 | qg = u64_to_ptr(unode->aux); |
1816 | 2309 | ||
1817 | qg->reserved -= num_bytes; | 2310 | qg->reserved -= num_bytes; |
1818 | 2311 | ||
@@ -1848,15 +2341,15 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | |||
1848 | */ | 2341 | */ |
1849 | static int | 2342 | static int |
1850 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | 2343 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, |
1851 | struct btrfs_trans_handle *trans, struct ulist *tmp, | 2344 | struct btrfs_trans_handle *trans, struct ulist *qgroups, |
1852 | struct extent_buffer *scratch_leaf) | 2345 | struct ulist *tmp, struct extent_buffer *scratch_leaf) |
1853 | { | 2346 | { |
1854 | struct btrfs_key found; | 2347 | struct btrfs_key found; |
1855 | struct ulist *roots = NULL; | 2348 | struct ulist *roots = NULL; |
1856 | struct ulist_node *unode; | ||
1857 | struct ulist_iterator uiter; | ||
1858 | struct seq_list tree_mod_seq_elem = {}; | 2349 | struct seq_list tree_mod_seq_elem = {}; |
2350 | u64 num_bytes; | ||
1859 | u64 seq; | 2351 | u64 seq; |
2352 | int new_roots; | ||
1860 | int slot; | 2353 | int slot; |
1861 | int ret; | 2354 | int ret; |
1862 | 2355 | ||
@@ -1897,8 +2390,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
1897 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2390 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
1898 | 2391 | ||
1899 | for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { | 2392 | for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { |
1900 | u64 num_bytes; | ||
1901 | |||
1902 | btrfs_item_key_to_cpu(scratch_leaf, &found, slot); | 2393 | btrfs_item_key_to_cpu(scratch_leaf, &found, slot); |
1903 | if (found.type != BTRFS_EXTENT_ITEM_KEY && | 2394 | if (found.type != BTRFS_EXTENT_ITEM_KEY && |
1904 | found.type != BTRFS_METADATA_ITEM_KEY) | 2395 | found.type != BTRFS_METADATA_ITEM_KEY) |
@@ -1908,76 +2399,34 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
1908 | else | 2399 | else |
1909 | num_bytes = found.offset; | 2400 | num_bytes = found.offset; |
1910 | 2401 | ||
1911 | ret = btrfs_find_all_roots(trans, fs_info, found.objectid, | 2402 | ulist_reinit(qgroups); |
1912 | tree_mod_seq_elem.seq, &roots); | 2403 | ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, |
2404 | &roots); | ||
1913 | if (ret < 0) | 2405 | if (ret < 0) |
1914 | goto out; | 2406 | goto out; |
1915 | spin_lock(&fs_info->qgroup_lock); | 2407 | spin_lock(&fs_info->qgroup_lock); |
1916 | seq = fs_info->qgroup_seq; | 2408 | seq = fs_info->qgroup_seq; |
1917 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | 2409 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ |
1918 | 2410 | ||
1919 | ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); | 2411 | new_roots = 0; |
1920 | if (ret) { | 2412 | ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, |
2413 | seq, &new_roots, 1); | ||
2414 | if (ret < 0) { | ||
1921 | spin_unlock(&fs_info->qgroup_lock); | 2415 | spin_unlock(&fs_info->qgroup_lock); |
1922 | ulist_free(roots); | 2416 | ulist_free(roots); |
1923 | goto out; | 2417 | goto out; |
1924 | } | 2418 | } |
1925 | 2419 | ||
1926 | /* | 2420 | ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, |
1927 | * step2 of btrfs_qgroup_account_ref works from a single root, | 2421 | seq, 0, new_roots, 1); |
1928 | * we're doing all at once here. | 2422 | if (ret < 0) { |
1929 | */ | 2423 | spin_unlock(&fs_info->qgroup_lock); |
1930 | ulist_reinit(tmp); | 2424 | ulist_free(roots); |
1931 | ULIST_ITER_INIT(&uiter); | 2425 | goto out; |
1932 | while ((unode = ulist_next(roots, &uiter))) { | ||
1933 | struct btrfs_qgroup *qg; | ||
1934 | |||
1935 | qg = find_qgroup_rb(fs_info, unode->val); | ||
1936 | if (!qg) | ||
1937 | continue; | ||
1938 | |||
1939 | ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, | ||
1940 | GFP_ATOMIC); | ||
1941 | if (ret < 0) { | ||
1942 | spin_unlock(&fs_info->qgroup_lock); | ||
1943 | ulist_free(roots); | ||
1944 | goto out; | ||
1945 | } | ||
1946 | } | ||
1947 | |||
1948 | /* this loop is similar to step 2 of btrfs_qgroup_account_ref */ | ||
1949 | ULIST_ITER_INIT(&uiter); | ||
1950 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1951 | struct btrfs_qgroup *qg; | ||
1952 | struct btrfs_qgroup_list *glist; | ||
1953 | |||
1954 | qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; | ||
1955 | qg->rfer += num_bytes; | ||
1956 | qg->rfer_cmpr += num_bytes; | ||
1957 | WARN_ON(qg->tag >= seq); | ||
1958 | if (qg->refcnt - seq == roots->nnodes) { | ||
1959 | qg->excl += num_bytes; | ||
1960 | qg->excl_cmpr += num_bytes; | ||
1961 | } | ||
1962 | qgroup_dirty(fs_info, qg); | ||
1963 | |||
1964 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1965 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1966 | (uintptr_t)glist->group, | ||
1967 | GFP_ATOMIC); | ||
1968 | if (ret < 0) { | ||
1969 | spin_unlock(&fs_info->qgroup_lock); | ||
1970 | ulist_free(roots); | ||
1971 | goto out; | ||
1972 | } | ||
1973 | } | ||
1974 | } | 2426 | } |
1975 | |||
1976 | spin_unlock(&fs_info->qgroup_lock); | 2427 | spin_unlock(&fs_info->qgroup_lock); |
1977 | ulist_free(roots); | 2428 | ulist_free(roots); |
1978 | ret = 0; | ||
1979 | } | 2429 | } |
1980 | |||
1981 | out: | 2430 | out: |
1982 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | 2431 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); |
1983 | 2432 | ||
@@ -1990,13 +2439,16 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
1990 | qgroup_rescan_work); | 2439 | qgroup_rescan_work); |
1991 | struct btrfs_path *path; | 2440 | struct btrfs_path *path; |
1992 | struct btrfs_trans_handle *trans = NULL; | 2441 | struct btrfs_trans_handle *trans = NULL; |
1993 | struct ulist *tmp = NULL; | 2442 | struct ulist *tmp = NULL, *qgroups = NULL; |
1994 | struct extent_buffer *scratch_leaf = NULL; | 2443 | struct extent_buffer *scratch_leaf = NULL; |
1995 | int err = -ENOMEM; | 2444 | int err = -ENOMEM; |
1996 | 2445 | ||
1997 | path = btrfs_alloc_path(); | 2446 | path = btrfs_alloc_path(); |
1998 | if (!path) | 2447 | if (!path) |
1999 | goto out; | 2448 | goto out; |
2449 | qgroups = ulist_alloc(GFP_NOFS); | ||
2450 | if (!qgroups) | ||
2451 | goto out; | ||
2000 | tmp = ulist_alloc(GFP_NOFS); | 2452 | tmp = ulist_alloc(GFP_NOFS); |
2001 | if (!tmp) | 2453 | if (!tmp) |
2002 | goto out; | 2454 | goto out; |
@@ -2015,7 +2467,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
2015 | err = -EINTR; | 2467 | err = -EINTR; |
2016 | } else { | 2468 | } else { |
2017 | err = qgroup_rescan_leaf(fs_info, path, trans, | 2469 | err = qgroup_rescan_leaf(fs_info, path, trans, |
2018 | tmp, scratch_leaf); | 2470 | qgroups, tmp, scratch_leaf); |
2019 | } | 2471 | } |
2020 | if (err > 0) | 2472 | if (err > 0) |
2021 | btrfs_commit_transaction(trans, fs_info->fs_root); | 2473 | btrfs_commit_transaction(trans, fs_info->fs_root); |
@@ -2025,6 +2477,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
2025 | 2477 | ||
2026 | out: | 2478 | out: |
2027 | kfree(scratch_leaf); | 2479 | kfree(scratch_leaf); |
2480 | ulist_free(qgroups); | ||
2028 | ulist_free(tmp); | 2481 | ulist_free(tmp); |
2029 | btrfs_free_path(path); | 2482 | btrfs_free_path(path); |
2030 | 2483 | ||
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h new file mode 100644 index 000000000000..5952ff1fbd7a --- /dev/null +++ b/fs/btrfs/qgroup.h | |||
@@ -0,0 +1,107 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Facebook. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BTRFS_QGROUP__ | ||
20 | #define __BTRFS_QGROUP__ | ||
21 | |||
22 | /* | ||
23 | * A description of the operations, all of these operations only happen when we | ||
24 | * are adding the 1st reference for that subvolume in the case of adding space | ||
25 | * or on the last reference delete in the case of subtraction. The only | ||
26 | * exception is the last one, which is added for confusion. | ||
27 | * | ||
28 | * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only | ||
29 | * one pointing at the bytes we are adding. This is called on the first | ||
30 | * allocation. | ||
31 | * | ||
32 | * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be | ||
33 | * shared between subvols. This is called on the creation of a ref that already | ||
34 | * has refs from a different subvolume, so basically reflink. | ||
35 | * | ||
36 | * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only | ||
37 | * one referencing the range. | ||
38 | * | ||
39 | * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares | ||
40 | * refs with other subvolumes. | ||
41 | */ | ||
42 | enum btrfs_qgroup_operation_type { | ||
43 | BTRFS_QGROUP_OPER_ADD_EXCL, | ||
44 | BTRFS_QGROUP_OPER_ADD_SHARED, | ||
45 | BTRFS_QGROUP_OPER_SUB_EXCL, | ||
46 | BTRFS_QGROUP_OPER_SUB_SHARED, | ||
47 | }; | ||
48 | |||
49 | struct btrfs_qgroup_operation { | ||
50 | u64 ref_root; | ||
51 | u64 bytenr; | ||
52 | u64 num_bytes; | ||
53 | u64 seq; | ||
54 | enum btrfs_qgroup_operation_type type; | ||
55 | struct seq_list elem; | ||
56 | struct rb_node n; | ||
57 | struct list_head list; | ||
58 | }; | ||
59 | |||
60 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, | ||
61 | struct btrfs_fs_info *fs_info); | ||
62 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, | ||
63 | struct btrfs_fs_info *fs_info); | ||
64 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | ||
65 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | ||
66 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | ||
67 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
68 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
69 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
70 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
71 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
72 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
73 | char *name); | ||
74 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
75 | struct btrfs_fs_info *fs_info, u64 qgroupid); | ||
76 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
77 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
78 | struct btrfs_qgroup_limit *limit); | ||
79 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); | ||
80 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | ||
81 | struct btrfs_delayed_extent_op; | ||
82 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
83 | struct btrfs_fs_info *fs_info, u64 ref_root, | ||
84 | u64 bytenr, u64 num_bytes, | ||
85 | enum btrfs_qgroup_operation_type type, | ||
86 | int mod_seq); | ||
87 | int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
88 | struct btrfs_fs_info *fs_info); | ||
89 | void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans, | ||
90 | struct btrfs_fs_info *fs_info, | ||
91 | struct btrfs_qgroup_operation *oper); | ||
92 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | ||
93 | struct btrfs_fs_info *fs_info); | ||
94 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
95 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
96 | struct btrfs_qgroup_inherit *inherit); | ||
97 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); | ||
98 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); | ||
99 | |||
100 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); | ||
101 | |||
102 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
103 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
104 | u64 rfer, u64 excl); | ||
105 | #endif | ||
106 | |||
107 | #endif /* __BTRFS_QGROUP__ */ | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 7f92ab1daa87..65245a07275b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -337,7 +337,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) | |||
337 | if (bnode->root) | 337 | if (bnode->root) |
338 | fs_info = bnode->root->fs_info; | 338 | fs_info = bnode->root->fs_info; |
339 | btrfs_panic(fs_info, errno, "Inconsistency in backref cache " | 339 | btrfs_panic(fs_info, errno, "Inconsistency in backref cache " |
340 | "found at offset %llu\n", bytenr); | 340 | "found at offset %llu", bytenr); |
341 | } | 341 | } |
342 | 342 | ||
343 | /* | 343 | /* |
@@ -528,7 +528,7 @@ static int should_ignore_root(struct btrfs_root *root) | |||
528 | { | 528 | { |
529 | struct btrfs_root *reloc_root; | 529 | struct btrfs_root *reloc_root; |
530 | 530 | ||
531 | if (!root->ref_cows) | 531 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
532 | return 0; | 532 | return 0; |
533 | 533 | ||
534 | reloc_root = root->reloc_root; | 534 | reloc_root = root->reloc_root; |
@@ -610,7 +610,7 @@ struct btrfs_root *find_tree_root(struct reloc_control *rc, | |||
610 | root = read_fs_root(rc->extent_root->fs_info, root_objectid); | 610 | root = read_fs_root(rc->extent_root->fs_info, root_objectid); |
611 | BUG_ON(IS_ERR(root)); | 611 | BUG_ON(IS_ERR(root)); |
612 | 612 | ||
613 | if (root->ref_cows && | 613 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
614 | generation != btrfs_root_generation(&root->root_item)) | 614 | generation != btrfs_root_generation(&root->root_item)) |
615 | return NULL; | 615 | return NULL; |
616 | 616 | ||
@@ -887,7 +887,7 @@ again: | |||
887 | goto out; | 887 | goto out; |
888 | } | 888 | } |
889 | 889 | ||
890 | if (!root->ref_cows) | 890 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
891 | cur->cowonly = 1; | 891 | cur->cowonly = 1; |
892 | 892 | ||
893 | if (btrfs_root_level(&root->root_item) == cur->level) { | 893 | if (btrfs_root_level(&root->root_item) == cur->level) { |
@@ -954,7 +954,8 @@ again: | |||
954 | upper->bytenr = eb->start; | 954 | upper->bytenr = eb->start; |
955 | upper->owner = btrfs_header_owner(eb); | 955 | upper->owner = btrfs_header_owner(eb); |
956 | upper->level = lower->level + 1; | 956 | upper->level = lower->level + 1; |
957 | if (!root->ref_cows) | 957 | if (!test_bit(BTRFS_ROOT_REF_COWS, |
958 | &root->state)) | ||
958 | upper->cowonly = 1; | 959 | upper->cowonly = 1; |
959 | 960 | ||
960 | /* | 961 | /* |
@@ -1258,7 +1259,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) | |||
1258 | if (rb_node) { | 1259 | if (rb_node) { |
1259 | btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " | 1260 | btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " |
1260 | "for start=%llu while inserting into relocation " | 1261 | "for start=%llu while inserting into relocation " |
1261 | "tree\n", node->bytenr); | 1262 | "tree", node->bytenr); |
1262 | kfree(node); | 1263 | kfree(node); |
1263 | return -EEXIST; | 1264 | return -EEXIST; |
1264 | } | 1265 | } |
@@ -2441,7 +2442,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | |||
2441 | next = walk_up_backref(next, edges, &index); | 2442 | next = walk_up_backref(next, edges, &index); |
2442 | root = next->root; | 2443 | root = next->root; |
2443 | BUG_ON(!root); | 2444 | BUG_ON(!root); |
2444 | BUG_ON(!root->ref_cows); | 2445 | BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state)); |
2445 | 2446 | ||
2446 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2447 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
2447 | record_reloc_root_in_trans(trans, root); | 2448 | record_reloc_root_in_trans(trans, root); |
@@ -2506,7 +2507,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | |||
2506 | BUG_ON(!root); | 2507 | BUG_ON(!root); |
2507 | 2508 | ||
2508 | /* no other choice for non-references counted tree */ | 2509 | /* no other choice for non-references counted tree */ |
2509 | if (!root->ref_cows) | 2510 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
2510 | return root; | 2511 | return root; |
2511 | 2512 | ||
2512 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | 2513 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) |
@@ -2893,14 +2894,14 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
2893 | goto out; | 2894 | goto out; |
2894 | } | 2895 | } |
2895 | 2896 | ||
2896 | if (!root || root->ref_cows) { | 2897 | if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { |
2897 | ret = reserve_metadata_space(trans, rc, node); | 2898 | ret = reserve_metadata_space(trans, rc, node); |
2898 | if (ret) | 2899 | if (ret) |
2899 | goto out; | 2900 | goto out; |
2900 | } | 2901 | } |
2901 | 2902 | ||
2902 | if (root) { | 2903 | if (root) { |
2903 | if (root->ref_cows) { | 2904 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { |
2904 | BUG_ON(node->new_bytenr); | 2905 | BUG_ON(node->new_bytenr); |
2905 | BUG_ON(!list_empty(&node->list)); | 2906 | BUG_ON(!list_empty(&node->list)); |
2906 | btrfs_record_root_in_trans(trans, root); | 2907 | btrfs_record_root_in_trans(trans, root); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 38bb47e7d6b1..360a728a639f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -306,7 +306,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
306 | break; | 306 | break; |
307 | } | 307 | } |
308 | 308 | ||
309 | root->orphan_item_inserted = 1; | 309 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
310 | 310 | ||
311 | err = btrfs_insert_fs_root(root->fs_info, root); | 311 | err = btrfs_insert_fs_root(root->fs_info, root); |
312 | if (err) { | 312 | if (err) { |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0be77993378e..ac80188eec88 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -588,8 +588,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
588 | 588 | ||
589 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 589 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
590 | do { | 590 | do { |
591 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, | 591 | ret = tree_backref_for_extent(&ptr, eb, &found_key, ei, |
592 | &ref_root, &ref_level); | 592 | item_size, &ref_root, |
593 | &ref_level); | ||
593 | printk_in_rcu(KERN_WARNING | 594 | printk_in_rcu(KERN_WARNING |
594 | "BTRFS: %s at logical %llu on dev %s, " | 595 | "BTRFS: %s at logical %llu on dev %s, " |
595 | "sector %llu: metadata %s (level %d) in tree " | 596 | "sector %llu: metadata %s (level %d) in tree " |
@@ -717,8 +718,8 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | |||
717 | out: | 718 | out: |
718 | if (page) | 719 | if (page) |
719 | put_page(page); | 720 | put_page(page); |
720 | if (inode) | 721 | |
721 | iput(inode); | 722 | iput(inode); |
722 | 723 | ||
723 | if (ret < 0) | 724 | if (ret < 0) |
724 | return ret; | 725 | return ret; |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fd38b5053479..6528aa662181 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -360,10 +360,13 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
360 | /* | 360 | /* |
361 | * First time the inline_buf does not suffice | 361 | * First time the inline_buf does not suffice |
362 | */ | 362 | */ |
363 | if (p->buf == p->inline_buf) | 363 | if (p->buf == p->inline_buf) { |
364 | tmp_buf = kmalloc(len, GFP_NOFS); | 364 | tmp_buf = kmalloc(len, GFP_NOFS); |
365 | else | 365 | if (tmp_buf) |
366 | memcpy(tmp_buf, p->buf, old_buf_len); | ||
367 | } else { | ||
366 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | 368 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
369 | } | ||
367 | if (!tmp_buf) | 370 | if (!tmp_buf) |
368 | return -ENOMEM; | 371 | return -ENOMEM; |
369 | p->buf = tmp_buf; | 372 | p->buf = tmp_buf; |
@@ -972,7 +975,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
972 | struct btrfs_dir_item *di; | 975 | struct btrfs_dir_item *di; |
973 | struct btrfs_key di_key; | 976 | struct btrfs_key di_key; |
974 | char *buf = NULL; | 977 | char *buf = NULL; |
975 | const int buf_len = PATH_MAX; | 978 | int buf_len; |
976 | u32 name_len; | 979 | u32 name_len; |
977 | u32 data_len; | 980 | u32 data_len; |
978 | u32 cur; | 981 | u32 cur; |
@@ -982,6 +985,11 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
982 | int num; | 985 | int num; |
983 | u8 type; | 986 | u8 type; |
984 | 987 | ||
988 | if (found_key->type == BTRFS_XATTR_ITEM_KEY) | ||
989 | buf_len = BTRFS_MAX_XATTR_SIZE(root); | ||
990 | else | ||
991 | buf_len = PATH_MAX; | ||
992 | |||
985 | buf = kmalloc(buf_len, GFP_NOFS); | 993 | buf = kmalloc(buf_len, GFP_NOFS); |
986 | if (!buf) { | 994 | if (!buf) { |
987 | ret = -ENOMEM; | 995 | ret = -ENOMEM; |
@@ -1003,12 +1011,23 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1003 | type = btrfs_dir_type(eb, di); | 1011 | type = btrfs_dir_type(eb, di); |
1004 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 1012 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
1005 | 1013 | ||
1006 | /* | 1014 | if (type == BTRFS_FT_XATTR) { |
1007 | * Path too long | 1015 | if (name_len > XATTR_NAME_MAX) { |
1008 | */ | 1016 | ret = -ENAMETOOLONG; |
1009 | if (name_len + data_len > buf_len) { | 1017 | goto out; |
1010 | ret = -ENAMETOOLONG; | 1018 | } |
1011 | goto out; | 1019 | if (name_len + data_len > buf_len) { |
1020 | ret = -E2BIG; | ||
1021 | goto out; | ||
1022 | } | ||
1023 | } else { | ||
1024 | /* | ||
1025 | * Path too long | ||
1026 | */ | ||
1027 | if (name_len + data_len > buf_len) { | ||
1028 | ret = -ENAMETOOLONG; | ||
1029 | goto out; | ||
1030 | } | ||
1012 | } | 1031 | } |
1013 | 1032 | ||
1014 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1033 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
@@ -1346,7 +1365,7 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1346 | ret = -EIO; | 1365 | ret = -EIO; |
1347 | btrfs_err(sctx->send_root->fs_info, "did not find backref in " | 1366 | btrfs_err(sctx->send_root->fs_info, "did not find backref in " |
1348 | "send_root. inode=%llu, offset=%llu, " | 1367 | "send_root. inode=%llu, offset=%llu, " |
1349 | "disk_byte=%llu found extent=%llu\n", | 1368 | "disk_byte=%llu found extent=%llu", |
1350 | ino, data_offset, disk_byte, found_key.objectid); | 1369 | ino, data_offset, disk_byte, found_key.objectid); |
1351 | goto out; | 1370 | goto out; |
1352 | } | 1371 | } |
@@ -1625,6 +1644,10 @@ static int lookup_dir_item_inode(struct btrfs_root *root, | |||
1625 | goto out; | 1644 | goto out; |
1626 | } | 1645 | } |
1627 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1646 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
1647 | if (key.type == BTRFS_ROOT_ITEM_KEY) { | ||
1648 | ret = -ENOENT; | ||
1649 | goto out; | ||
1650 | } | ||
1628 | *found_inode = key.objectid; | 1651 | *found_inode = key.objectid; |
1629 | *found_type = btrfs_dir_type(path->nodes[0], di); | 1652 | *found_type = btrfs_dir_type(path->nodes[0], di); |
1630 | 1653 | ||
@@ -1690,10 +1713,12 @@ static int get_first_ref(struct btrfs_root *root, u64 ino, | |||
1690 | goto out; | 1713 | goto out; |
1691 | btrfs_release_path(path); | 1714 | btrfs_release_path(path); |
1692 | 1715 | ||
1693 | ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL, | 1716 | if (dir_gen) { |
1694 | NULL, NULL); | 1717 | ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, |
1695 | if (ret < 0) | 1718 | NULL, NULL, NULL); |
1696 | goto out; | 1719 | if (ret < 0) |
1720 | goto out; | ||
1721 | } | ||
1697 | 1722 | ||
1698 | *dir = parent_dir; | 1723 | *dir = parent_dir; |
1699 | 1724 | ||
@@ -1709,13 +1734,12 @@ static int is_first_ref(struct btrfs_root *root, | |||
1709 | int ret; | 1734 | int ret; |
1710 | struct fs_path *tmp_name; | 1735 | struct fs_path *tmp_name; |
1711 | u64 tmp_dir; | 1736 | u64 tmp_dir; |
1712 | u64 tmp_dir_gen; | ||
1713 | 1737 | ||
1714 | tmp_name = fs_path_alloc(); | 1738 | tmp_name = fs_path_alloc(); |
1715 | if (!tmp_name) | 1739 | if (!tmp_name) |
1716 | return -ENOMEM; | 1740 | return -ENOMEM; |
1717 | 1741 | ||
1718 | ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); | 1742 | ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name); |
1719 | if (ret < 0) | 1743 | if (ret < 0) |
1720 | goto out; | 1744 | goto out; |
1721 | 1745 | ||
@@ -2026,7 +2050,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
2026 | { | 2050 | { |
2027 | int ret; | 2051 | int ret; |
2028 | int nce_ret; | 2052 | int nce_ret; |
2029 | struct btrfs_path *path = NULL; | ||
2030 | struct name_cache_entry *nce = NULL; | 2053 | struct name_cache_entry *nce = NULL; |
2031 | 2054 | ||
2032 | /* | 2055 | /* |
@@ -2052,10 +2075,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
2052 | } | 2075 | } |
2053 | } | 2076 | } |
2054 | 2077 | ||
2055 | path = alloc_path_for_send(); | ||
2056 | if (!path) | ||
2057 | return -ENOMEM; | ||
2058 | |||
2059 | /* | 2078 | /* |
2060 | * If the inode is not existent yet, add the orphan name and return 1. | 2079 | * If the inode is not existent yet, add the orphan name and return 1. |
2061 | * This should only happen for the parent dir that we determine in | 2080 | * This should only happen for the parent dir that we determine in |
@@ -2131,7 +2150,6 @@ out_cache: | |||
2131 | name_cache_clean_unused(sctx); | 2150 | name_cache_clean_unused(sctx); |
2132 | 2151 | ||
2133 | out: | 2152 | out: |
2134 | btrfs_free_path(path); | ||
2135 | return ret; | 2153 | return ret; |
2136 | } | 2154 | } |
2137 | 2155 | ||
@@ -2942,7 +2960,9 @@ static void free_waiting_dir_move(struct send_ctx *sctx, | |||
2942 | static int add_pending_dir_move(struct send_ctx *sctx, | 2960 | static int add_pending_dir_move(struct send_ctx *sctx, |
2943 | u64 ino, | 2961 | u64 ino, |
2944 | u64 ino_gen, | 2962 | u64 ino_gen, |
2945 | u64 parent_ino) | 2963 | u64 parent_ino, |
2964 | struct list_head *new_refs, | ||
2965 | struct list_head *deleted_refs) | ||
2946 | { | 2966 | { |
2947 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2967 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
2948 | struct rb_node *parent = NULL; | 2968 | struct rb_node *parent = NULL; |
@@ -2974,12 +2994,12 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
2974 | } | 2994 | } |
2975 | } | 2995 | } |
2976 | 2996 | ||
2977 | list_for_each_entry(cur, &sctx->deleted_refs, list) { | 2997 | list_for_each_entry(cur, deleted_refs, list) { |
2978 | ret = dup_ref(cur, &pm->update_refs); | 2998 | ret = dup_ref(cur, &pm->update_refs); |
2979 | if (ret < 0) | 2999 | if (ret < 0) |
2980 | goto out; | 3000 | goto out; |
2981 | } | 3001 | } |
2982 | list_for_each_entry(cur, &sctx->new_refs, list) { | 3002 | list_for_each_entry(cur, new_refs, list) { |
2983 | ret = dup_ref(cur, &pm->update_refs); | 3003 | ret = dup_ref(cur, &pm->update_refs); |
2984 | if (ret < 0) | 3004 | if (ret < 0) |
2985 | goto out; | 3005 | goto out; |
@@ -3022,6 +3042,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, | |||
3022 | return NULL; | 3042 | return NULL; |
3023 | } | 3043 | } |
3024 | 3044 | ||
3045 | static int path_loop(struct send_ctx *sctx, struct fs_path *name, | ||
3046 | u64 ino, u64 gen, u64 *ancestor_ino) | ||
3047 | { | ||
3048 | int ret = 0; | ||
3049 | u64 parent_inode = 0; | ||
3050 | u64 parent_gen = 0; | ||
3051 | u64 start_ino = ino; | ||
3052 | |||
3053 | *ancestor_ino = 0; | ||
3054 | while (ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
3055 | fs_path_reset(name); | ||
3056 | |||
3057 | if (is_waiting_for_rm(sctx, ino)) | ||
3058 | break; | ||
3059 | if (is_waiting_for_move(sctx, ino)) { | ||
3060 | if (*ancestor_ino == 0) | ||
3061 | *ancestor_ino = ino; | ||
3062 | ret = get_first_ref(sctx->parent_root, ino, | ||
3063 | &parent_inode, &parent_gen, name); | ||
3064 | } else { | ||
3065 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
3066 | &parent_inode, | ||
3067 | &parent_gen, name); | ||
3068 | if (ret > 0) { | ||
3069 | ret = 0; | ||
3070 | break; | ||
3071 | } | ||
3072 | } | ||
3073 | if (ret < 0) | ||
3074 | break; | ||
3075 | if (parent_inode == start_ino) { | ||
3076 | ret = 1; | ||
3077 | if (*ancestor_ino == 0) | ||
3078 | *ancestor_ino = ino; | ||
3079 | break; | ||
3080 | } | ||
3081 | ino = parent_inode; | ||
3082 | gen = parent_gen; | ||
3083 | } | ||
3084 | return ret; | ||
3085 | } | ||
3086 | |||
3025 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | 3087 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) |
3026 | { | 3088 | { |
3027 | struct fs_path *from_path = NULL; | 3089 | struct fs_path *from_path = NULL; |
@@ -3033,6 +3095,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3033 | struct waiting_dir_move *dm = NULL; | 3095 | struct waiting_dir_move *dm = NULL; |
3034 | u64 rmdir_ino = 0; | 3096 | u64 rmdir_ino = 0; |
3035 | int ret; | 3097 | int ret; |
3098 | u64 ancestor = 0; | ||
3036 | 3099 | ||
3037 | name = fs_path_alloc(); | 3100 | name = fs_path_alloc(); |
3038 | from_path = fs_path_alloc(); | 3101 | from_path = fs_path_alloc(); |
@@ -3051,34 +3114,33 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3051 | if (ret < 0) | 3114 | if (ret < 0) |
3052 | goto out; | 3115 | goto out; |
3053 | 3116 | ||
3054 | if (parent_ino == sctx->cur_ino) { | 3117 | ret = get_cur_path(sctx, parent_ino, parent_gen, |
3055 | /* child only renamed, not moved */ | 3118 | from_path); |
3056 | ASSERT(parent_gen == sctx->cur_inode_gen); | 3119 | if (ret < 0) |
3057 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | 3120 | goto out; |
3058 | from_path); | 3121 | ret = fs_path_add_path(from_path, name); |
3059 | if (ret < 0) | 3122 | if (ret < 0) |
3060 | goto out; | 3123 | goto out; |
3061 | ret = fs_path_add_path(from_path, name); | 3124 | |
3062 | if (ret < 0) | 3125 | sctx->send_progress = sctx->cur_ino + 1; |
3063 | goto out; | 3126 | ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); |
3064 | } else { | 3127 | if (ret) { |
3065 | /* child moved and maybe renamed too */ | 3128 | LIST_HEAD(deleted_refs); |
3066 | sctx->send_progress = pm->ino; | 3129 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); |
3067 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3130 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, |
3131 | &pm->update_refs, &deleted_refs); | ||
3068 | if (ret < 0) | 3132 | if (ret < 0) |
3069 | goto out; | 3133 | goto out; |
3070 | } | 3134 | if (rmdir_ino) { |
3071 | 3135 | dm = get_waiting_dir_move(sctx, pm->ino); | |
3072 | fs_path_free(name); | 3136 | ASSERT(dm); |
3073 | name = NULL; | 3137 | dm->rmdir_ino = rmdir_ino; |
3074 | 3138 | } | |
3075 | to_path = fs_path_alloc(); | ||
3076 | if (!to_path) { | ||
3077 | ret = -ENOMEM; | ||
3078 | goto out; | 3139 | goto out; |
3079 | } | 3140 | } |
3080 | 3141 | fs_path_reset(name); | |
3081 | sctx->send_progress = sctx->cur_ino + 1; | 3142 | to_path = name; |
3143 | name = NULL; | ||
3082 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3144 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
3083 | if (ret < 0) | 3145 | if (ret < 0) |
3084 | goto out; | 3146 | goto out; |
@@ -3202,127 +3264,74 @@ out: | |||
3202 | static int wait_for_parent_move(struct send_ctx *sctx, | 3264 | static int wait_for_parent_move(struct send_ctx *sctx, |
3203 | struct recorded_ref *parent_ref) | 3265 | struct recorded_ref *parent_ref) |
3204 | { | 3266 | { |
3205 | int ret; | 3267 | int ret = 0; |
3206 | u64 ino = parent_ref->dir; | 3268 | u64 ino = parent_ref->dir; |
3207 | u64 parent_ino_before, parent_ino_after; | 3269 | u64 parent_ino_before, parent_ino_after; |
3208 | u64 old_gen; | ||
3209 | struct fs_path *path_before = NULL; | 3270 | struct fs_path *path_before = NULL; |
3210 | struct fs_path *path_after = NULL; | 3271 | struct fs_path *path_after = NULL; |
3211 | int len1, len2; | 3272 | int len1, len2; |
3212 | int register_upper_dirs; | ||
3213 | u64 gen; | ||
3214 | |||
3215 | if (is_waiting_for_move(sctx, ino)) | ||
3216 | return 1; | ||
3217 | |||
3218 | if (parent_ref->dir <= sctx->cur_ino) | ||
3219 | return 0; | ||
3220 | |||
3221 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | ||
3222 | NULL, NULL, NULL, NULL); | ||
3223 | if (ret == -ENOENT) | ||
3224 | return 0; | ||
3225 | else if (ret < 0) | ||
3226 | return ret; | ||
3227 | |||
3228 | if (parent_ref->dir_gen != old_gen) | ||
3229 | return 0; | ||
3230 | |||
3231 | path_before = fs_path_alloc(); | ||
3232 | if (!path_before) | ||
3233 | return -ENOMEM; | ||
3234 | |||
3235 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
3236 | NULL, path_before); | ||
3237 | if (ret == -ENOENT) { | ||
3238 | ret = 0; | ||
3239 | goto out; | ||
3240 | } else if (ret < 0) { | ||
3241 | goto out; | ||
3242 | } | ||
3243 | 3273 | ||
3244 | path_after = fs_path_alloc(); | 3274 | path_after = fs_path_alloc(); |
3245 | if (!path_after) { | 3275 | path_before = fs_path_alloc(); |
3276 | if (!path_after || !path_before) { | ||
3246 | ret = -ENOMEM; | 3277 | ret = -ENOMEM; |
3247 | goto out; | 3278 | goto out; |
3248 | } | 3279 | } |
3249 | 3280 | ||
3250 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
3251 | &gen, path_after); | ||
3252 | if (ret == -ENOENT) { | ||
3253 | ret = 0; | ||
3254 | goto out; | ||
3255 | } else if (ret < 0) { | ||
3256 | goto out; | ||
3257 | } | ||
3258 | |||
3259 | len1 = fs_path_len(path_before); | ||
3260 | len2 = fs_path_len(path_after); | ||
3261 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
3262 | memcmp(path_before->start, path_after->start, len1)) { | ||
3263 | ret = 1; | ||
3264 | goto out; | ||
3265 | } | ||
3266 | ret = 0; | ||
3267 | |||
3268 | /* | 3281 | /* |
3269 | * Ok, our new most direct ancestor has a higher inode number but | 3282 | * Our current directory inode may not yet be renamed/moved because some |
3270 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | 3283 | * ancestor (immediate or not) has to be renamed/moved first. So find if |
3271 | * the hierarchy have an higher inode number too *and* were renamed | 3284 | * such ancestor exists and make sure our own rename/move happens after |
3272 | * or moved - in this case we need to wait for the ancestor's rename | 3285 | * that ancestor is processed. |
3273 | * or move operation before we can do the move/rename for the current | ||
3274 | * inode. | ||
3275 | */ | 3286 | */ |
3276 | register_upper_dirs = 0; | 3287 | while (ino > BTRFS_FIRST_FREE_OBJECTID) { |
3277 | ino = parent_ino_after; | 3288 | if (is_waiting_for_move(sctx, ino)) { |
3278 | again: | 3289 | ret = 1; |
3279 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | 3290 | break; |
3280 | u64 parent_gen; | 3291 | } |
3281 | 3292 | ||
3282 | fs_path_reset(path_before); | 3293 | fs_path_reset(path_before); |
3283 | fs_path_reset(path_after); | 3294 | fs_path_reset(path_after); |
3284 | 3295 | ||
3285 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3296 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
3286 | &parent_gen, path_after); | 3297 | NULL, path_after); |
3287 | if (ret < 0) | 3298 | if (ret < 0) |
3288 | goto out; | 3299 | goto out; |
3289 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | 3300 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, |
3290 | NULL, path_before); | 3301 | NULL, path_before); |
3291 | if (ret == -ENOENT) { | 3302 | if (ret < 0 && ret != -ENOENT) { |
3292 | ret = 0; | ||
3293 | break; | ||
3294 | } else if (ret < 0) { | ||
3295 | goto out; | 3303 | goto out; |
3304 | } else if (ret == -ENOENT) { | ||
3305 | ret = 1; | ||
3306 | break; | ||
3296 | } | 3307 | } |
3297 | 3308 | ||
3298 | len1 = fs_path_len(path_before); | 3309 | len1 = fs_path_len(path_before); |
3299 | len2 = fs_path_len(path_after); | 3310 | len2 = fs_path_len(path_after); |
3300 | if (parent_ino_before != parent_ino_after || len1 != len2 || | 3311 | if (ino > sctx->cur_ino && |
3301 | memcmp(path_before->start, path_after->start, len1)) { | 3312 | (parent_ino_before != parent_ino_after || len1 != len2 || |
3313 | memcmp(path_before->start, path_after->start, len1))) { | ||
3302 | ret = 1; | 3314 | ret = 1; |
3303 | if (register_upper_dirs) { | 3315 | break; |
3304 | break; | ||
3305 | } else { | ||
3306 | register_upper_dirs = 1; | ||
3307 | ino = parent_ref->dir; | ||
3308 | gen = parent_ref->dir_gen; | ||
3309 | goto again; | ||
3310 | } | ||
3311 | } else if (register_upper_dirs) { | ||
3312 | ret = add_pending_dir_move(sctx, ino, gen, | ||
3313 | parent_ino_after); | ||
3314 | if (ret < 0 && ret != -EEXIST) | ||
3315 | goto out; | ||
3316 | } | 3316 | } |
3317 | |||
3318 | ino = parent_ino_after; | 3317 | ino = parent_ino_after; |
3319 | gen = parent_gen; | ||
3320 | } | 3318 | } |
3321 | 3319 | ||
3322 | out: | 3320 | out: |
3323 | fs_path_free(path_before); | 3321 | fs_path_free(path_before); |
3324 | fs_path_free(path_after); | 3322 | fs_path_free(path_after); |
3325 | 3323 | ||
3324 | if (ret == 1) { | ||
3325 | ret = add_pending_dir_move(sctx, | ||
3326 | sctx->cur_ino, | ||
3327 | sctx->cur_inode_gen, | ||
3328 | ino, | ||
3329 | &sctx->new_refs, | ||
3330 | &sctx->deleted_refs); | ||
3331 | if (!ret) | ||
3332 | ret = 1; | ||
3333 | } | ||
3334 | |||
3326 | return ret; | 3335 | return ret; |
3327 | } | 3336 | } |
3328 | 3337 | ||
@@ -3483,10 +3492,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3483 | if (ret < 0) | 3492 | if (ret < 0) |
3484 | goto out; | 3493 | goto out; |
3485 | if (ret) { | 3494 | if (ret) { |
3486 | ret = add_pending_dir_move(sctx, | ||
3487 | sctx->cur_ino, | ||
3488 | sctx->cur_inode_gen, | ||
3489 | cur->dir); | ||
3490 | *pending_move = 1; | 3495 | *pending_move = 1; |
3491 | } else { | 3496 | } else { |
3492 | ret = send_rename(sctx, valid_path, | 3497 | ret = send_rename(sctx, valid_path, |
@@ -5487,7 +5492,7 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) | |||
5487 | */ | 5492 | */ |
5488 | if (root->send_in_progress < 0) | 5493 | if (root->send_in_progress < 0) |
5489 | btrfs_err(root->fs_info, | 5494 | btrfs_err(root->fs_info, |
5490 | "send_in_progres unbalanced %d root %llu\n", | 5495 | "send_in_progres unbalanced %d root %llu", |
5491 | root->send_in_progress, root->root_key.objectid); | 5496 | root->send_in_progress, root->root_key.objectid); |
5492 | spin_unlock(&root->root_item_lock); | 5497 | spin_unlock(&root->root_item_lock); |
5493 | } | 5498 | } |
@@ -5515,7 +5520,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5515 | 5520 | ||
5516 | /* | 5521 | /* |
5517 | * The subvolume must remain read-only during send, protect against | 5522 | * The subvolume must remain read-only during send, protect against |
5518 | * making it RW. | 5523 | * making it RW. This also protects against deletion. |
5519 | */ | 5524 | */ |
5520 | spin_lock(&send_root->root_item_lock); | 5525 | spin_lock(&send_root->root_item_lock); |
5521 | send_root->send_in_progress++; | 5526 | send_root->send_in_progress++; |
@@ -5575,6 +5580,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5575 | } | 5580 | } |
5576 | 5581 | ||
5577 | sctx->send_root = send_root; | 5582 | sctx->send_root = send_root; |
5583 | /* | ||
5584 | * Unlikely but possible, if the subvolume is marked for deletion but | ||
5585 | * is slow to remove the directory entry, send can still be started | ||
5586 | */ | ||
5587 | if (btrfs_root_dead(sctx->send_root)) { | ||
5588 | ret = -EPERM; | ||
5589 | goto out; | ||
5590 | } | ||
5591 | |||
5578 | sctx->clone_roots_cnt = arg->clone_sources_count; | 5592 | sctx->clone_roots_cnt = arg->clone_sources_count; |
5579 | 5593 | ||
5580 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; | 5594 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; |
@@ -5664,7 +5678,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5664 | 5678 | ||
5665 | spin_lock(&sctx->parent_root->root_item_lock); | 5679 | spin_lock(&sctx->parent_root->root_item_lock); |
5666 | sctx->parent_root->send_in_progress++; | 5680 | sctx->parent_root->send_in_progress++; |
5667 | if (!btrfs_root_readonly(sctx->parent_root)) { | 5681 | if (!btrfs_root_readonly(sctx->parent_root) || |
5682 | btrfs_root_dead(sctx->parent_root)) { | ||
5668 | spin_unlock(&sctx->parent_root->root_item_lock); | 5683 | spin_unlock(&sctx->parent_root->root_item_lock); |
5669 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 5684 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
5670 | ret = -EPERM; | 5685 | ret = -EPERM; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9601d25a4607..4662d92a4b73 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -511,7 +511,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
511 | } else if (compress) { | 511 | } else if (compress) { |
512 | if (!btrfs_test_opt(root, COMPRESS)) | 512 | if (!btrfs_test_opt(root, COMPRESS)) |
513 | btrfs_info(root->fs_info, | 513 | btrfs_info(root->fs_info, |
514 | "btrfs: use %s compression\n", | 514 | "btrfs: use %s compression", |
515 | compress_type); | 515 | compress_type); |
516 | } | 516 | } |
517 | break; | 517 | break; |
@@ -580,8 +580,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
580 | } | 580 | } |
581 | break; | 581 | break; |
582 | case Opt_acl: | 582 | case Opt_acl: |
583 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | ||
583 | root->fs_info->sb->s_flags |= MS_POSIXACL; | 584 | root->fs_info->sb->s_flags |= MS_POSIXACL; |
584 | break; | 585 | break; |
586 | #else | ||
587 | btrfs_err(root->fs_info, | ||
588 | "support for ACL not compiled in!"); | ||
589 | ret = -EINVAL; | ||
590 | goto out; | ||
591 | #endif | ||
585 | case Opt_noacl: | 592 | case Opt_noacl: |
586 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; | 593 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; |
587 | break; | 594 | break; |
@@ -1413,6 +1420,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1413 | * this also happens on 'umount -rf' or on shutdown, when | 1420 | * this also happens on 'umount -rf' or on shutdown, when |
1414 | * the filesystem is busy. | 1421 | * the filesystem is busy. |
1415 | */ | 1422 | */ |
1423 | cancel_work_sync(&fs_info->async_reclaim_work); | ||
1416 | 1424 | ||
1417 | /* wait for the uuid_scan task to finish */ | 1425 | /* wait for the uuid_scan task to finish */ |
1418 | down(&fs_info->uuid_tree_rescan_sem); | 1426 | down(&fs_info->uuid_tree_rescan_sem); |
@@ -1894,6 +1902,9 @@ static int btrfs_run_sanity_tests(void) | |||
1894 | if (ret) | 1902 | if (ret) |
1895 | goto out; | 1903 | goto out; |
1896 | ret = btrfs_test_inodes(); | 1904 | ret = btrfs_test_inodes(); |
1905 | if (ret) | ||
1906 | goto out; | ||
1907 | ret = btrfs_test_qgroups(); | ||
1897 | out: | 1908 | out: |
1898 | btrfs_destroy_test_fs(); | 1909 | btrfs_destroy_test_fs(); |
1899 | return ret; | 1910 | return ret; |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c5eb2143dc66..df39458f1487 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -254,6 +254,7 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj, | |||
254 | BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show); | 254 | BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show); |
255 | 255 | ||
256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) | 256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) |
257 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) | ||
257 | 258 | ||
258 | static ssize_t raid_bytes_show(struct kobject *kobj, | 259 | static ssize_t raid_bytes_show(struct kobject *kobj, |
259 | struct kobj_attribute *attr, char *buf); | 260 | struct kobj_attribute *attr, char *buf); |
@@ -266,7 +267,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj, | |||
266 | { | 267 | { |
267 | struct btrfs_space_info *sinfo = to_space_info(kobj->parent); | 268 | struct btrfs_space_info *sinfo = to_space_info(kobj->parent); |
268 | struct btrfs_block_group_cache *block_group; | 269 | struct btrfs_block_group_cache *block_group; |
269 | int index = kobj - sinfo->block_group_kobjs; | 270 | int index = to_raid_kobj(kobj)->raid_type; |
270 | u64 val = 0; | 271 | u64 val = 0; |
271 | 272 | ||
272 | down_read(&sinfo->groups_sem); | 273 | down_read(&sinfo->groups_sem); |
@@ -288,7 +289,7 @@ static struct attribute *raid_attributes[] = { | |||
288 | 289 | ||
289 | static void release_raid_kobj(struct kobject *kobj) | 290 | static void release_raid_kobj(struct kobject *kobj) |
290 | { | 291 | { |
291 | kobject_put(kobj->parent); | 292 | kfree(to_raid_kobj(kobj)); |
292 | } | 293 | } |
293 | 294 | ||
294 | struct kobj_type btrfs_raid_ktype = { | 295 | struct kobj_type btrfs_raid_ktype = { |
@@ -374,11 +375,8 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
374 | struct btrfs_root *root = fs_info->fs_root; | 375 | struct btrfs_root *root = fs_info->fs_root; |
375 | int ret; | 376 | int ret; |
376 | 377 | ||
377 | if (len >= BTRFS_LABEL_SIZE) { | 378 | if (len >= BTRFS_LABEL_SIZE) |
378 | pr_err("BTRFS: unable to set label with more than %d bytes\n", | ||
379 | BTRFS_LABEL_SIZE - 1); | ||
380 | return -EINVAL; | 379 | return -EINVAL; |
381 | } | ||
382 | 380 | ||
383 | trans = btrfs_start_transaction(root, 0); | 381 | trans = btrfs_start_transaction(root, 0); |
384 | if (IS_ERR(trans)) | 382 | if (IS_ERR(trans)) |
@@ -396,8 +394,48 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
396 | } | 394 | } |
397 | BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store); | 395 | BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store); |
398 | 396 | ||
397 | static ssize_t btrfs_no_store(struct kobject *kobj, | ||
398 | struct kobj_attribute *a, | ||
399 | const char *buf, size_t len) | ||
400 | { | ||
401 | return -EPERM; | ||
402 | } | ||
403 | |||
404 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, | ||
405 | struct kobj_attribute *a, char *buf) | ||
406 | { | ||
407 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
408 | |||
409 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); | ||
410 | } | ||
411 | |||
412 | BTRFS_ATTR_RW(nodesize, 0444, btrfs_nodesize_show, btrfs_no_store); | ||
413 | |||
414 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | ||
415 | struct kobj_attribute *a, char *buf) | ||
416 | { | ||
417 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
418 | |||
419 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | ||
420 | } | ||
421 | |||
422 | BTRFS_ATTR_RW(sectorsize, 0444, btrfs_sectorsize_show, btrfs_no_store); | ||
423 | |||
424 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | ||
425 | struct kobj_attribute *a, char *buf) | ||
426 | { | ||
427 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
428 | |||
429 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | ||
430 | } | ||
431 | |||
432 | BTRFS_ATTR_RW(clone_alignment, 0444, btrfs_clone_alignment_show, btrfs_no_store); | ||
433 | |||
399 | static struct attribute *btrfs_attrs[] = { | 434 | static struct attribute *btrfs_attrs[] = { |
400 | BTRFS_ATTR_PTR(label), | 435 | BTRFS_ATTR_PTR(label), |
436 | BTRFS_ATTR_PTR(nodesize), | ||
437 | BTRFS_ATTR_PTR(sectorsize), | ||
438 | BTRFS_ATTR_PTR(clone_alignment), | ||
401 | NULL, | 439 | NULL, |
402 | }; | 440 | }; |
403 | 441 | ||
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 757ef00a75a4..a5dcacb5df9c 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c | |||
@@ -21,6 +21,9 @@ | |||
21 | #include <linux/magic.h> | 21 | #include <linux/magic.h> |
22 | #include "btrfs-tests.h" | 22 | #include "btrfs-tests.h" |
23 | #include "../ctree.h" | 23 | #include "../ctree.h" |
24 | #include "../volumes.h" | ||
25 | #include "../disk-io.h" | ||
26 | #include "../qgroup.h" | ||
24 | 27 | ||
25 | static struct vfsmount *test_mnt = NULL; | 28 | static struct vfsmount *test_mnt = NULL; |
26 | 29 | ||
@@ -72,3 +75,97 @@ void btrfs_destroy_test_fs(void) | |||
72 | kern_unmount(test_mnt); | 75 | kern_unmount(test_mnt); |
73 | unregister_filesystem(&test_type); | 76 | unregister_filesystem(&test_type); |
74 | } | 77 | } |
78 | |||
79 | struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) | ||
80 | { | ||
81 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), | ||
82 | GFP_NOFS); | ||
83 | |||
84 | if (!fs_info) | ||
85 | return fs_info; | ||
86 | fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), | ||
87 | GFP_NOFS); | ||
88 | if (!fs_info->fs_devices) { | ||
89 | kfree(fs_info); | ||
90 | return NULL; | ||
91 | } | ||
92 | fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), | ||
93 | GFP_NOFS); | ||
94 | if (!fs_info->super_copy) { | ||
95 | kfree(fs_info->fs_devices); | ||
96 | kfree(fs_info); | ||
97 | return NULL; | ||
98 | } | ||
99 | |||
100 | if (init_srcu_struct(&fs_info->subvol_srcu)) { | ||
101 | kfree(fs_info->fs_devices); | ||
102 | kfree(fs_info->super_copy); | ||
103 | kfree(fs_info); | ||
104 | return NULL; | ||
105 | } | ||
106 | |||
107 | spin_lock_init(&fs_info->buffer_lock); | ||
108 | spin_lock_init(&fs_info->qgroup_lock); | ||
109 | spin_lock_init(&fs_info->qgroup_op_lock); | ||
110 | spin_lock_init(&fs_info->super_lock); | ||
111 | spin_lock_init(&fs_info->fs_roots_radix_lock); | ||
112 | spin_lock_init(&fs_info->tree_mod_seq_lock); | ||
113 | mutex_init(&fs_info->qgroup_ioctl_lock); | ||
114 | mutex_init(&fs_info->qgroup_rescan_lock); | ||
115 | rwlock_init(&fs_info->tree_mod_log_lock); | ||
116 | fs_info->running_transaction = NULL; | ||
117 | fs_info->qgroup_tree = RB_ROOT; | ||
118 | fs_info->qgroup_ulist = NULL; | ||
119 | atomic64_set(&fs_info->tree_mod_seq, 0); | ||
120 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | ||
121 | INIT_LIST_HEAD(&fs_info->dead_roots); | ||
122 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | ||
123 | INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); | ||
124 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | ||
125 | return fs_info; | ||
126 | } | ||
127 | |||
128 | static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) | ||
129 | { | ||
130 | struct radix_tree_iter iter; | ||
131 | void **slot; | ||
132 | |||
133 | spin_lock(&fs_info->buffer_lock); | ||
134 | restart: | ||
135 | radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { | ||
136 | struct extent_buffer *eb; | ||
137 | |||
138 | eb = radix_tree_deref_slot(slot); | ||
139 | if (!eb) | ||
140 | continue; | ||
141 | /* Shouldn't happen but that kind of thinking creates CVEs */ | ||
142 | if (radix_tree_exception(eb)) { | ||
143 | if (radix_tree_deref_retry(eb)) | ||
144 | goto restart; | ||
145 | continue; | ||
146 | } | ||
147 | spin_unlock(&fs_info->buffer_lock); | ||
148 | free_extent_buffer_stale(eb); | ||
149 | spin_lock(&fs_info->buffer_lock); | ||
150 | } | ||
151 | spin_unlock(&fs_info->buffer_lock); | ||
152 | |||
153 | btrfs_free_qgroup_config(fs_info); | ||
154 | btrfs_free_fs_roots(fs_info); | ||
155 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
156 | kfree(fs_info->super_copy); | ||
157 | kfree(fs_info->fs_devices); | ||
158 | kfree(fs_info); | ||
159 | } | ||
160 | |||
161 | void btrfs_free_dummy_root(struct btrfs_root *root) | ||
162 | { | ||
163 | if (!root) | ||
164 | return; | ||
165 | if (root->node) | ||
166 | free_extent_buffer(root->node); | ||
167 | if (root->fs_info) | ||
168 | btrfs_free_dummy_fs_info(root->fs_info); | ||
169 | kfree(root); | ||
170 | } | ||
171 | |||
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 312560a9123d..fd3954224480 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h | |||
@@ -23,13 +23,18 @@ | |||
23 | 23 | ||
24 | #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__) | 24 | #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__) |
25 | 25 | ||
26 | struct btrfs_root; | ||
27 | |||
26 | int btrfs_test_free_space_cache(void); | 28 | int btrfs_test_free_space_cache(void); |
27 | int btrfs_test_extent_buffer_operations(void); | 29 | int btrfs_test_extent_buffer_operations(void); |
28 | int btrfs_test_extent_io(void); | 30 | int btrfs_test_extent_io(void); |
29 | int btrfs_test_inodes(void); | 31 | int btrfs_test_inodes(void); |
32 | int btrfs_test_qgroups(void); | ||
30 | int btrfs_init_test_fs(void); | 33 | int btrfs_init_test_fs(void); |
31 | void btrfs_destroy_test_fs(void); | 34 | void btrfs_destroy_test_fs(void); |
32 | struct inode *btrfs_new_test_inode(void); | 35 | struct inode *btrfs_new_test_inode(void); |
36 | struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); | ||
37 | void btrfs_free_dummy_root(struct btrfs_root *root); | ||
33 | #else | 38 | #else |
34 | static inline int btrfs_test_free_space_cache(void) | 39 | static inline int btrfs_test_free_space_cache(void) |
35 | { | 40 | { |
@@ -54,6 +59,10 @@ static inline int btrfs_test_inodes(void) | |||
54 | { | 59 | { |
55 | return 0; | 60 | return 0; |
56 | } | 61 | } |
62 | static inline int btrfs_test_qgroups(void) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
57 | #endif | 66 | #endif |
58 | 67 | ||
59 | #endif | 68 | #endif |
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 397d1f99a8eb..3ae0f5b8bb80 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
@@ -23,33 +23,6 @@ | |||
23 | #include "../extent_io.h" | 23 | #include "../extent_io.h" |
24 | #include "../volumes.h" | 24 | #include "../volumes.h" |
25 | 25 | ||
26 | static struct btrfs_fs_info *alloc_dummy_fs_info(void) | ||
27 | { | ||
28 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), | ||
29 | GFP_NOFS); | ||
30 | if (!fs_info) | ||
31 | return fs_info; | ||
32 | fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), | ||
33 | GFP_NOFS); | ||
34 | if (!fs_info->fs_devices) { | ||
35 | kfree(fs_info); | ||
36 | return NULL; | ||
37 | } | ||
38 | return fs_info; | ||
39 | } | ||
40 | static void free_dummy_root(struct btrfs_root *root) | ||
41 | { | ||
42 | if (!root) | ||
43 | return; | ||
44 | if (root->fs_info) { | ||
45 | kfree(root->fs_info->fs_devices); | ||
46 | kfree(root->fs_info); | ||
47 | } | ||
48 | if (root->node) | ||
49 | free_extent_buffer(root->node); | ||
50 | kfree(root); | ||
51 | } | ||
52 | |||
53 | static void insert_extent(struct btrfs_root *root, u64 start, u64 len, | 26 | static void insert_extent(struct btrfs_root *root, u64 start, u64 len, |
54 | u64 ram_bytes, u64 offset, u64 disk_bytenr, | 27 | u64 ram_bytes, u64 offset, u64 disk_bytenr, |
55 | u64 disk_len, u32 type, u8 compression, int slot) | 28 | u64 disk_len, u32 type, u8 compression, int slot) |
@@ -276,7 +249,7 @@ static noinline int test_btrfs_get_extent(void) | |||
276 | * We do this since btrfs_get_extent wants to assign em->bdev to | 249 | * We do this since btrfs_get_extent wants to assign em->bdev to |
277 | * root->fs_info->fs_devices->latest_bdev. | 250 | * root->fs_info->fs_devices->latest_bdev. |
278 | */ | 251 | */ |
279 | root->fs_info = alloc_dummy_fs_info(); | 252 | root->fs_info = btrfs_alloc_dummy_fs_info(); |
280 | if (!root->fs_info) { | 253 | if (!root->fs_info) { |
281 | test_msg("Couldn't allocate dummy fs info\n"); | 254 | test_msg("Couldn't allocate dummy fs info\n"); |
282 | goto out; | 255 | goto out; |
@@ -837,7 +810,7 @@ out: | |||
837 | if (!IS_ERR(em)) | 810 | if (!IS_ERR(em)) |
838 | free_extent_map(em); | 811 | free_extent_map(em); |
839 | iput(inode); | 812 | iput(inode); |
840 | free_dummy_root(root); | 813 | btrfs_free_dummy_root(root); |
841 | return ret; | 814 | return ret; |
842 | } | 815 | } |
843 | 816 | ||
@@ -864,7 +837,7 @@ static int test_hole_first(void) | |||
864 | goto out; | 837 | goto out; |
865 | } | 838 | } |
866 | 839 | ||
867 | root->fs_info = alloc_dummy_fs_info(); | 840 | root->fs_info = btrfs_alloc_dummy_fs_info(); |
868 | if (!root->fs_info) { | 841 | if (!root->fs_info) { |
869 | test_msg("Couldn't allocate dummy fs info\n"); | 842 | test_msg("Couldn't allocate dummy fs info\n"); |
870 | goto out; | 843 | goto out; |
@@ -934,7 +907,7 @@ out: | |||
934 | if (!IS_ERR(em)) | 907 | if (!IS_ERR(em)) |
935 | free_extent_map(em); | 908 | free_extent_map(em); |
936 | iput(inode); | 909 | iput(inode); |
937 | free_dummy_root(root); | 910 | btrfs_free_dummy_root(root); |
938 | return ret; | 911 | return ret; |
939 | } | 912 | } |
940 | 913 | ||
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c new file mode 100644 index 000000000000..fa691b754aaf --- /dev/null +++ b/fs/btrfs/tests/qgroup-tests.c | |||
@@ -0,0 +1,468 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2013 Facebook. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include "btrfs-tests.h" | ||
20 | #include "../ctree.h" | ||
21 | #include "../transaction.h" | ||
22 | #include "../disk-io.h" | ||
23 | #include "../qgroup.h" | ||
24 | |||
25 | static void init_dummy_trans(struct btrfs_trans_handle *trans) | ||
26 | { | ||
27 | memset(trans, 0, sizeof(*trans)); | ||
28 | trans->transid = 1; | ||
29 | INIT_LIST_HEAD(&trans->qgroup_ref_list); | ||
30 | trans->type = __TRANS_DUMMY; | ||
31 | } | ||
32 | |||
33 | static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr, | ||
34 | u64 num_bytes, u64 parent, u64 root_objectid) | ||
35 | { | ||
36 | struct btrfs_trans_handle trans; | ||
37 | struct btrfs_extent_item *item; | ||
38 | struct btrfs_extent_inline_ref *iref; | ||
39 | struct btrfs_tree_block_info *block_info; | ||
40 | struct btrfs_path *path; | ||
41 | struct extent_buffer *leaf; | ||
42 | struct btrfs_key ins; | ||
43 | u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info); | ||
44 | int ret; | ||
45 | |||
46 | init_dummy_trans(&trans); | ||
47 | |||
48 | ins.objectid = bytenr; | ||
49 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
50 | ins.offset = num_bytes; | ||
51 | |||
52 | path = btrfs_alloc_path(); | ||
53 | if (!path) { | ||
54 | test_msg("Couldn't allocate path\n"); | ||
55 | return -ENOMEM; | ||
56 | } | ||
57 | |||
58 | path->leave_spinning = 1; | ||
59 | ret = btrfs_insert_empty_item(&trans, root, path, &ins, size); | ||
60 | if (ret) { | ||
61 | test_msg("Couldn't insert ref %d\n", ret); | ||
62 | btrfs_free_path(path); | ||
63 | return ret; | ||
64 | } | ||
65 | |||
66 | leaf = path->nodes[0]; | ||
67 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
68 | btrfs_set_extent_refs(leaf, item, 1); | ||
69 | btrfs_set_extent_generation(leaf, item, 1); | ||
70 | btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK); | ||
71 | block_info = (struct btrfs_tree_block_info *)(item + 1); | ||
72 | btrfs_set_tree_block_level(leaf, block_info, 1); | ||
73 | iref = (struct btrfs_extent_inline_ref *)(block_info + 1); | ||
74 | if (parent > 0) { | ||
75 | btrfs_set_extent_inline_ref_type(leaf, iref, | ||
76 | BTRFS_SHARED_BLOCK_REF_KEY); | ||
77 | btrfs_set_extent_inline_ref_offset(leaf, iref, parent); | ||
78 | } else { | ||
79 | btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY); | ||
80 | btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); | ||
81 | } | ||
82 | btrfs_free_path(path); | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes, | ||
87 | u64 parent, u64 root_objectid) | ||
88 | { | ||
89 | struct btrfs_trans_handle trans; | ||
90 | struct btrfs_extent_item *item; | ||
91 | struct btrfs_path *path; | ||
92 | struct btrfs_key key; | ||
93 | u64 refs; | ||
94 | int ret; | ||
95 | |||
96 | init_dummy_trans(&trans); | ||
97 | |||
98 | key.objectid = bytenr; | ||
99 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
100 | key.offset = num_bytes; | ||
101 | |||
102 | path = btrfs_alloc_path(); | ||
103 | if (!path) { | ||
104 | test_msg("Couldn't allocate path\n"); | ||
105 | return -ENOMEM; | ||
106 | } | ||
107 | |||
108 | path->leave_spinning = 1; | ||
109 | ret = btrfs_search_slot(&trans, root, &key, path, 0, 1); | ||
110 | if (ret) { | ||
111 | test_msg("Couldn't find extent ref\n"); | ||
112 | btrfs_free_path(path); | ||
113 | return ret; | ||
114 | } | ||
115 | |||
116 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
117 | struct btrfs_extent_item); | ||
118 | refs = btrfs_extent_refs(path->nodes[0], item); | ||
119 | btrfs_set_extent_refs(path->nodes[0], item, refs + 1); | ||
120 | btrfs_release_path(path); | ||
121 | |||
122 | key.objectid = bytenr; | ||
123 | if (parent) { | ||
124 | key.type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
125 | key.offset = parent; | ||
126 | } else { | ||
127 | key.type = BTRFS_TREE_BLOCK_REF_KEY; | ||
128 | key.offset = root_objectid; | ||
129 | } | ||
130 | |||
131 | ret = btrfs_insert_empty_item(&trans, root, path, &key, 0); | ||
132 | if (ret) | ||
133 | test_msg("Failed to insert backref\n"); | ||
134 | btrfs_free_path(path); | ||
135 | return ret; | ||
136 | } | ||
137 | |||
138 | static int remove_extent_item(struct btrfs_root *root, u64 bytenr, | ||
139 | u64 num_bytes) | ||
140 | { | ||
141 | struct btrfs_trans_handle trans; | ||
142 | struct btrfs_key key; | ||
143 | struct btrfs_path *path; | ||
144 | int ret; | ||
145 | |||
146 | init_dummy_trans(&trans); | ||
147 | |||
148 | key.objectid = bytenr; | ||
149 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
150 | key.offset = num_bytes; | ||
151 | |||
152 | path = btrfs_alloc_path(); | ||
153 | if (!path) { | ||
154 | test_msg("Couldn't allocate path\n"); | ||
155 | return -ENOMEM; | ||
156 | } | ||
157 | path->leave_spinning = 1; | ||
158 | |||
159 | ret = btrfs_search_slot(&trans, root, &key, path, -1, 1); | ||
160 | if (ret) { | ||
161 | test_msg("Didn't find our key %d\n", ret); | ||
162 | btrfs_free_path(path); | ||
163 | return ret; | ||
164 | } | ||
165 | btrfs_del_item(&trans, root, path); | ||
166 | btrfs_free_path(path); | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, | ||
171 | u64 num_bytes, u64 parent, u64 root_objectid) | ||
172 | { | ||
173 | struct btrfs_trans_handle trans; | ||
174 | struct btrfs_extent_item *item; | ||
175 | struct btrfs_path *path; | ||
176 | struct btrfs_key key; | ||
177 | u64 refs; | ||
178 | int ret; | ||
179 | |||
180 | init_dummy_trans(&trans); | ||
181 | |||
182 | key.objectid = bytenr; | ||
183 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
184 | key.offset = num_bytes; | ||
185 | |||
186 | path = btrfs_alloc_path(); | ||
187 | if (!path) { | ||
188 | test_msg("Couldn't allocate path\n"); | ||
189 | return -ENOMEM; | ||
190 | } | ||
191 | |||
192 | path->leave_spinning = 1; | ||
193 | ret = btrfs_search_slot(&trans, root, &key, path, 0, 1); | ||
194 | if (ret) { | ||
195 | test_msg("Couldn't find extent ref\n"); | ||
196 | btrfs_free_path(path); | ||
197 | return ret; | ||
198 | } | ||
199 | |||
200 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
201 | struct btrfs_extent_item); | ||
202 | refs = btrfs_extent_refs(path->nodes[0], item); | ||
203 | btrfs_set_extent_refs(path->nodes[0], item, refs - 1); | ||
204 | btrfs_release_path(path); | ||
205 | |||
206 | key.objectid = bytenr; | ||
207 | if (parent) { | ||
208 | key.type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
209 | key.offset = parent; | ||
210 | } else { | ||
211 | key.type = BTRFS_TREE_BLOCK_REF_KEY; | ||
212 | key.offset = root_objectid; | ||
213 | } | ||
214 | |||
215 | ret = btrfs_search_slot(&trans, root, &key, path, -1, 1); | ||
216 | if (ret) { | ||
217 | test_msg("Couldn't find backref %d\n", ret); | ||
218 | btrfs_free_path(path); | ||
219 | return ret; | ||
220 | } | ||
221 | btrfs_del_item(&trans, root, path); | ||
222 | btrfs_free_path(path); | ||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | static int test_no_shared_qgroup(struct btrfs_root *root) | ||
227 | { | ||
228 | struct btrfs_trans_handle trans; | ||
229 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
230 | int ret; | ||
231 | |||
232 | init_dummy_trans(&trans); | ||
233 | |||
234 | test_msg("Qgroup basic add\n"); | ||
235 | ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL); | ||
236 | if (ret) { | ||
237 | test_msg("Couldn't create a qgroup %d\n", ret); | ||
238 | return ret; | ||
239 | } | ||
240 | |||
241 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, | ||
242 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
243 | if (ret) { | ||
244 | test_msg("Couldn't add space to a qgroup %d\n", ret); | ||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); | ||
249 | if (ret) | ||
250 | return ret; | ||
251 | |||
252 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
253 | if (ret) { | ||
254 | test_msg("Delayed qgroup accounting failed %d\n", ret); | ||
255 | return ret; | ||
256 | } | ||
257 | |||
258 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { | ||
259 | test_msg("Qgroup counts didn't match expected values\n"); | ||
260 | return -EINVAL; | ||
261 | } | ||
262 | |||
263 | ret = remove_extent_item(root, 4096, 4096); | ||
264 | if (ret) | ||
265 | return -EINVAL; | ||
266 | |||
267 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, | ||
268 | BTRFS_QGROUP_OPER_SUB_EXCL, 0); | ||
269 | if (ret) { | ||
270 | test_msg("Couldn't remove space from the qgroup %d\n", ret); | ||
271 | return -EINVAL; | ||
272 | } | ||
273 | |||
274 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
275 | if (ret) { | ||
276 | test_msg("Qgroup accounting failed %d\n", ret); | ||
277 | return -EINVAL; | ||
278 | } | ||
279 | |||
280 | if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) { | ||
281 | test_msg("Qgroup counts didn't match expected values\n"); | ||
282 | return -EINVAL; | ||
283 | } | ||
284 | |||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * Add a ref for two different roots to make sure the shared value comes out | ||
290 | * right, also remove one of the roots and make sure the exclusive count is | ||
291 | * adjusted properly. | ||
292 | */ | ||
293 | static int test_multiple_refs(struct btrfs_root *root) | ||
294 | { | ||
295 | struct btrfs_trans_handle trans; | ||
296 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
297 | int ret; | ||
298 | |||
299 | init_dummy_trans(&trans); | ||
300 | |||
301 | test_msg("Qgroup multiple refs test\n"); | ||
302 | |||
303 | /* We have 5 created already from the previous test */ | ||
304 | ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL); | ||
305 | if (ret) { | ||
306 | test_msg("Couldn't create a qgroup %d\n", ret); | ||
307 | return ret; | ||
308 | } | ||
309 | |||
310 | ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); | ||
311 | if (ret) | ||
312 | return ret; | ||
313 | |||
314 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, | ||
315 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
316 | if (ret) { | ||
317 | test_msg("Couldn't add space to a qgroup %d\n", ret); | ||
318 | return ret; | ||
319 | } | ||
320 | |||
321 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
322 | if (ret) { | ||
323 | test_msg("Delayed qgroup accounting failed %d\n", ret); | ||
324 | return ret; | ||
325 | } | ||
326 | |||
327 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { | ||
328 | test_msg("Qgroup counts didn't match expected values\n"); | ||
329 | return -EINVAL; | ||
330 | } | ||
331 | |||
332 | ret = add_tree_ref(root, 4096, 4096, 0, 256); | ||
333 | if (ret) | ||
334 | return ret; | ||
335 | |||
336 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096, | ||
337 | BTRFS_QGROUP_OPER_ADD_SHARED, 0); | ||
338 | if (ret) { | ||
339 | test_msg("Qgroup record ref failed %d\n", ret); | ||
340 | return ret; | ||
341 | } | ||
342 | |||
343 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
344 | if (ret) { | ||
345 | test_msg("Qgroup accounting failed %d\n", ret); | ||
346 | return ret; | ||
347 | } | ||
348 | |||
349 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) { | ||
350 | test_msg("Qgroup counts didn't match expected values\n"); | ||
351 | return -EINVAL; | ||
352 | } | ||
353 | |||
354 | if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) { | ||
355 | test_msg("Qgroup counts didn't match expected values\n"); | ||
356 | return -EINVAL; | ||
357 | } | ||
358 | |||
359 | ret = remove_extent_ref(root, 4096, 4096, 0, 256); | ||
360 | if (ret) | ||
361 | return ret; | ||
362 | |||
363 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096, | ||
364 | BTRFS_QGROUP_OPER_SUB_SHARED, 0); | ||
365 | if (ret) { | ||
366 | test_msg("Qgroup record ref failed %d\n", ret); | ||
367 | return ret; | ||
368 | } | ||
369 | |||
370 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
371 | if (ret) { | ||
372 | test_msg("Qgroup accounting failed %d\n", ret); | ||
373 | return ret; | ||
374 | } | ||
375 | |||
376 | if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) { | ||
377 | test_msg("Qgroup counts didn't match expected values\n"); | ||
378 | return -EINVAL; | ||
379 | } | ||
380 | |||
381 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { | ||
382 | test_msg("Qgroup counts didn't match expected values\n"); | ||
383 | return -EINVAL; | ||
384 | } | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | int btrfs_test_qgroups(void) | ||
390 | { | ||
391 | struct btrfs_root *root; | ||
392 | struct btrfs_root *tmp_root; | ||
393 | int ret = 0; | ||
394 | |||
395 | root = btrfs_alloc_dummy_root(); | ||
396 | if (IS_ERR(root)) { | ||
397 | test_msg("Couldn't allocate root\n"); | ||
398 | return PTR_ERR(root); | ||
399 | } | ||
400 | |||
401 | root->fs_info = btrfs_alloc_dummy_fs_info(); | ||
402 | if (!root->fs_info) { | ||
403 | test_msg("Couldn't allocate dummy fs info\n"); | ||
404 | ret = -ENOMEM; | ||
405 | goto out; | ||
406 | } | ||
407 | |||
408 | /* | ||
409 | * Can't use bytenr 0, some things freak out | ||
410 | * *cough*backref walking code*cough* | ||
411 | */ | ||
412 | root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096); | ||
413 | if (!root->node) { | ||
414 | test_msg("Couldn't allocate dummy buffer\n"); | ||
415 | ret = -ENOMEM; | ||
416 | goto out; | ||
417 | } | ||
418 | root->alloc_bytenr += 8192; | ||
419 | |||
420 | tmp_root = btrfs_alloc_dummy_root(); | ||
421 | if (IS_ERR(tmp_root)) { | ||
422 | test_msg("Couldn't allocate a fs root\n"); | ||
423 | ret = PTR_ERR(tmp_root); | ||
424 | goto out; | ||
425 | } | ||
426 | |||
427 | tmp_root->root_key.objectid = 5; | ||
428 | root->fs_info->fs_root = tmp_root; | ||
429 | ret = btrfs_insert_fs_root(root->fs_info, tmp_root); | ||
430 | if (ret) { | ||
431 | test_msg("Couldn't insert fs root %d\n", ret); | ||
432 | goto out; | ||
433 | } | ||
434 | |||
435 | tmp_root = btrfs_alloc_dummy_root(); | ||
436 | if (IS_ERR(tmp_root)) { | ||
437 | test_msg("Couldn't allocate a fs root\n"); | ||
438 | ret = PTR_ERR(tmp_root); | ||
439 | goto out; | ||
440 | } | ||
441 | |||
442 | tmp_root->root_key.objectid = 256; | ||
443 | ret = btrfs_insert_fs_root(root->fs_info, tmp_root); | ||
444 | if (ret) { | ||
445 | test_msg("Couldn't insert fs root %d\n", ret); | ||
446 | goto out; | ||
447 | } | ||
448 | |||
449 | /* We are using this root as our extent root */ | ||
450 | root->fs_info->extent_root = root; | ||
451 | |||
452 | /* | ||
453 | * Some of the paths we test assume we have a filled out fs_info, so we | ||
454 | * just need to add the root in there so we don't panic. | ||
455 | */ | ||
456 | root->fs_info->tree_root = root; | ||
457 | root->fs_info->quota_root = root; | ||
458 | root->fs_info->quota_enabled = 1; | ||
459 | |||
460 | test_msg("Running qgroup tests\n"); | ||
461 | ret = test_no_shared_qgroup(root); | ||
462 | if (ret) | ||
463 | goto out; | ||
464 | ret = test_multiple_refs(root); | ||
465 | out: | ||
466 | btrfs_free_dummy_root(root); | ||
467 | return ret; | ||
468 | } | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7579f6d0b854..9630f10f8e1e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "inode-map.h" | 31 | #include "inode-map.h" |
32 | #include "volumes.h" | 32 | #include "volumes.h" |
33 | #include "dev-replace.h" | 33 | #include "dev-replace.h" |
34 | #include "qgroup.h" | ||
34 | 35 | ||
35 | #define BTRFS_ROOT_TRANS_TAG 0 | 36 | #define BTRFS_ROOT_TRANS_TAG 0 |
36 | 37 | ||
@@ -241,18 +242,19 @@ loop: | |||
241 | static int record_root_in_trans(struct btrfs_trans_handle *trans, | 242 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
242 | struct btrfs_root *root) | 243 | struct btrfs_root *root) |
243 | { | 244 | { |
244 | if (root->ref_cows && root->last_trans < trans->transid) { | 245 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
246 | root->last_trans < trans->transid) { | ||
245 | WARN_ON(root == root->fs_info->extent_root); | 247 | WARN_ON(root == root->fs_info->extent_root); |
246 | WARN_ON(root->commit_root != root->node); | 248 | WARN_ON(root->commit_root != root->node); |
247 | 249 | ||
248 | /* | 250 | /* |
249 | * see below for in_trans_setup usage rules | 251 | * see below for IN_TRANS_SETUP usage rules |
250 | * we have the reloc mutex held now, so there | 252 | * we have the reloc mutex held now, so there |
251 | * is only one writer in this function | 253 | * is only one writer in this function |
252 | */ | 254 | */ |
253 | root->in_trans_setup = 1; | 255 | set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); |
254 | 256 | ||
255 | /* make sure readers find in_trans_setup before | 257 | /* make sure readers find IN_TRANS_SETUP before |
256 | * they find our root->last_trans update | 258 | * they find our root->last_trans update |
257 | */ | 259 | */ |
258 | smp_wmb(); | 260 | smp_wmb(); |
@@ -279,7 +281,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
279 | * But, we have to set root->last_trans before we | 281 | * But, we have to set root->last_trans before we |
280 | * init the relocation root, otherwise, we trip over warnings | 282 | * init the relocation root, otherwise, we trip over warnings |
281 | * in ctree.c. The solution used here is to flag ourselves | 283 | * in ctree.c. The solution used here is to flag ourselves |
282 | * with root->in_trans_setup. When this is 1, we're still | 284 | * with root IN_TRANS_SETUP. When this is 1, we're still |
283 | * fixing up the reloc trees and everyone must wait. | 285 | * fixing up the reloc trees and everyone must wait. |
284 | * | 286 | * |
285 | * When this is zero, they can trust root->last_trans and fly | 287 | * When this is zero, they can trust root->last_trans and fly |
@@ -288,8 +290,8 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
288 | * done before we pop in the zero below | 290 | * done before we pop in the zero below |
289 | */ | 291 | */ |
290 | btrfs_init_reloc_root(trans, root); | 292 | btrfs_init_reloc_root(trans, root); |
291 | smp_wmb(); | 293 | smp_mb__before_atomic(); |
292 | root->in_trans_setup = 0; | 294 | clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); |
293 | } | 295 | } |
294 | return 0; | 296 | return 0; |
295 | } | 297 | } |
@@ -298,16 +300,16 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
298 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 300 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
299 | struct btrfs_root *root) | 301 | struct btrfs_root *root) |
300 | { | 302 | { |
301 | if (!root->ref_cows) | 303 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
302 | return 0; | 304 | return 0; |
303 | 305 | ||
304 | /* | 306 | /* |
305 | * see record_root_in_trans for comments about in_trans_setup usage | 307 | * see record_root_in_trans for comments about IN_TRANS_SETUP usage |
306 | * and barriers | 308 | * and barriers |
307 | */ | 309 | */ |
308 | smp_rmb(); | 310 | smp_rmb(); |
309 | if (root->last_trans == trans->transid && | 311 | if (root->last_trans == trans->transid && |
310 | !root->in_trans_setup) | 312 | !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state)) |
311 | return 0; | 313 | return 0; |
312 | 314 | ||
313 | mutex_lock(&root->fs_info->reloc_mutex); | 315 | mutex_lock(&root->fs_info->reloc_mutex); |
@@ -365,7 +367,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
365 | static inline bool need_reserve_reloc_root(struct btrfs_root *root) | 367 | static inline bool need_reserve_reloc_root(struct btrfs_root *root) |
366 | { | 368 | { |
367 | if (!root->fs_info->reloc_ctl || | 369 | if (!root->fs_info->reloc_ctl || |
368 | !root->ref_cows || | 370 | !test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
369 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || | 371 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || |
370 | root->reloc_root) | 372 | root->reloc_root) |
371 | return false; | 373 | return false; |
@@ -695,6 +697,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
695 | unsigned long cur = trans->delayed_ref_updates; | 697 | unsigned long cur = trans->delayed_ref_updates; |
696 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 698 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
697 | int err = 0; | 699 | int err = 0; |
700 | int must_run_delayed_refs = 0; | ||
698 | 701 | ||
699 | if (trans->use_count > 1) { | 702 | if (trans->use_count > 1) { |
700 | trans->use_count--; | 703 | trans->use_count--; |
@@ -702,14 +705,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
702 | return 0; | 705 | return 0; |
703 | } | 706 | } |
704 | 707 | ||
705 | /* | ||
706 | * do the qgroup accounting as early as possible | ||
707 | */ | ||
708 | err = btrfs_delayed_refs_qgroup_accounting(trans, info); | ||
709 | |||
710 | btrfs_trans_release_metadata(trans, root); | 708 | btrfs_trans_release_metadata(trans, root); |
711 | trans->block_rsv = NULL; | 709 | trans->block_rsv = NULL; |
712 | 710 | ||
711 | if (!list_empty(&trans->new_bgs)) | ||
712 | btrfs_create_pending_block_groups(trans, root); | ||
713 | |||
714 | trans->delayed_ref_updates = 0; | ||
715 | if (!trans->sync) { | ||
716 | must_run_delayed_refs = | ||
717 | btrfs_should_throttle_delayed_refs(trans, root); | ||
718 | cur = max_t(unsigned long, cur, 32); | ||
719 | |||
720 | /* | ||
721 | * don't make the caller wait if they are from a NOLOCK | ||
722 | * or ATTACH transaction, it will deadlock with commit | ||
723 | */ | ||
724 | if (must_run_delayed_refs == 1 && | ||
725 | (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH))) | ||
726 | must_run_delayed_refs = 2; | ||
727 | } | ||
728 | |||
713 | if (trans->qgroup_reserved) { | 729 | if (trans->qgroup_reserved) { |
714 | /* | 730 | /* |
715 | * the same root has to be passed here between start_transaction | 731 | * the same root has to be passed here between start_transaction |
@@ -719,16 +735,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
719 | trans->qgroup_reserved = 0; | 735 | trans->qgroup_reserved = 0; |
720 | } | 736 | } |
721 | 737 | ||
722 | if (!list_empty(&trans->new_bgs)) | ||
723 | btrfs_create_pending_block_groups(trans, root); | ||
724 | |||
725 | trans->delayed_ref_updates = 0; | ||
726 | if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) { | ||
727 | cur = max_t(unsigned long, cur, 32); | ||
728 | trans->delayed_ref_updates = 0; | ||
729 | btrfs_run_delayed_refs(trans, root, cur); | ||
730 | } | ||
731 | |||
732 | btrfs_trans_release_metadata(trans, root); | 738 | btrfs_trans_release_metadata(trans, root); |
733 | trans->block_rsv = NULL; | 739 | trans->block_rsv = NULL; |
734 | 740 | ||
@@ -778,6 +784,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
778 | assert_qgroups_uptodate(trans); | 784 | assert_qgroups_uptodate(trans); |
779 | 785 | ||
780 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 786 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
787 | if (must_run_delayed_refs) { | ||
788 | btrfs_async_run_delayed_refs(root, cur, | ||
789 | must_run_delayed_refs == 1); | ||
790 | } | ||
781 | return err; | 791 | return err; |
782 | } | 792 | } |
783 | 793 | ||
@@ -1049,8 +1059,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
1049 | btrfs_save_ino_cache(root, trans); | 1059 | btrfs_save_ino_cache(root, trans); |
1050 | 1060 | ||
1051 | /* see comments in should_cow_block() */ | 1061 | /* see comments in should_cow_block() */ |
1052 | root->force_cow = 0; | 1062 | clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); |
1053 | smp_wmb(); | 1063 | smp_mb__after_atomic(); |
1054 | 1064 | ||
1055 | if (root->commit_root != root->node) { | 1065 | if (root->commit_root != root->node) { |
1056 | list_add_tail(&root->dirty_list, | 1066 | list_add_tail(&root->dirty_list, |
@@ -1081,7 +1091,7 @@ int btrfs_defrag_root(struct btrfs_root *root) | |||
1081 | struct btrfs_trans_handle *trans; | 1091 | struct btrfs_trans_handle *trans; |
1082 | int ret; | 1092 | int ret; |
1083 | 1093 | ||
1084 | if (xchg(&root->defrag_running, 1)) | 1094 | if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state)) |
1085 | return 0; | 1095 | return 0; |
1086 | 1096 | ||
1087 | while (1) { | 1097 | while (1) { |
@@ -1104,7 +1114,7 @@ int btrfs_defrag_root(struct btrfs_root *root) | |||
1104 | break; | 1114 | break; |
1105 | } | 1115 | } |
1106 | } | 1116 | } |
1107 | root->defrag_running = 0; | 1117 | clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state); |
1108 | return ret; | 1118 | return ret; |
1109 | } | 1119 | } |
1110 | 1120 | ||
@@ -1168,12 +1178,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1168 | goto no_free_objectid; | 1178 | goto no_free_objectid; |
1169 | } | 1179 | } |
1170 | 1180 | ||
1171 | pending->error = btrfs_qgroup_inherit(trans, fs_info, | ||
1172 | root->root_key.objectid, | ||
1173 | objectid, pending->inherit); | ||
1174 | if (pending->error) | ||
1175 | goto no_free_objectid; | ||
1176 | |||
1177 | key.objectid = objectid; | 1181 | key.objectid = objectid; |
1178 | key.offset = (u64)-1; | 1182 | key.offset = (u64)-1; |
1179 | key.type = BTRFS_ROOT_ITEM_KEY; | 1183 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -1270,8 +1274,24 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1270 | goto fail; | 1274 | goto fail; |
1271 | } | 1275 | } |
1272 | 1276 | ||
1277 | /* | ||
1278 | * We need to flush delayed refs in order to make sure all of our quota | ||
1279 | * operations have been done before we call btrfs_qgroup_inherit. | ||
1280 | */ | ||
1281 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1282 | if (ret) { | ||
1283 | btrfs_abort_transaction(trans, root, ret); | ||
1284 | goto fail; | ||
1285 | } | ||
1286 | |||
1287 | pending->error = btrfs_qgroup_inherit(trans, fs_info, | ||
1288 | root->root_key.objectid, | ||
1289 | objectid, pending->inherit); | ||
1290 | if (pending->error) | ||
1291 | goto no_free_objectid; | ||
1292 | |||
1273 | /* see comments in should_cow_block() */ | 1293 | /* see comments in should_cow_block() */ |
1274 | root->force_cow = 1; | 1294 | set_bit(BTRFS_ROOT_FORCE_COW, &root->state); |
1275 | smp_wmb(); | 1295 | smp_wmb(); |
1276 | 1296 | ||
1277 | btrfs_set_root_node(new_root_item, tmp); | 1297 | btrfs_set_root_node(new_root_item, tmp); |
@@ -1598,12 +1618,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
1598 | * them now so that they hinder processing of more delayed refs | 1618 | * them now so that they hinder processing of more delayed refs |
1599 | * as little as possible. | 1619 | * as little as possible. |
1600 | */ | 1620 | */ |
1601 | if (ret) { | ||
1602 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
1603 | return ret; | ||
1604 | } | ||
1605 | |||
1606 | ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
1607 | if (ret) | 1621 | if (ret) |
1608 | return ret; | 1622 | return ret; |
1609 | 1623 | ||
@@ -1984,19 +1998,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
1984 | } | 1998 | } |
1985 | root = list_first_entry(&fs_info->dead_roots, | 1999 | root = list_first_entry(&fs_info->dead_roots, |
1986 | struct btrfs_root, root_list); | 2000 | struct btrfs_root, root_list); |
1987 | /* | ||
1988 | * Make sure root is not involved in send, | ||
1989 | * if we fail with first root, we return | ||
1990 | * directly rather than continue. | ||
1991 | */ | ||
1992 | spin_lock(&root->root_item_lock); | ||
1993 | if (root->send_in_progress) { | ||
1994 | spin_unlock(&fs_info->trans_lock); | ||
1995 | spin_unlock(&root->root_item_lock); | ||
1996 | return 0; | ||
1997 | } | ||
1998 | spin_unlock(&root->root_item_lock); | ||
1999 | |||
2000 | list_del_init(&root->root_list); | 2001 | list_del_init(&root->root_list); |
2001 | spin_unlock(&fs_info->trans_lock); | 2002 | spin_unlock(&fs_info->trans_lock); |
2002 | 2003 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b57b924e8e03..7dd558ed0716 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -69,6 +69,7 @@ struct btrfs_transaction { | |||
69 | #define __TRANS_ATTACH (1U << 10) | 69 | #define __TRANS_ATTACH (1U << 10) |
70 | #define __TRANS_JOIN (1U << 11) | 70 | #define __TRANS_JOIN (1U << 11) |
71 | #define __TRANS_JOIN_NOLOCK (1U << 12) | 71 | #define __TRANS_JOIN_NOLOCK (1U << 12) |
72 | #define __TRANS_DUMMY (1U << 13) | ||
72 | 73 | ||
73 | #define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE) | 74 | #define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE) |
74 | #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) | 75 | #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 76928ca97741..a63719cc9578 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -49,7 +49,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
49 | goto out; | 49 | goto out; |
50 | } | 50 | } |
51 | 51 | ||
52 | if (root->ref_cows == 0) | 52 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
53 | goto out; | 53 | goto out; |
54 | 54 | ||
55 | if (btrfs_test_opt(root, SSD)) | 55 | if (btrfs_test_opt(root, SSD)) |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e2f45fc02610..9e1f2cd5e67a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -20,13 +20,11 @@ | |||
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/list_sort.h> | 22 | #include <linux/list_sort.h> |
23 | #include "ctree.h" | 23 | #include "tree-log.h" |
24 | #include "transaction.h" | ||
25 | #include "disk-io.h" | 24 | #include "disk-io.h" |
26 | #include "locking.h" | 25 | #include "locking.h" |
27 | #include "print-tree.h" | 26 | #include "print-tree.h" |
28 | #include "backref.h" | 27 | #include "backref.h" |
29 | #include "tree-log.h" | ||
30 | #include "hash.h" | 28 | #include "hash.h" |
31 | 29 | ||
32 | /* magic values for the inode_only field in btrfs_log_inode: | 30 | /* magic values for the inode_only field in btrfs_log_inode: |
@@ -144,17 +142,15 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
144 | 142 | ||
145 | mutex_lock(&root->log_mutex); | 143 | mutex_lock(&root->log_mutex); |
146 | if (root->log_root) { | 144 | if (root->log_root) { |
147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | 145 | if (btrfs_need_log_full_commit(root->fs_info, trans)) { |
148 | trans->transid) { | ||
149 | ret = -EAGAIN; | 146 | ret = -EAGAIN; |
150 | goto out; | 147 | goto out; |
151 | } | 148 | } |
152 | |||
153 | if (!root->log_start_pid) { | 149 | if (!root->log_start_pid) { |
154 | root->log_start_pid = current->pid; | 150 | root->log_start_pid = current->pid; |
155 | root->log_multiple_pids = false; | 151 | clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); |
156 | } else if (root->log_start_pid != current->pid) { | 152 | } else if (root->log_start_pid != current->pid) { |
157 | root->log_multiple_pids = true; | 153 | set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); |
158 | } | 154 | } |
159 | 155 | ||
160 | atomic_inc(&root->log_batch); | 156 | atomic_inc(&root->log_batch); |
@@ -181,7 +177,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
181 | if (ret) | 177 | if (ret) |
182 | goto out; | 178 | goto out; |
183 | } | 179 | } |
184 | root->log_multiple_pids = false; | 180 | clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); |
185 | root->log_start_pid = current->pid; | 181 | root->log_start_pid = current->pid; |
186 | atomic_inc(&root->log_batch); | 182 | atomic_inc(&root->log_batch); |
187 | atomic_inc(&root->log_writers); | 183 | atomic_inc(&root->log_writers); |
@@ -2500,7 +2496,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2500 | while (1) { | 2496 | while (1) { |
2501 | int batch = atomic_read(&root->log_batch); | 2497 | int batch = atomic_read(&root->log_batch); |
2502 | /* when we're on an ssd, just kick the log commit out */ | 2498 | /* when we're on an ssd, just kick the log commit out */ |
2503 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2499 | if (!btrfs_test_opt(root, SSD) && |
2500 | test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) { | ||
2504 | mutex_unlock(&root->log_mutex); | 2501 | mutex_unlock(&root->log_mutex); |
2505 | schedule_timeout_uninterruptible(1); | 2502 | schedule_timeout_uninterruptible(1); |
2506 | mutex_lock(&root->log_mutex); | 2503 | mutex_lock(&root->log_mutex); |
@@ -2511,8 +2508,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2511 | } | 2508 | } |
2512 | 2509 | ||
2513 | /* bail out if we need to do a full commit */ | 2510 | /* bail out if we need to do a full commit */ |
2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | 2511 | if (btrfs_need_log_full_commit(root->fs_info, trans)) { |
2515 | trans->transid) { | ||
2516 | ret = -EAGAIN; | 2512 | ret = -EAGAIN; |
2517 | btrfs_free_logged_extents(log, log_transid); | 2513 | btrfs_free_logged_extents(log, log_transid); |
2518 | mutex_unlock(&root->log_mutex); | 2514 | mutex_unlock(&root->log_mutex); |
@@ -2533,8 +2529,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2533 | blk_finish_plug(&plug); | 2529 | blk_finish_plug(&plug); |
2534 | btrfs_abort_transaction(trans, root, ret); | 2530 | btrfs_abort_transaction(trans, root, ret); |
2535 | btrfs_free_logged_extents(log, log_transid); | 2531 | btrfs_free_logged_extents(log, log_transid); |
2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2532 | btrfs_set_log_full_commit(root->fs_info, trans); |
2537 | trans->transid; | ||
2538 | mutex_unlock(&root->log_mutex); | 2533 | mutex_unlock(&root->log_mutex); |
2539 | goto out; | 2534 | goto out; |
2540 | } | 2535 | } |
@@ -2577,8 +2572,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2577 | list_del_init(&root_log_ctx.list); | 2572 | list_del_init(&root_log_ctx.list); |
2578 | 2573 | ||
2579 | blk_finish_plug(&plug); | 2574 | blk_finish_plug(&plug); |
2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2575 | btrfs_set_log_full_commit(root->fs_info, trans); |
2581 | trans->transid; | 2576 | |
2582 | if (ret != -ENOSPC) { | 2577 | if (ret != -ENOSPC) { |
2583 | btrfs_abort_transaction(trans, root, ret); | 2578 | btrfs_abort_transaction(trans, root, ret); |
2584 | mutex_unlock(&log_root_tree->log_mutex); | 2579 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2622,8 +2617,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2622 | * now that we've moved on to the tree of log tree roots, | 2617 | * now that we've moved on to the tree of log tree roots, |
2623 | * check the full commit flag again | 2618 | * check the full commit flag again |
2624 | */ | 2619 | */ |
2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | 2620 | if (btrfs_need_log_full_commit(root->fs_info, trans)) { |
2626 | trans->transid) { | ||
2627 | blk_finish_plug(&plug); | 2621 | blk_finish_plug(&plug); |
2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2622 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2629 | btrfs_free_logged_extents(log, log_transid); | 2623 | btrfs_free_logged_extents(log, log_transid); |
@@ -2637,8 +2631,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2637 | EXTENT_DIRTY | EXTENT_NEW); | 2631 | EXTENT_DIRTY | EXTENT_NEW); |
2638 | blk_finish_plug(&plug); | 2632 | blk_finish_plug(&plug); |
2639 | if (ret) { | 2633 | if (ret) { |
2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2634 | btrfs_set_log_full_commit(root->fs_info, trans); |
2641 | trans->transid; | ||
2642 | btrfs_abort_transaction(trans, root, ret); | 2635 | btrfs_abort_transaction(trans, root, ret); |
2643 | btrfs_free_logged_extents(log, log_transid); | 2636 | btrfs_free_logged_extents(log, log_transid); |
2644 | mutex_unlock(&log_root_tree->log_mutex); | 2637 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2667,8 +2660,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2667 | */ | 2660 | */ |
2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2661 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
2669 | if (ret) { | 2662 | if (ret) { |
2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2663 | btrfs_set_log_full_commit(root->fs_info, trans); |
2671 | trans->transid; | ||
2672 | btrfs_abort_transaction(trans, root, ret); | 2664 | btrfs_abort_transaction(trans, root, ret); |
2673 | goto out_wake_log_root; | 2665 | goto out_wake_log_root; |
2674 | } | 2666 | } |
@@ -2886,7 +2878,7 @@ fail: | |||
2886 | out_unlock: | 2878 | out_unlock: |
2887 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2879 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2888 | if (ret == -ENOSPC) { | 2880 | if (ret == -ENOSPC) { |
2889 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2881 | btrfs_set_log_full_commit(root->fs_info, trans); |
2890 | ret = 0; | 2882 | ret = 0; |
2891 | } else if (ret < 0) | 2883 | } else if (ret < 0) |
2892 | btrfs_abort_transaction(trans, root, ret); | 2884 | btrfs_abort_transaction(trans, root, ret); |
@@ -2919,7 +2911,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2919 | dirid, &index); | 2911 | dirid, &index); |
2920 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2912 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2921 | if (ret == -ENOSPC) { | 2913 | if (ret == -ENOSPC) { |
2922 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2914 | btrfs_set_log_full_commit(root->fs_info, trans); |
2923 | ret = 0; | 2915 | ret = 0; |
2924 | } else if (ret < 0 && ret != -ENOENT) | 2916 | } else if (ret < 0 && ret != -ENOENT) |
2925 | btrfs_abort_transaction(trans, root, ret); | 2917 | btrfs_abort_transaction(trans, root, ret); |
@@ -4130,8 +4122,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4130 | * make sure any commits to the log are forced | 4122 | * make sure any commits to the log are forced |
4131 | * to be full commits | 4123 | * to be full commits |
4132 | */ | 4124 | */ |
4133 | root->fs_info->last_trans_log_full_commit = | 4125 | btrfs_set_log_full_commit(root->fs_info, trans); |
4134 | trans->transid; | ||
4135 | ret = 1; | 4126 | ret = 1; |
4136 | break; | 4127 | break; |
4137 | } | 4128 | } |
@@ -4177,6 +4168,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4177 | goto end_no_trans; | 4168 | goto end_no_trans; |
4178 | } | 4169 | } |
4179 | 4170 | ||
4171 | /* | ||
4172 | * The previous transaction commit has not completed, so we need | |
4173 | * to do a full commit ourselves. | |
4174 | */ | ||
4180 | if (root->fs_info->last_trans_log_full_commit > | 4175 | if (root->fs_info->last_trans_log_full_commit > |
4181 | root->fs_info->last_trans_committed) { | 4176 | root->fs_info->last_trans_committed) { |
4182 | ret = 1; | 4177 | ret = 1; |
@@ -4246,7 +4241,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4246 | end_trans: | 4241 | end_trans: |
4247 | dput(old_parent); | 4242 | dput(old_parent); |
4248 | if (ret < 0) { | 4243 | if (ret < 0) { |
4249 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4244 | btrfs_set_log_full_commit(root->fs_info, trans); |
4250 | ret = 1; | 4245 | ret = 1; |
4251 | } | 4246 | } |
4252 | 4247 | ||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 91b145fce333..7f5b41bd5373 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -19,6 +19,9 @@ | |||
19 | #ifndef __TREE_LOG_ | 19 | #ifndef __TREE_LOG_ |
20 | #define __TREE_LOG_ | 20 | #define __TREE_LOG_ |
21 | 21 | ||
22 | #include "ctree.h" | ||
23 | #include "transaction.h" | ||
24 | |||
22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 25 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
23 | #define BTRFS_NO_LOG_SYNC 256 | 26 | #define BTRFS_NO_LOG_SYNC 256 |
24 | 27 | ||
@@ -35,6 +38,19 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | |||
35 | INIT_LIST_HEAD(&ctx->list); | 38 | INIT_LIST_HEAD(&ctx->list); |
36 | } | 39 | } |
37 | 40 | ||
41 | static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info, | |
42 | struct btrfs_trans_handle *trans) | |
43 | { /* Store @trans's transid in fs_info->last_trans_log_full_commit; callers in tree-log.c use it to mark that log syncs for this transaction must become full commits. */ | |
44 | ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid; | |
45 | } | |
46 | |||
47 | static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info, | |
48 | struct btrfs_trans_handle *trans) | |
49 | { /* Return non-zero when fs_info->last_trans_log_full_commit equals @trans's transid, i.e. a full commit was requested for this transaction. */ | |
50 | return ACCESS_ONCE(fs_info->last_trans_log_full_commit) == | |
51 | trans->transid; | |
52 | } | |
53 | |||
38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 54 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); | 55 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 56 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 49d7fab73360..ffeed6d6326f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1452,6 +1452,22 @@ out: | |||
1452 | return ret; | 1452 | return ret; |
1453 | } | 1453 | } |
1454 | 1454 | ||
1455 | /* | |
1456 | * Update ctime/mtime for the device node at @path_name; mainly used for ctime/mtime based probes like libblkid. | |
1457 | * Best effort: filp_open() reports failure with an ERR_PTR() value (never NULL), and in that case nothing is updated. | |
1458 | */ | |
1459 | static void update_dev_time(char *path_name) | |
1460 | { | |
1461 | struct file *filp; | |
1462 | | |
1463 | filp = filp_open(path_name, O_RDWR, 0); | |
1464 | if (IS_ERR(filp)) | |
1465 | return; | |
1466 | file_update_time(filp); | |
1467 | filp_close(filp, NULL); | |
1468 | return; | |
1469 | } | |
1470 | |||
1455 | static int btrfs_rm_dev_item(struct btrfs_root *root, | 1471 | static int btrfs_rm_dev_item(struct btrfs_root *root, |
1456 | struct btrfs_device *device) | 1472 | struct btrfs_device *device) |
1457 | { | 1473 | { |
@@ -1674,11 +1690,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1674 | struct btrfs_fs_devices *fs_devices; | 1690 | struct btrfs_fs_devices *fs_devices; |
1675 | fs_devices = root->fs_info->fs_devices; | 1691 | fs_devices = root->fs_info->fs_devices; |
1676 | while (fs_devices) { | 1692 | while (fs_devices) { |
1677 | if (fs_devices->seed == cur_devices) | 1693 | if (fs_devices->seed == cur_devices) { |
1694 | fs_devices->seed = cur_devices->seed; | ||
1678 | break; | 1695 | break; |
1696 | } | ||
1679 | fs_devices = fs_devices->seed; | 1697 | fs_devices = fs_devices->seed; |
1680 | } | 1698 | } |
1681 | fs_devices->seed = cur_devices->seed; | ||
1682 | cur_devices->seed = NULL; | 1699 | cur_devices->seed = NULL; |
1683 | lock_chunks(root); | 1700 | lock_chunks(root); |
1684 | __btrfs_close_devices(cur_devices); | 1701 | __btrfs_close_devices(cur_devices); |
@@ -1694,20 +1711,55 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1694 | * remove it from the devices list and zero out the old super | 1711 | * remove it from the devices list and zero out the old super |
1695 | */ | 1712 | */ |
1696 | if (clear_super && disk_super) { | 1713 | if (clear_super && disk_super) { |
1714 | u64 bytenr; | ||
1715 | int i; | ||
1716 | |||
1697 | /* make sure this device isn't detected as part of | 1717 | /* make sure this device isn't detected as part of |
1698 | * the FS anymore | 1718 | * the FS anymore |
1699 | */ | 1719 | */ |
1700 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | 1720 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); |
1701 | set_buffer_dirty(bh); | 1721 | set_buffer_dirty(bh); |
1702 | sync_dirty_buffer(bh); | 1722 | sync_dirty_buffer(bh); |
1723 | |||
1724 | /* clear the mirror copies of the super block on the disk | |
1725 | * being removed; the 0th copy has been taken care of above and | |
1726 | * the loop below takes care of the rest | |
1727 | */ | ||
1728 | for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
1729 | bytenr = btrfs_sb_offset(i); | ||
1730 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
1731 | i_size_read(bdev->bd_inode)) | ||
1732 | break; | ||
1733 | |||
1734 | brelse(bh); | ||
1735 | bh = __bread(bdev, bytenr / 4096, | ||
1736 | BTRFS_SUPER_INFO_SIZE); | ||
1737 | if (!bh) | ||
1738 | continue; | ||
1739 | |||
1740 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
1741 | |||
1742 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
1743 | btrfs_super_magic(disk_super) != BTRFS_MAGIC) { | ||
1744 | continue; | ||
1745 | } | ||
1746 | memset(&disk_super->magic, 0, | ||
1747 | sizeof(disk_super->magic)); | ||
1748 | set_buffer_dirty(bh); | ||
1749 | sync_dirty_buffer(bh); | ||
1750 | } | ||
1703 | } | 1751 | } |
1704 | 1752 | ||
1705 | ret = 0; | 1753 | ret = 0; |
1706 | 1754 | ||
1707 | /* Notify udev that device has changed */ | 1755 | if (bdev) { |
1708 | if (bdev) | 1756 | /* Notify udev that device has changed */ |
1709 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); | 1757 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); |
1710 | 1758 | ||
1759 | /* Update ctime/mtime for device path for libblkid */ | ||
1760 | update_dev_time(device_path); | ||
1761 | } | ||
1762 | |||
1711 | error_brelse: | 1763 | error_brelse: |
1712 | brelse(bh); | 1764 | brelse(bh); |
1713 | if (bdev) | 1765 | if (bdev) |
@@ -1883,7 +1935,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) | |||
1883 | fs_devices->seeding = 0; | 1935 | fs_devices->seeding = 0; |
1884 | fs_devices->num_devices = 0; | 1936 | fs_devices->num_devices = 0; |
1885 | fs_devices->open_devices = 0; | 1937 | fs_devices->open_devices = 0; |
1886 | fs_devices->total_devices = 0; | ||
1887 | fs_devices->seed = seed_devices; | 1938 | fs_devices->seed = seed_devices; |
1888 | 1939 | ||
1889 | generate_random_uuid(fs_devices->fsid); | 1940 | generate_random_uuid(fs_devices->fsid); |
@@ -2146,6 +2197,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2146 | ret = btrfs_commit_transaction(trans, root); | 2197 | ret = btrfs_commit_transaction(trans, root); |
2147 | } | 2198 | } |
2148 | 2199 | ||
2200 | /* Update ctime/mtime for libblkid */ | ||
2201 | update_dev_time(device_path); | ||
2149 | return ret; | 2202 | return ret; |
2150 | 2203 | ||
2151 | error_trans: | 2204 | error_trans: |
@@ -2922,6 +2975,16 @@ static int should_balance_chunk(struct btrfs_root *root, | |||
2922 | return 0; | 2975 | return 0; |
2923 | } | 2976 | } |
2924 | 2977 | ||
2978 | /* | ||
2979 | * limited by count, must be the last filter | ||
2980 | */ | ||
2981 | if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) { | ||
2982 | if (bargs->limit == 0) | ||
2983 | return 0; | ||
2984 | else | ||
2985 | bargs->limit--; | ||
2986 | } | ||
2987 | |||
2925 | return 1; | 2988 | return 1; |
2926 | } | 2989 | } |
2927 | 2990 | ||
@@ -2944,6 +3007,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
2944 | int ret; | 3007 | int ret; |
2945 | int enospc_errors = 0; | 3008 | int enospc_errors = 0; |
2946 | bool counting = true; | 3009 | bool counting = true; |
3010 | u64 limit_data = bctl->data.limit; | ||
3011 | u64 limit_meta = bctl->meta.limit; | ||
3012 | u64 limit_sys = bctl->sys.limit; | ||
2947 | 3013 | ||
2948 | /* step one make some room on all the devices */ | 3014 | /* step one make some room on all the devices */ |
2949 | devices = &fs_info->fs_devices->devices; | 3015 | devices = &fs_info->fs_devices->devices; |
@@ -2982,6 +3048,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
2982 | memset(&bctl->stat, 0, sizeof(bctl->stat)); | 3048 | memset(&bctl->stat, 0, sizeof(bctl->stat)); |
2983 | spin_unlock(&fs_info->balance_lock); | 3049 | spin_unlock(&fs_info->balance_lock); |
2984 | again: | 3050 | again: |
3051 | if (!counting) { | ||
3052 | bctl->data.limit = limit_data; | ||
3053 | bctl->meta.limit = limit_meta; | ||
3054 | bctl->sys.limit = limit_sys; | ||
3055 | } | ||
2985 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 3056 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2986 | key.offset = (u64)-1; | 3057 | key.offset = (u64)-1; |
2987 | key.type = BTRFS_CHUNK_ITEM_KEY; | 3058 | key.type = BTRFS_CHUNK_ITEM_KEY; |
@@ -3881,7 +3952,8 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, | |||
3881 | u8 *ptr; | 3952 | u8 *ptr; |
3882 | 3953 | ||
3883 | array_size = btrfs_super_sys_array_size(super_copy); | 3954 | array_size = btrfs_super_sys_array_size(super_copy); |
3884 | if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | 3955 | if (array_size + item_size + sizeof(disk_key) |
3956 | > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | ||
3885 | return -EFBIG; | 3957 | return -EFBIG; |
3886 | 3958 | ||
3887 | ptr = super_copy->sys_chunk_array + array_size; | 3959 | ptr = super_copy->sys_chunk_array + array_size; |
@@ -3986,6 +4058,16 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) | |||
3986 | btrfs_set_fs_incompat(info, RAID56); | 4058 | btrfs_set_fs_incompat(info, RAID56); |
3987 | } | 4059 | } |
3988 | 4060 | ||
4061 | #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ | |
4062 | - sizeof(struct btrfs_item) \ | |
4063 | - sizeof(struct btrfs_chunk)) \ | |
4064 | / sizeof(struct btrfs_stripe) + 1) /* Default devs_max for data/metadata chunks: stripes that fit in the leaf data area of root r after one item header and one chunk header. NOTE(review): the "+ 1" presumably accounts for a stripe already embedded in struct btrfs_chunk - confirm. */ | |
4065 | |||
4066 | #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ | |
4067 | - 2 * sizeof(struct btrfs_disk_key) \ | |
4068 | - 2 * sizeof(struct btrfs_chunk)) \ | |
4069 | / sizeof(struct btrfs_stripe) + 1) /* Default devs_max for system chunks: stripes that fit in the superblock's system chunk array after reserving two disk keys and two chunk headers. NOTE(review): presumably sized so two system chunk items fit in the array - confirm. */ | |
4070 | |||
3989 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 4071 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
3990 | struct btrfs_root *extent_root, u64 start, | 4072 | struct btrfs_root *extent_root, u64 start, |
3991 | u64 type) | 4073 | u64 type) |
@@ -4035,6 +4117,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4035 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 4117 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
4036 | max_stripe_size = 1024 * 1024 * 1024; | 4118 | max_stripe_size = 1024 * 1024 * 1024; |
4037 | max_chunk_size = 10 * max_stripe_size; | 4119 | max_chunk_size = 10 * max_stripe_size; |
4120 | if (!devs_max) | ||
4121 | devs_max = BTRFS_MAX_DEVS(info->chunk_root); | ||
4038 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { | 4122 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { |
4039 | /* for larger filesystems, use larger metadata chunks */ | 4123 | /* for larger filesystems, use larger metadata chunks */ |
4040 | if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) | 4124 | if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) |
@@ -4042,11 +4126,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4042 | else | 4126 | else |
4043 | max_stripe_size = 256 * 1024 * 1024; | 4127 | max_stripe_size = 256 * 1024 * 1024; |
4044 | max_chunk_size = max_stripe_size; | 4128 | max_chunk_size = max_stripe_size; |
4129 | if (!devs_max) | ||
4130 | devs_max = BTRFS_MAX_DEVS(info->chunk_root); | ||
4045 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 4131 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
4046 | max_stripe_size = 32 * 1024 * 1024; | 4132 | max_stripe_size = 32 * 1024 * 1024; |
4047 | max_chunk_size = 2 * max_stripe_size; | 4133 | max_chunk_size = 2 * max_stripe_size; |
4134 | if (!devs_max) | ||
4135 | devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; | ||
4048 | } else { | 4136 | } else { |
4049 | btrfs_err(info, "invalid chunk type 0x%llx requested\n", | 4137 | btrfs_err(info, "invalid chunk type 0x%llx requested", |
4050 | type); | 4138 | type); |
4051 | BUG_ON(1); | 4139 | BUG_ON(1); |
4052 | } | 4140 | } |
@@ -4294,7 +4382,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4294 | 4382 | ||
4295 | if (em->start != chunk_offset || em->len != chunk_size) { | 4383 | if (em->start != chunk_offset || em->len != chunk_size) { |
4296 | btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" | 4384 | btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" |
4297 | " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset, | 4385 | " %Lu-%Lu, found %Lu-%Lu", chunk_offset, |
4298 | chunk_size, em->start, em->len); | 4386 | chunk_size, em->start, em->len); |
4299 | free_extent_map(em); | 4387 | free_extent_map(em); |
4300 | return -EINVAL; | 4388 | return -EINVAL; |
@@ -4496,14 +4584,14 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) | |||
4496 | * and exit, so return 1 so the callers don't try to use other copies. | 4584 | * and exit, so return 1 so the callers don't try to use other copies. |
4497 | */ | 4585 | */ |
4498 | if (!em) { | 4586 | if (!em) { |
4499 | btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, | 4587 | btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical, |
4500 | logical+len); | 4588 | logical+len); |
4501 | return 1; | 4589 | return 1; |
4502 | } | 4590 | } |
4503 | 4591 | ||
4504 | if (em->start > logical || em->start + em->len < logical) { | 4592 | if (em->start > logical || em->start + em->len < logical) { |
4505 | btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " | 4593 | btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " |
4506 | "%Lu-%Lu\n", logical, logical+len, em->start, | 4594 | "%Lu-%Lu", logical, logical+len, em->start, |
4507 | em->start + em->len); | 4595 | em->start + em->len); |
4508 | free_extent_map(em); | 4596 | free_extent_map(em); |
4509 | return 1; | 4597 | return 1; |
@@ -4684,7 +4772,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4684 | 4772 | ||
4685 | if (em->start > logical || em->start + em->len < logical) { | 4773 | if (em->start > logical || em->start + em->len < logical) { |
4686 | btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " | 4774 | btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " |
4687 | "found %Lu-%Lu\n", logical, em->start, | 4775 | "found %Lu-%Lu", logical, em->start, |
4688 | em->start + em->len); | 4776 | em->start + em->len); |
4689 | free_extent_map(em); | 4777 | free_extent_map(em); |
4690 | return -EINVAL; | 4778 | return -EINVAL; |
@@ -6058,10 +6146,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) | |||
6058 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | 6146 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
6059 | struct btrfs_device *device; | 6147 | struct btrfs_device *device; |
6060 | 6148 | ||
6061 | mutex_lock(&fs_devices->device_list_mutex); | 6149 | while (fs_devices) { |
6062 | list_for_each_entry(device, &fs_devices->devices, dev_list) | 6150 | mutex_lock(&fs_devices->device_list_mutex); |
6063 | device->dev_root = fs_info->dev_root; | 6151 | list_for_each_entry(device, &fs_devices->devices, dev_list) |
6064 | mutex_unlock(&fs_devices->device_list_mutex); | 6152 | device->dev_root = fs_info->dev_root; |
6153 | mutex_unlock(&fs_devices->device_list_mutex); | ||
6154 | |||
6155 | fs_devices = fs_devices->seed; | ||
6156 | } | ||
6065 | } | 6157 | } |
6066 | 6158 | ||
6067 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | 6159 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 80754f9dd3df..1a15bbeb65e2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -255,6 +255,7 @@ struct map_lookup { | |||
255 | #define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) | 255 | #define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) |
256 | #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) | 256 | #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) |
257 | #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) | 257 | #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) |
258 | #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) | ||
258 | 259 | ||
259 | /* | 260 | /* |
260 | * Profile changing flags. When SOFT is set we won't relocate chunk if | 261 | * Profile changing flags. When SOFT is set we won't relocate chunk if |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 8e57191950cb..4f196314c0c1 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -98,7 +98,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
98 | 98 | ||
99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
100 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); | 100 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); |
101 | ret = -1; | 101 | ret = -EIO; |
102 | goto out; | 102 | goto out; |
103 | } | 103 | } |
104 | 104 | ||
@@ -110,7 +110,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
110 | 110 | ||
111 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 111 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
112 | if (out_page == NULL) { | 112 | if (out_page == NULL) { |
113 | ret = -1; | 113 | ret = -ENOMEM; |
114 | goto out; | 114 | goto out; |
115 | } | 115 | } |
116 | cpage_out = kmap(out_page); | 116 | cpage_out = kmap(out_page); |
@@ -128,7 +128,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
128 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", | 128 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", |
129 | ret); | 129 | ret); |
130 | zlib_deflateEnd(&workspace->def_strm); | 130 | zlib_deflateEnd(&workspace->def_strm); |
131 | ret = -1; | 131 | ret = -EIO; |
132 | goto out; | 132 | goto out; |
133 | } | 133 | } |
134 | 134 | ||
@@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
136 | if (workspace->def_strm.total_in > 8192 && | 136 | if (workspace->def_strm.total_in > 8192 && |
137 | workspace->def_strm.total_in < | 137 | workspace->def_strm.total_in < |
138 | workspace->def_strm.total_out) { | 138 | workspace->def_strm.total_out) { |
139 | ret = -1; | 139 | ret = -EIO; |
140 | goto out; | 140 | goto out; |
141 | } | 141 | } |
142 | /* we need another page for writing out. Test this | 142 | /* we need another page for writing out. Test this |
@@ -147,12 +147,12 @@ static int zlib_compress_pages(struct list_head *ws, | |||
147 | kunmap(out_page); | 147 | kunmap(out_page); |
148 | if (nr_pages == nr_dest_pages) { | 148 | if (nr_pages == nr_dest_pages) { |
149 | out_page = NULL; | 149 | out_page = NULL; |
150 | ret = -1; | 150 | ret = -E2BIG; |
151 | goto out; | 151 | goto out; |
152 | } | 152 | } |
153 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 153 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
154 | if (out_page == NULL) { | 154 | if (out_page == NULL) { |
155 | ret = -1; | 155 | ret = -ENOMEM; |
156 | goto out; | 156 | goto out; |
157 | } | 157 | } |
158 | cpage_out = kmap(out_page); | 158 | cpage_out = kmap(out_page); |
@@ -188,12 +188,12 @@ static int zlib_compress_pages(struct list_head *ws, | |||
188 | zlib_deflateEnd(&workspace->def_strm); | 188 | zlib_deflateEnd(&workspace->def_strm); |
189 | 189 | ||
190 | if (ret != Z_STREAM_END) { | 190 | if (ret != Z_STREAM_END) { |
191 | ret = -1; | 191 | ret = -EIO; |
192 | goto out; | 192 | goto out; |
193 | } | 193 | } |
194 | 194 | ||
195 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { | 195 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { |
196 | ret = -1; | 196 | ret = -E2BIG; |
197 | goto out; | 197 | goto out; |
198 | } | 198 | } |
199 | 199 | ||
@@ -253,7 +253,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
253 | 253 | ||
254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
255 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 255 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
256 | return -1; | 256 | return -EIO; |
257 | } | 257 | } |
258 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
@@ -295,7 +295,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
295 | } | 295 | } |
296 | } | 296 | } |
297 | if (ret != Z_STREAM_END) | 297 | if (ret != Z_STREAM_END) |
298 | ret = -1; | 298 | ret = -EIO; |
299 | else | 299 | else |
300 | ret = 0; | 300 | ret = 0; |
301 | done: | 301 | done: |
@@ -337,7 +337,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
337 | 337 | ||
338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
339 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 339 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
340 | return -1; | 340 | return -EIO; |
341 | } | 341 | } |
342 | 342 | ||
343 | while (bytes_left > 0) { | 343 | while (bytes_left > 0) { |
@@ -354,7 +354,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
354 | total_out = workspace->inf_strm.total_out; | 354 | total_out = workspace->inf_strm.total_out; |
355 | 355 | ||
356 | if (total_out == buf_start) { | 356 | if (total_out == buf_start) { |
357 | ret = -1; | 357 | ret = -EIO; |
358 | break; | 358 | break; |
359 | } | 359 | } |
360 | 360 | ||
@@ -382,7 +382,7 @@ next: | |||
382 | } | 382 | } |
383 | 383 | ||
384 | if (ret != Z_STREAM_END && bytes_left != 0) | 384 | if (ret != Z_STREAM_END && bytes_left != 0) |
385 | ret = -1; | 385 | ret = -EIO; |
386 | else | 386 | else |
387 | ret = 0; | 387 | ret = 0; |
388 | 388 | ||