Diffstat (limited to 'fs/btrfs')
47 files changed, 3978 insertions, 1456 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f341a98031d2..6d1d0b93b1aa 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -16,4 +16,4 @@ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
 
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	tests/extent-buffer-tests.o tests/btrfs-tests.o \
-	tests/extent-io-tests.o tests/inode-tests.o
+	tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index ff9b3995d453..9a0124a95851 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -79,13 +79,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 	const char *name;
 	char *value = NULL;
 
-	if (acl) {
-		ret = posix_acl_valid(acl);
-		if (ret < 0)
-			return ret;
-		ret = 0;
-	}
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
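Dropping the posix_acl_valid() call here should not change behavior: by this point the generic POSIX ACL code is expected to validate the ACL before the filesystem hook runs. A minimal sketch of the check that the filesystem no longer repeats (illustrative only, not the exact fs/posix_acl.c code):

    /* Sketch: validate an ACL before handing it to a filesystem hook.
     * The generic xattr/set_acl path performs this, so __btrfs_set_acl()
     * does not need its own copy of the check. */
    if (acl) {
    	int err = posix_acl_valid(acl);	/* rejects malformed entries */
    	if (err < 0)
    		return err;
    }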
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 10db21fa0926..e25564bfcb46 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -900,7 +900,11 @@ again:
 		goto out;
 	BUG_ON(ret == 0);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (trans && likely(trans->type != __TRANS_DUMMY)) {
+#else
 	if (trans) {
+#endif
 		/*
 		 * look if there are updates for this ref queued and lock the
 		 * head
@@ -984,11 +988,12 @@ again:
 			goto out;
 		}
 		if (ref->count && ref->parent) {
-			if (extent_item_pos && !ref->inode_list) {
+			if (extent_item_pos && !ref->inode_list &&
+			    ref->level == 0) {
 				u32 bsz;
 				struct extent_buffer *eb;
 				bsz = btrfs_level_size(fs_info->extent_root,
-						       info_level);
+						       ref->level);
 				eb = read_tree_block(fs_info->extent_root,
 						     ref->parent, bsz, 0);
 				if (!eb || !extent_buffer_uptodate(eb)) {
@@ -1404,9 +1409,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
  * returns <0 on error
  */
 static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
-				   struct btrfs_extent_item *ei, u32 item_size,
-				   struct btrfs_extent_inline_ref **out_eiref,
-				   int *out_type)
+				   struct btrfs_key *key,
+				   struct btrfs_extent_item *ei, u32 item_size,
+				   struct btrfs_extent_inline_ref **out_eiref,
+				   int *out_type)
 {
 	unsigned long end;
 	u64 flags;
@@ -1416,19 +1422,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
 		/* first call */
 		flags = btrfs_extent_flags(eb, ei);
 		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-			info = (struct btrfs_tree_block_info *)(ei + 1);
-			*out_eiref =
-				(struct btrfs_extent_inline_ref *)(info + 1);
+			if (key->type == BTRFS_METADATA_ITEM_KEY) {
+				/* a skinny metadata extent */
+				*out_eiref =
+				     (struct btrfs_extent_inline_ref *)(ei + 1);
+			} else {
+				WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+				info = (struct btrfs_tree_block_info *)(ei + 1);
+				*out_eiref =
+				   (struct btrfs_extent_inline_ref *)(info + 1);
+			}
 		} else {
 			*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
 		}
 		*ptr = (unsigned long)*out_eiref;
-		if ((void *)*ptr >= (void *)ei + item_size)
+		if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
 			return -ENOENT;
 	}
 
 	end = (unsigned long)ei + item_size;
-	*out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
+	*out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
 	*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
 
 	*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1447,8 +1460,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
  * <0 on error.
  */
 int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
-			    struct btrfs_extent_item *ei, u32 item_size,
-			    u64 *out_root, u8 *out_level)
+			    struct btrfs_key *key, struct btrfs_extent_item *ei,
+			    u32 item_size, u64 *out_root, u8 *out_level)
 {
 	int ret;
 	int type;
@@ -1459,8 +1472,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 		return 1;
 
 	while (1) {
-		ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
-					      &eiref, &type);
+		ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+					      &eiref, &type);
 		if (ret < 0)
 			return ret;
 
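The key-type branching above exists because skinny metadata extents (BTRFS_METADATA_ITEM_KEY) omit the btrfs_tree_block_info header that classic BTRFS_EXTENT_ITEM_KEY items carry, so the first inline backref sits at a different offset. A reduced sketch of just that offset computation (hypothetical helper name, not kernel code):

    /* Sketch: where the first inline backref lives, relative to the
     * extent item, for the two metadata extent item layouts. */
    static struct btrfs_extent_inline_ref *
    first_inline_ref(struct btrfs_key *key, struct btrfs_extent_item *ei)
    {
    	if (key->type == BTRFS_METADATA_ITEM_KEY)
    		/* skinny metadata: inline refs follow the item directly */
    		return (struct btrfs_extent_inline_ref *)(ei + 1);
    	/* classic layout: skip the btrfs_tree_block_info header first */
    	return (struct btrfs_extent_inline_ref *)
    		((struct btrfs_tree_block_info *)(ei + 1) + 1);
    }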
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index a910b27a8ad9..86fc20fec282 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -40,8 +40,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 			u64 *flags);
 
 int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
-			    struct btrfs_extent_item *ei, u32 item_size,
-			    u64 *out_root, u8 *out_level);
+			    struct btrfs_key *key, struct btrfs_extent_item *ei,
+			    u32 item_size, u64 *out_root, u8 *out_level);
 
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				u64 extent_item_objectid,
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 bytenr,
-			u64 time_seq, struct ulist **roots);
+			 struct btrfs_fs_info *fs_info, u64 bytenr,
+			 u64 time_seq, struct ulist **roots);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
 			struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c9a24444ec9a..4794923c410c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -279,9 +279,11 @@ static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
 
 static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
 		  &BTRFS_I(inode)->runtime_flags);
 }
 
+bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
+
 #endif
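smp_mb__before_clear_bit() was renamed to smp_mb__before_atomic() tree-wide in this cycle; the ordering semantics are unchanged. The pairing it provides, sketched with a hypothetical wrapper name:

    /* Sketch: the full barrier orders every store issued before it ahead
     * of the following non-value-returning atomic op, so a reader that
     * observes the bit cleared also observes those earlier stores. */
    static inline void publish_then_clear(unsigned long *flags, int bit)
    {
    	smp_mb__before_atomic();	/* was smp_mb__before_clear_bit() */
    	clear_bit(bit, flags);
    }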
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 0e8388e72d8d..ce92ae30250f 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1093,6 +1093,7 @@ leaf_item_out_of_bounce_error:
 					next_stack =
 					    btrfsic_stack_frame_alloc();
 					if (NULL == next_stack) {
+						sf->error = -1;
 						btrfsic_release_block_ctx(
 						    &sf->
 						    next_block_ctx);
@@ -1190,8 +1191,10 @@ continue_with_current_node_stack_frame:
 				    sf->next_block_ctx.datav[0];
 
 				next_stack = btrfsic_stack_frame_alloc();
-				if (NULL == next_stack)
+				if (NULL == next_stack) {
+					sf->error = -1;
 					goto one_stack_frame_backwards;
+				}
 
 				next_stack->i = -1;
 				next_stack->block = sf->next_block;
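Both hunks fix the same omission: a failed btrfsic_stack_frame_alloc() unwound the tree walk without recording a failure, so the integrity check could report success after silently skipping a subtree. The pattern, reduced to its shape:

    /* Sketch: record the failure in the current frame before unwinding,
     * so it propagates when the frames are popped. */
    next_stack = btrfsic_stack_frame_alloc();
    if (NULL == next_stack) {
    	sf->error = -1;			/* remember that we gave up */
    	goto one_stack_frame_backwards;	/* unwind as before */
    }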
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d43c544d3b68..92371c414228 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -887,7 +887,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -1;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, len, pages,
@@ -923,7 +923,7 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -ENOMEM;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
 							 disk_start,
@@ -945,7 +945,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -ENOMEM;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
 						  dest_page, start_byte,
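find_workspace() returns an ERR_PTR that encodes the real reason it failed, so these callers now forward that code instead of a hard-coded -1 or -ENOMEM. The ERR_PTR convention in a short sketch (use_workspace is a hypothetical caller; find_workspace stands in for the file's static helper):

    #include <linux/err.h>

    /* Sketch: the kernel packs small negative errnos into otherwise
     * invalid pointer values; IS_ERR() detects them and PTR_ERR()
     * recovers the errno, so no separate status out-parameter is needed. */
    static int use_workspace(int type)
    {
    	struct list_head *workspace = find_workspace(type);

    	if (IS_ERR(workspace))
    		return PTR_ERR(workspace);	/* e.g. -ENOMEM */
    	/* ... use the workspace ... */
    	return 0;
    }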
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1bcfcdb23cf4..aeab453b8e24 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -224,7 +224,8 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
 static void add_root_to_dirty_list(struct btrfs_root *root)
 {
 	spin_lock(&root->fs_info->trans_lock);
-	if (root->track_dirty && list_empty(&root->dirty_list)) {
+	if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
+	    list_empty(&root->dirty_list)) {
 		list_add(&root->dirty_list,
 			 &root->fs_info->dirty_cowonly_roots);
 	}
@@ -246,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 	int level;
 	struct btrfs_disk_key disk_key;
 
-	WARN_ON(root->ref_cows && trans->transid !=
-		root->fs_info->running_transaction->transid);
-	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->fs_info->running_transaction->transid);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
 	if (level == 0)
@@ -354,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
 }
 
 /*
- * Increment the upper half of tree_mod_seq, set lower half zero.
- *
- * Must be called with fs_info->tree_mod_seq_lock held.
- */
-static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
-{
-	u64 seq = atomic64_read(&fs_info->tree_mod_seq);
-	seq &= 0xffffffff00000000ull;
-	seq += 1ull << 32;
-	atomic64_set(&fs_info->tree_mod_seq, seq);
-	return seq;
-}
-
-/*
- * Increment the lower half of tree_mod_seq.
- *
- * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
- * are generated should not technically require a spin lock here. (Rationale:
- * incrementing the minor while incrementing the major seq number is between its
- * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
- * just returns a unique sequence number as usual.) We have decided to leave
- * that requirement in here and rethink it once we notice it really imposes a
- * problem on some workload.
+ * Pull a new tree mod seq number for our operation.
  */
-static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
 {
 	return atomic64_inc_return(&fs_info->tree_mod_seq);
 }
 
 /*
- * return the last minor in the previous major tree_mod_seq number
- */
-u64 btrfs_tree_mod_seq_prev(u64 seq)
-{
-	return (seq & 0xffffffff00000000ull) - 1ull;
-}
-
-/*
  * This adds a new blocker to the tree mod log's blocker list if the @elem
  * passed does not already have a sequence number set. So when a caller expects
  * to record tree modifications, it should ensure to set elem->seq to zero
@@ -402,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
 u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem)
 {
-	u64 seq;
-
 	tree_mod_log_write_lock(fs_info);
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	if (!elem->seq) {
-		elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
+		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
 		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 	}
-	seq = btrfs_inc_tree_mod_seq_minor(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 	tree_mod_log_write_unlock(fs_info);
 
-	return seq;
+	return elem->seq;
 }
 
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -487,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 
 	BUG_ON(!tm);
 
-	spin_lock(&fs_info->tree_mod_seq_lock);
-	tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
 
 	tm_root = &fs_info->tree_mod_log;
 	new = &tm_root->rb_node;
@@ -997,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
 	 * snapshot and the block was not allocated by tree relocation,
 	 * we know the block is not shared.
 	 */
-	if (root->ref_cows &&
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
 	    buf != root->node && buf != root->commit_root &&
 	    (btrfs_header_generation(buf) <=
 	     btrfs_root_last_snapshot(&root->root_item) ||
 	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
 		return 1;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (root->ref_cows &&
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
 	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
 		return 1;
 #endif
@@ -1146,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 
 	btrfs_assert_tree_locked(buf);
 
-	WARN_ON(root->ref_cows && trans->transid !=
-		root->fs_info->running_transaction->transid);
-	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->fs_info->running_transaction->transid);
+	WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+		trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
 
@@ -1193,7 +1161,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	if (root->ref_cows) {
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
 		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
 		if (ret)
 			return ret;
@@ -1538,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct extent_buffer *buf)
 {
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+		return 0;
+#endif
 	/* ensure we can see the force_cow */
 	smp_rmb();
 
@@ -1556,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
 	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
 	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
-	    !root->force_cow)
+	    !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
 		return 0;
 	return 1;
 }
@@ -5125,7 +5097,17 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 		return ret;
 	btrfs_item_key(path->nodes[0], &found_key, 0);
 	ret = comp_keys(&found_key, &key);
-	if (ret < 0)
+	/*
+	 * We might have had an item with the previous key in the tree right
+	 * before we released our path. And after we released our path, that
+	 * item might have been pushed to the first slot (0) of the leaf we
+	 * were holding due to a tree balance. Alternatively, an item with the
+	 * previous key can exist as the only element of a leaf (big fat item).
+	 * Therefore account for these 2 cases, so that our callers (like
+	 * btrfs_previous_item) don't miss an existing item with a key matching
+	 * the previous key we computed above.
+	 */
+	if (ret <= 0)
 		return 0;
 	return 1;
 }
@@ -5736,6 +5718,24 @@ again:
 		ret = 0;
 		goto done;
 	}
+	/*
+	 * So the above check misses one case:
+	 * - after releasing the path above, someone has removed the item that
+	 *   used to be at the very end of the block, and balance between leafs
+	 *   gets another one with bigger key.offset to replace it.
+	 *
+	 * This one should be returned as well, or we can get leaf corruption
+	 * later(esp. in __btrfs_drop_extents()).
+	 *
+	 * And a bit more explanation about this check,
+	 * with ret > 0, the key isn't found, the path points to the slot
+	 * where it should be inserted, so the path->slots[0] item must be the
+	 * bigger one.
+	 */
+	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
+		ret = 0;
+		goto done;
+	}
 
 	while (level < BTRFS_MAX_LEVEL) {
 		if (!path->nodes[level]) {
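Two themes run through the ctree.c changes above: ad-hoc int flags on btrfs_root (ref_cows, track_dirty, force_cow, ...) move to atomic bitops on root->state, and the tree mod log sequence number loses its major/minor split in favor of one monotonic atomic64 counter, so blockers and log entries draw from the same sequence and the tree_mod_seq_lock is no longer needed around allocation. A sketch of the sequence side, assuming only the counter shown in the hunks (hypothetical wrapper name):

    /* Sketch: one counter replaces the old split where blockers bumped
     * the upper 32 bits and log entries the lower 32.  Every caller gets
     * a unique, totally ordered value from a single source, which is all
     * the time_seq comparisons in the tree mod log rely on. */
    static inline u64 inc_tree_mod_seq(atomic64_t *tree_mod_seq)
    {
    	return atomic64_inc_return(tree_mod_seq);
    }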
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c48df572bd6..b7e2c1c1ef36 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -33,6 +33,7 @@
 #include <asm/kmap_types.h>
 #include <linux/pagemap.h>
 #include <linux/btrfs.h>
+#include <linux/workqueue.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
@@ -756,6 +757,12 @@ struct btrfs_dir_item {
 
 #define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)
 
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD		(1ULL << 48)
+
 struct btrfs_root_item {
 	struct btrfs_inode_item inode;
 	__le64 generation;
@@ -840,7 +847,10 @@ struct btrfs_disk_balance_args {
 	/* BTRFS_BALANCE_ARGS_* */
 	__le64 flags;
 
-	__le64 unused[8];
+	/* BTRFS_BALANCE_ARGS_LIMIT value */
+	__le64 limit;
+
+	__le64 unused[7];
 } __attribute__ ((__packed__));
 
 /*
@@ -1113,6 +1123,12 @@ struct btrfs_qgroup_limit_item {
 	__le64 rsv_excl;
 } __attribute__ ((__packed__));
 
+/* For raid type sysfs entries */
+struct raid_kobject {
+	int raid_type;
+	struct kobject kobj;
+};
+
 struct btrfs_space_info {
 	spinlock_t lock;
 
@@ -1163,7 +1179,7 @@ struct btrfs_space_info {
 	wait_queue_head_t wait;
 
 	struct kobject kobj;
-	struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
+	struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
 };
 
 #define BTRFS_BLOCK_RSV_GLOBAL		1
@@ -1313,6 +1329,8 @@ struct btrfs_stripe_hash_table {
 
 #define BTRFS_STRIPE_HASH_TABLE_BITS 11
 
+void btrfs_init_async_reclaim_work(struct work_struct *work);
+
 /* fs_info */
 struct reloc_control;
 struct btrfs_device;
@@ -1534,6 +1552,9 @@ struct btrfs_fs_info {
 	 */
 	struct btrfs_workqueue *fixup_workers;
 	struct btrfs_workqueue *delayed_workers;
+
+	/* the extent workers do delayed refs on the extent allocation tree */
+	struct btrfs_workqueue *extent_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
@@ -1636,7 +1657,10 @@ struct btrfs_fs_info {
 
 	/* holds configuration and tracking. Protected by qgroup_lock */
 	struct rb_root qgroup_tree;
+	struct rb_root qgroup_op_tree;
 	spinlock_t qgroup_lock;
+	spinlock_t qgroup_op_lock;
+	atomic_t qgroup_op_seq;
 
 	/*
 	 * used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1688,6 +1712,9 @@ struct btrfs_fs_info {
 
 	struct semaphore uuid_tree_rescan_sem;
 	unsigned int update_uuid_tree_gen:1;
+
+	/* Used to reclaim the metadata space in the background. */
+	struct work_struct async_reclaim_work;
 };
 
 struct btrfs_subvolume_writers {
@@ -1696,6 +1723,26 @@ struct btrfs_subvolume_writers {
 };
 
 /*
+ * The state of btrfs root
+ */
+/*
+ * btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race is very small, and only the first time the root
+ * is added to each transaction. So IN_TRANS_SETUP
+ * is used to tell us when more checks are required
+ */
+#define BTRFS_ROOT_IN_TRANS_SETUP	0
+#define BTRFS_ROOT_REF_COWS		1
+#define BTRFS_ROOT_TRACK_DIRTY		2
+#define BTRFS_ROOT_IN_RADIX		3
+#define BTRFS_ROOT_DUMMY_ROOT		4
+#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED	5
+#define BTRFS_ROOT_DEFRAG_RUNNING	6
+#define BTRFS_ROOT_FORCE_COW		7
+#define BTRFS_ROOT_MULTI_LOG_TASKS	8
+
+/*
  * in ram representation of the tree. extent_root is used for all allocations
  * and for the extent tree extent_root root.
  */
@@ -1706,6 +1753,7 @@ struct btrfs_root {
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
 
+	unsigned long state;
 	struct btrfs_root_item root_item;
 	struct btrfs_key root_key;
 	struct btrfs_fs_info *fs_info;
@@ -1740,7 +1788,6 @@ struct btrfs_root {
 	/* Just be updated when the commit succeeds. */
 	int last_log_commit;
 	pid_t log_start_pid;
-	bool log_multiple_pids;
 
 	u64 objectid;
 	u64 last_trans;
@@ -1760,23 +1807,13 @@ struct btrfs_root {
 
 	u64 highest_objectid;
 
-	/* btrfs_record_root_in_trans is a multi-step process,
-	 * and it can race with the balancing code. But the
-	 * race is very small, and only the first time the root
-	 * is added to each transaction. So in_trans_setup
-	 * is used to tell us when more checks are required
-	 */
-	unsigned long in_trans_setup;
-	int ref_cows;
-	int track_dirty;
-	int in_radix;
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-	int dummy_root;
+	u64 alloc_bytenr;
 #endif
+
 	u64 defrag_trans_start;
 	struct btrfs_key defrag_progress;
 	struct btrfs_key defrag_max;
-	int defrag_running;
 	char *name;
 
 	/* the dirty list is only used by non-reference counted roots */
@@ -1790,7 +1827,6 @@ struct btrfs_root {
 	spinlock_t orphan_lock;
 	atomic_t orphan_inodes;
 	struct btrfs_block_rsv *orphan_block_rsv;
-	int orphan_item_inserted;
 	int orphan_cleanup_state;
 
 	spinlock_t inode_lock;
@@ -1808,8 +1844,6 @@ struct btrfs_root {
 	 */
 	dev_t anon_dev;
 
-	int force_cow;
-
 	spinlock_t root_item_lock;
 	atomic_t refs;
 
@@ -2058,6 +2092,20 @@ struct btrfs_ioctl_defrag_range_args {
 #define btrfs_raw_test_opt(o, opt)	((o) & BTRFS_MOUNT_##opt)
 #define btrfs_test_opt(root, opt)	((root)->fs_info->mount_opt & \
 					 BTRFS_MOUNT_##opt)
+#define btrfs_set_and_info(root, opt, fmt, args...)			\
+{									\
+	if (!btrfs_test_opt(root, opt))					\
+		btrfs_info(root->fs_info, fmt, ##args);			\
+	btrfs_set_opt(root->fs_info->mount_opt, opt);			\
+}
+
+#define btrfs_clear_and_info(root, opt, fmt, args...)			\
+{									\
+	if (btrfs_test_opt(root, opt))					\
+		btrfs_info(root->fs_info, fmt, ##args);			\
+	btrfs_clear_opt(root->fs_info->mount_opt, opt);			\
+}
+
 /*
  * Inode flags
  */
@@ -2774,6 +2822,11 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root)
 	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
 }
 
+static inline bool btrfs_root_dead(struct btrfs_root *root)
+{
+	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
+}
+
 /* struct btrfs_root_backup */
 BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
 		   tree_root, 64);
@@ -2883,6 +2936,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
 	cpu->vend = le64_to_cpu(disk->vend);
 	cpu->target = le64_to_cpu(disk->target);
 	cpu->flags = le64_to_cpu(disk->flags);
+	cpu->limit = le64_to_cpu(disk->limit);
 }
 
 static inline void
@@ -2900,6 +2954,7 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
 	disk->vend = cpu_to_le64(cpu->vend);
 	disk->target = cpu_to_le64(cpu->target);
 	disk->flags = cpu_to_le64(cpu->flags);
+	disk->limit = cpu_to_le64(cpu->limit);
 }
 
 /* struct btrfs_super_block */
@@ -3222,6 +3277,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+				 unsigned long count, int wait);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
@@ -3261,9 +3318,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
 			 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
 			 struct btrfs_key *ins, int is_data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int for_cow);
+		  struct extent_buffer *buf, int full_backref, int no_quota);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int for_cow);
+		  struct extent_buffer *buf, int full_backref, int no_quota);
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
 				u64 bytenr, u64 num_bytes, u64 flags,
@@ -3271,7 +3328,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-		      u64 owner, u64 offset, int for_cow);
+		      u64 owner, u64 offset, int no_quota);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -3283,7 +3340,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 owner, u64 offset, int for_cow);
+			 u64 root_objectid, u64 owner, u64 offset, int no_quota);
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
@@ -3371,7 +3428,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
 int __get_raid_index(u64 flags);
-
 int btrfs_start_nocow_write(struct btrfs_root *root);
 void btrfs_end_nocow_write(struct btrfs_root *root);
 /* ctree.c */
@@ -3547,7 +3603,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem);
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			    struct seq_list *elem);
-u64 btrfs_tree_mod_seq_prev(u64 seq);
 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
 
 /* root-item.c */
@@ -3694,6 +3749,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 		       struct bio *bio, u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+				     const struct btrfs_path *path,
+				     struct btrfs_file_extent_item *fi,
+				     const bool new_inline,
+				     struct extent_map *em);
+
 /* inode.c */
 struct btrfs_delalloc_work {
 	struct inode *inode;
@@ -4055,52 +4116,6 @@ void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 			 u64 start, int err);
 
-/* qgroup.c */
-struct qgroup_update {
-	struct list_head list;
-	struct btrfs_delayed_ref_node *node;
-	struct btrfs_delayed_extent_op *extent_op;
-};
-
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
-void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid,
-			char *name);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 qgroupid,
-		       struct btrfs_qgroup_limit *limit);
-int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
-void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_delayed_ref_node *node,
-			    struct btrfs_delayed_extent_op *extent_op);
-int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
-			     struct btrfs_delayed_ref_node *node,
-			     struct btrfs_delayed_extent_op *extent_op);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
-			 struct btrfs_qgroup_inherit *inherit);
-int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
-
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
-
 static inline int is_fstree(u64 rootid)
 {
 	if (rootid == BTRFS_FS_TREE_OBJECTID ||
@@ -4117,6 +4132,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
 /* Sanity test specific functions */
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 void btrfs_test_destroy_inode(struct inode *inode);
+int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
+			       u64 rfer, u64 excl);
 #endif
 
 #endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 33e561a84013..da775bfdebc9 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -149,8 +149,8 @@ again:
 	spin_lock(&root->inode_lock);
 	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
 	if (ret == -EEXIST) {
-		kmem_cache_free(delayed_node_cache, node);
 		spin_unlock(&root->inode_lock);
+		kmem_cache_free(delayed_node_cache, node);
 		radix_tree_preload_end();
 		goto again;
 	}
@@ -267,14 +267,17 @@ static void __btrfs_release_delayed_node(
 	mutex_unlock(&delayed_node->mutex);
 
 	if (atomic_dec_and_test(&delayed_node->refs)) {
+		bool free = false;
 		struct btrfs_root *root = delayed_node->root;
 		spin_lock(&root->inode_lock);
 		if (atomic_read(&delayed_node->refs) == 0) {
 			radix_tree_delete(&root->delayed_nodes_tree,
 					  delayed_node->inode_id);
-			kmem_cache_free(delayed_node_cache, delayed_node);
+			free = true;
 		}
 		spin_unlock(&root->inode_lock);
+		if (free)
+			kmem_cache_free(delayed_node_cache, delayed_node);
 	}
 }
 
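Both delayed-inode hunks apply one rule: decide what to free while holding root->inode_lock, but call kmem_cache_free() only after dropping it, which keeps the critical section minimal and the allocator out of spinlocked context. Reduced to its shape:

    /* Sketch of the deferred-free pattern used in both hunks. */
    bool free_it = false;

    spin_lock(&root->inode_lock);
    if (atomic_read(&delayed_node->refs) == 0) {
    	radix_tree_delete(&root->delayed_nodes_tree,
    			  delayed_node->inode_id);
    	free_it = true;		/* node is unreachable now */
    }
    spin_unlock(&root->inode_lock);

    if (free_it)
    	kmem_cache_free(delayed_node_cache, delayed_node);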
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 31299646024d..6d16bea94e1c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, | |||
| 106 | return -1; | 106 | return -1; |
| 107 | if (ref1->type > ref2->type) | 107 | if (ref1->type > ref2->type) |
| 108 | return 1; | 108 | return 1; |
| 109 | if (ref1->no_quota > ref2->no_quota) | ||
| 110 | return 1; | ||
| 111 | if (ref1->no_quota < ref2->no_quota) | ||
| 112 | return -1; | ||
| 109 | /* merging of sequenced refs is not allowed */ | 113 | /* merging of sequenced refs is not allowed */ |
| 110 | if (compare_seq) { | 114 | if (compare_seq) { |
| 111 | if (ref1->seq < ref2->seq) | 115 | if (ref1->seq < ref2->seq) |
| @@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 635 | struct btrfs_delayed_ref_head *head_ref, | 639 | struct btrfs_delayed_ref_head *head_ref, |
| 636 | struct btrfs_delayed_ref_node *ref, u64 bytenr, | 640 | struct btrfs_delayed_ref_node *ref, u64 bytenr, |
| 637 | u64 num_bytes, u64 parent, u64 ref_root, int level, | 641 | u64 num_bytes, u64 parent, u64 ref_root, int level, |
| 638 | int action, int for_cow) | 642 | int action, int no_quota) |
| 639 | { | 643 | { |
| 640 | struct btrfs_delayed_ref_node *existing; | 644 | struct btrfs_delayed_ref_node *existing; |
| 641 | struct btrfs_delayed_tree_ref *full_ref; | 645 | struct btrfs_delayed_tree_ref *full_ref; |
| @@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 645 | if (action == BTRFS_ADD_DELAYED_EXTENT) | 649 | if (action == BTRFS_ADD_DELAYED_EXTENT) |
| 646 | action = BTRFS_ADD_DELAYED_REF; | 650 | action = BTRFS_ADD_DELAYED_REF; |
| 647 | 651 | ||
| 652 | if (is_fstree(ref_root)) | ||
| 653 | seq = atomic64_read(&fs_info->tree_mod_seq); | ||
| 648 | delayed_refs = &trans->transaction->delayed_refs; | 654 | delayed_refs = &trans->transaction->delayed_refs; |
| 649 | 655 | ||
| 650 | /* first set the basic ref node struct up */ | 656 | /* first set the basic ref node struct up */ |
| @@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 655 | ref->action = action; | 661 | ref->action = action; |
| 656 | ref->is_head = 0; | 662 | ref->is_head = 0; |
| 657 | ref->in_tree = 1; | 663 | ref->in_tree = 1; |
| 658 | 664 | ref->no_quota = no_quota; | |
| 659 | if (need_ref_seq(for_cow, ref_root)) | ||
| 660 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
| 661 | ref->seq = seq; | 665 | ref->seq = seq; |
| 662 | 666 | ||
| 663 | full_ref = btrfs_delayed_node_to_tree_ref(ref); | 667 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
| @@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 697 | struct btrfs_delayed_ref_head *head_ref, | 701 | struct btrfs_delayed_ref_head *head_ref, |
| 698 | struct btrfs_delayed_ref_node *ref, u64 bytenr, | 702 | struct btrfs_delayed_ref_node *ref, u64 bytenr, |
| 699 | u64 num_bytes, u64 parent, u64 ref_root, u64 owner, | 703 | u64 num_bytes, u64 parent, u64 ref_root, u64 owner, |
| 700 | u64 offset, int action, int for_cow) | 704 | u64 offset, int action, int no_quota) |
| 701 | { | 705 | { |
| 702 | struct btrfs_delayed_ref_node *existing; | 706 | struct btrfs_delayed_ref_node *existing; |
| 703 | struct btrfs_delayed_data_ref *full_ref; | 707 | struct btrfs_delayed_data_ref *full_ref; |
| @@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 709 | 713 | ||
| 710 | delayed_refs = &trans->transaction->delayed_refs; | 714 | delayed_refs = &trans->transaction->delayed_refs; |
| 711 | 715 | ||
| 716 | if (is_fstree(ref_root)) | ||
| 717 | seq = atomic64_read(&fs_info->tree_mod_seq); | ||
| 718 | |||
| 712 | /* first set the basic ref node struct up */ | 719 | /* first set the basic ref node struct up */ |
| 713 | atomic_set(&ref->refs, 1); | 720 | atomic_set(&ref->refs, 1); |
| 714 | ref->bytenr = bytenr; | 721 | ref->bytenr = bytenr; |
| @@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 717 | ref->action = action; | 724 | ref->action = action; |
| 718 | ref->is_head = 0; | 725 | ref->is_head = 0; |
| 719 | ref->in_tree = 1; | 726 | ref->in_tree = 1; |
| 720 | 727 | ref->no_quota = no_quota; | |
| 721 | if (need_ref_seq(for_cow, ref_root)) | ||
| 722 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
| 723 | ref->seq = seq; | 728 | ref->seq = seq; |
| 724 | 729 | ||
| 725 | full_ref = btrfs_delayed_node_to_data_ref(ref); | 730 | full_ref = btrfs_delayed_node_to_data_ref(ref); |
| @@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 762 | u64 bytenr, u64 num_bytes, u64 parent, | 767 | u64 bytenr, u64 num_bytes, u64 parent, |
| 763 | u64 ref_root, int level, int action, | 768 | u64 ref_root, int level, int action, |
| 764 | struct btrfs_delayed_extent_op *extent_op, | 769 | struct btrfs_delayed_extent_op *extent_op, |
| 765 | int for_cow) | 770 | int no_quota) |
| 766 | { | 771 | { |
| 767 | struct btrfs_delayed_tree_ref *ref; | 772 | struct btrfs_delayed_tree_ref *ref; |
| 768 | struct btrfs_delayed_ref_head *head_ref; | 773 | struct btrfs_delayed_ref_head *head_ref; |
| 769 | struct btrfs_delayed_ref_root *delayed_refs; | 774 | struct btrfs_delayed_ref_root *delayed_refs; |
| 770 | 775 | ||
| 776 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
| 777 | no_quota = 0; | ||
| 778 | |||
| 771 | BUG_ON(extent_op && extent_op->is_data); | 779 | BUG_ON(extent_op && extent_op->is_data); |
| 772 | ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); | 780 | ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); |
| 773 | if (!ref) | 781 | if (!ref) |
| @@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 793 | 801 | ||
| 794 | add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, | 802 | add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, |
| 795 | num_bytes, parent, ref_root, level, action, | 803 | num_bytes, parent, ref_root, level, action, |
| 796 | for_cow); | 804 | no_quota); |
| 797 | spin_unlock(&delayed_refs->lock); | 805 | spin_unlock(&delayed_refs->lock); |
| 798 | if (need_ref_seq(for_cow, ref_root)) | ||
| 799 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
| 800 | 806 | ||
| 801 | return 0; | 807 | return 0; |
| 802 | } | 808 | } |
| @@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 810 | u64 parent, u64 ref_root, | 816 | u64 parent, u64 ref_root, |
| 811 | u64 owner, u64 offset, int action, | 817 | u64 owner, u64 offset, int action, |
| 812 | struct btrfs_delayed_extent_op *extent_op, | 818 | struct btrfs_delayed_extent_op *extent_op, |
| 813 | int for_cow) | 819 | int no_quota) |
| 814 | { | 820 | { |
| 815 | struct btrfs_delayed_data_ref *ref; | 821 | struct btrfs_delayed_data_ref *ref; |
| 816 | struct btrfs_delayed_ref_head *head_ref; | 822 | struct btrfs_delayed_ref_head *head_ref; |
| 817 | struct btrfs_delayed_ref_root *delayed_refs; | 823 | struct btrfs_delayed_ref_root *delayed_refs; |
| 818 | 824 | ||
| 825 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
| 826 | no_quota = 0; | ||
| 827 | |||
| 819 | BUG_ON(extent_op && !extent_op->is_data); | 828 | BUG_ON(extent_op && !extent_op->is_data); |
| 820 | ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); | 829 | ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); |
| 821 | if (!ref) | 830 | if (!ref) |
| @@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 841 | 850 | ||
| 842 | add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, | 851 | add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, |
| 843 | num_bytes, parent, ref_root, owner, offset, | 852 | num_bytes, parent, ref_root, owner, offset, |
| 844 | action, for_cow); | 853 | action, no_quota); |
| 845 | spin_unlock(&delayed_refs->lock); | 854 | spin_unlock(&delayed_refs->lock); |
| 846 | if (need_ref_seq(for_cow, ref_root)) | ||
| 847 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
| 848 | 855 | ||
| 849 | return 0; | 856 | return 0; |
| 850 | } | 857 | } |
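The clamp at the top of both entry points above replaces the old need_ref_seq() gate: only file-system trees with quotas enabled are candidates for qgroup tracking. A minimal user-space sketch of that gate; the objectid constants and the simplified is_fstree() follow their definitions in ctree.h, and the clamp direction matches the hunks above:

#include <stdio.h>

/* objectid constants, as defined in fs/btrfs/ctree.h */
#define BTRFS_FS_TREE_OBJECTID        5ULL
#define BTRFS_FIRST_FREE_OBJECTID   256ULL

/* simplified is_fstree(): does this root hold user data? */
static int is_fstree(unsigned long long rootid)
{
        return rootid == BTRFS_FS_TREE_OBJECTID ||
               (long long)rootid >= (long long)BTRFS_FIRST_FREE_OBJECTID;
}

/* the clamp applied before a delayed ref is queued */
static int clamp_no_quota(int no_quota, unsigned long long ref_root,
                          int quota_enabled)
{
        if (!is_fstree(ref_root) || !quota_enabled)
                no_quota = 0;   /* never a qgroup candidate; normalise the flag */
        return no_quota;
}

int main(void)
{
        /* the extent tree root (objectid 2) is never qgroup-tracked */
        printf("%d\n", clamp_no_quota(1, 2, 1));    /* -> 0 */
        /* a subvolume root with quotas on keeps the caller's choice */
        printf("%d\n", clamp_no_quota(1, 257, 1));  /* -> 1 */
        return 0;
}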
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 4ba9b93022ff..a764e2340d48 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
| @@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node { | |||
| 52 | 52 | ||
| 53 | unsigned int action:8; | 53 | unsigned int action:8; |
| 54 | unsigned int type:8; | 54 | unsigned int type:8; |
| 55 | unsigned int no_quota:1; | ||
| 55 | /* is this node still in the rbtree? */ | 56 | /* is this node still in the rbtree? */ |
| 56 | unsigned int is_head:1; | 57 | unsigned int is_head:1; |
| 57 | unsigned int in_tree:1; | 58 | unsigned int in_tree:1; |
| @@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 196 | u64 bytenr, u64 num_bytes, u64 parent, | 197 | u64 bytenr, u64 num_bytes, u64 parent, |
| 197 | u64 ref_root, int level, int action, | 198 | u64 ref_root, int level, int action, |
| 198 | struct btrfs_delayed_extent_op *extent_op, | 199 | struct btrfs_delayed_extent_op *extent_op, |
| 199 | int for_cow); | 200 | int no_quota); |
| 200 | int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | 201 | int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, |
| 201 | struct btrfs_trans_handle *trans, | 202 | struct btrfs_trans_handle *trans, |
| 202 | u64 bytenr, u64 num_bytes, | 203 | u64 bytenr, u64 num_bytes, |
| 203 | u64 parent, u64 ref_root, | 204 | u64 parent, u64 ref_root, |
| 204 | u64 owner, u64 offset, int action, | 205 | u64 owner, u64 offset, int action, |
| 205 | struct btrfs_delayed_extent_op *extent_op, | 206 | struct btrfs_delayed_extent_op *extent_op, |
| 206 | int for_cow); | 207 | int no_quota); |
| 207 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | 208 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
| 208 | struct btrfs_trans_handle *trans, | 209 | struct btrfs_trans_handle *trans, |
| 209 | u64 bytenr, u64 num_bytes, | 210 | u64 bytenr, u64 num_bytes, |
| @@ -231,25 +232,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, | |||
| 231 | u64 seq); | 232 | u64 seq); |
| 232 | 233 | ||
| 233 | /* | 234 | /* |
| 234 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
| 235 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
| 236 | * to be held back, so they won't get a ref_seq number. | ||
| 237 | */ | ||
| 238 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
| 239 | { | ||
| 240 | if (for_cow) | ||
| 241 | return 0; | ||
| 242 | |||
| 243 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
| 244 | return 1; | ||
| 245 | |||
| 246 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
| 247 | return 1; | ||
| 248 | |||
| 249 | return 0; | ||
| 250 | } | ||
| 251 | |||
| 252 | /* | ||
| 253 | * a node might live in a head or a regular ref, this lets you | 235 | * a node might live in a head or a regular ref, this lets you |
| 254 | * test for the proper type to use. | 236 | * test for the proper type to use. |
| 255 | */ | 237 | */ |
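The new flag rides in the same bitfield word as the existing ones, so the node does not grow. A quick compilable check of the packing, with field names and widths taken from the hunk above:

#include <stdio.h>

/* shape of the flag word in btrfs_delayed_ref_node, per the header hunk */
struct ref_flags {
        unsigned int action:8;
        unsigned int type:8;
        unsigned int no_quota:1;
        unsigned int is_head:1;
        unsigned int in_tree:1;
};

int main(void)
{
        /* all five fields still pack into a single 32-bit word */
        printf("sizeof(struct ref_flags) = %zu\n", sizeof(struct ref_flags));
        return 0;
}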
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 9f2290509aca..2af6e66fe788 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -313,7 +313,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
| 313 | 313 | ||
| 314 | if (btrfs_fs_incompat(fs_info, RAID56)) { | 314 | if (btrfs_fs_incompat(fs_info, RAID56)) { |
| 315 | btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); | 315 | btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); |
| 316 | return -EINVAL; | 316 | return -EOPNOTSUPP; |
| 317 | } | 317 | } |
| 318 | 318 | ||
| 319 | switch (args->start.cont_reading_from_srcdev_mode) { | 319 | switch (args->start.cont_reading_from_srcdev_mode) { |
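Switching the RAID5/6 rejection from -EINVAL to -EOPNOTSUPP lets callers separate "this layout cannot do that yet" from "the arguments were malformed". A hypothetical user-space caller (BTRFS_IOC_DEV_REPLACE and its args struct are the real UAPI names; the handling around them is illustrative):

#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* fd is an open descriptor on the mounted filesystem */
static void start_replace(int fd, struct btrfs_ioctl_dev_replace_args *args)
{
        if (ioctl(fd, BTRFS_IOC_DEV_REPLACE, args) < 0) {
                if (errno == EOPNOTSUPP)
                        fprintf(stderr, "replace unsupported on this profile (RAID5/6)\n");
                else
                        perror("dev replace");
        }
}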
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 029d46c2e170..8bb4aa19898f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include "dev-replace.h" | 49 | #include "dev-replace.h" |
| 50 | #include "raid56.h" | 50 | #include "raid56.h" |
| 51 | #include "sysfs.h" | 51 | #include "sysfs.h" |
| 52 | #include "qgroup.h" | ||
| 52 | 53 | ||
| 53 | #ifdef CONFIG_X86 | 54 | #ifdef CONFIG_X86 |
| 54 | #include <asm/cpufeature.h> | 55 | #include <asm/cpufeature.h> |
| @@ -1109,6 +1110,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | |||
| 1109 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 1110 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
| 1110 | u64 bytenr, u32 blocksize) | 1111 | u64 bytenr, u32 blocksize) |
| 1111 | { | 1112 | { |
| 1113 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 1114 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
| 1115 | return alloc_test_extent_buffer(root->fs_info, bytenr, | ||
| 1116 | blocksize); | ||
| 1117 | #endif | ||
| 1112 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); | 1118 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); |
| 1113 | } | 1119 | } |
| 1114 | 1120 | ||
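The pattern here is a compile-time gated test hook: dummy roots created by the sanity tests are redirected to a fake extent-buffer allocator, and production builds compile the branch away entirely. A stripped-down model, assuming a plain bit test on root->state:

#include <stdio.h>

#define BTRFS_ROOT_DUMMY_ROOT 0    /* assumed bit number */

struct root { unsigned long state; };

static int test_bit(int nr, const unsigned long *addr)
{
        return (*addr >> nr) & 1UL;
}

static const char *find_create_tree_block(const struct root *root)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
        /* dummy (test) roots must never touch the real btree inode */
        if (test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))
                return "alloc_test_extent_buffer";
#endif
        return "alloc_extent_buffer";
}

int main(void)
{
        struct root r = { 1UL << BTRFS_ROOT_DUMMY_ROOT };
        printf("%s\n", find_create_tree_block(&r));
        return 0;
}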
| @@ -1201,10 +1207,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1201 | root->nodesize = nodesize; | 1207 | root->nodesize = nodesize; |
| 1202 | root->leafsize = leafsize; | 1208 | root->leafsize = leafsize; |
| 1203 | root->stripesize = stripesize; | 1209 | root->stripesize = stripesize; |
| 1204 | root->ref_cows = 0; | 1210 | root->state = 0; |
| 1205 | root->track_dirty = 0; | ||
| 1206 | root->in_radix = 0; | ||
| 1207 | root->orphan_item_inserted = 0; | ||
| 1208 | root->orphan_cleanup_state = 0; | 1211 | root->orphan_cleanup_state = 0; |
| 1209 | 1212 | ||
| 1210 | root->objectid = objectid; | 1213 | root->objectid = objectid; |
| @@ -1265,7 +1268,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1265 | else | 1268 | else |
| 1266 | root->defrag_trans_start = 0; | 1269 | root->defrag_trans_start = 0; |
| 1267 | init_completion(&root->kobj_unregister); | 1270 | init_completion(&root->kobj_unregister); |
| 1268 | root->defrag_running = 0; | ||
| 1269 | root->root_key.objectid = objectid; | 1271 | root->root_key.objectid = objectid; |
| 1270 | root->anon_dev = 0; | 1272 | root->anon_dev = 0; |
| 1271 | 1273 | ||
| @@ -1290,7 +1292,8 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) | |||
| 1290 | if (!root) | 1292 | if (!root) |
| 1291 | return ERR_PTR(-ENOMEM); | 1293 | return ERR_PTR(-ENOMEM); |
| 1292 | __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); | 1294 | __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); |
| 1293 | root->dummy_root = 1; | 1295 | set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); |
| 1296 | root->alloc_bytenr = 0; | ||
| 1294 | 1297 | ||
| 1295 | return root; | 1298 | return root; |
| 1296 | } | 1299 | } |
| @@ -1341,8 +1344,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 1341 | btrfs_mark_buffer_dirty(leaf); | 1344 | btrfs_mark_buffer_dirty(leaf); |
| 1342 | 1345 | ||
| 1343 | root->commit_root = btrfs_root_node(root); | 1346 | root->commit_root = btrfs_root_node(root); |
| 1344 | root->track_dirty = 1; | 1347 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); |
| 1345 | |||
| 1346 | 1348 | ||
| 1347 | root->root_item.flags = 0; | 1349 | root->root_item.flags = 0; |
| 1348 | root->root_item.byte_limit = 0; | 1350 | root->root_item.byte_limit = 0; |
| @@ -1371,6 +1373,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 1371 | fail: | 1373 | fail: |
| 1372 | if (leaf) { | 1374 | if (leaf) { |
| 1373 | btrfs_tree_unlock(leaf); | 1375 | btrfs_tree_unlock(leaf); |
| 1376 | free_extent_buffer(root->commit_root); | ||
| 1374 | free_extent_buffer(leaf); | 1377 | free_extent_buffer(leaf); |
| 1375 | } | 1378 | } |
| 1376 | kfree(root); | 1379 | kfree(root); |
| @@ -1396,13 +1399,15 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
| 1396 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | 1399 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; |
| 1397 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1400 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
| 1398 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; | 1401 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; |
| 1402 | |||
| 1399 | /* | 1403 | /* |
| 1404 | * DON'T set REF_COWS for log trees | ||
| 1405 | * | ||
| 1400 | * log trees do not get reference counted because they go away | 1406 | * log trees do not get reference counted because they go away |
| 1401 | * before a real commit is actually done. They do store pointers | 1407 | * before a real commit is actually done. They do store pointers |
| 1402 | * to file data extents, and those reference counts still get | 1408 | * to file data extents, and those reference counts still get |
| 1403 | * updated (along with back refs to the log tree). | 1409 | * updated (along with back refs to the log tree). |
| 1404 | */ | 1410 | */ |
| 1405 | root->ref_cows = 0; | ||
| 1406 | 1411 | ||
| 1407 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1412 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
| 1408 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1413 | BTRFS_TREE_LOG_OBJECTID, NULL, |
| @@ -1536,7 +1541,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
| 1536 | return root; | 1541 | return root; |
| 1537 | 1542 | ||
| 1538 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 1543 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
| 1539 | root->ref_cows = 1; | 1544 | set_bit(BTRFS_ROOT_REF_COWS, &root->state); |
| 1540 | btrfs_check_and_init_root_item(&root->root_item); | 1545 | btrfs_check_and_init_root_item(&root->root_item); |
| 1541 | } | 1546 | } |
| 1542 | 1547 | ||
| @@ -1606,7 +1611,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | |||
| 1606 | (unsigned long)root->root_key.objectid, | 1611 | (unsigned long)root->root_key.objectid, |
| 1607 | root); | 1612 | root); |
| 1608 | if (ret == 0) | 1613 | if (ret == 0) |
| 1609 | root->in_radix = 1; | 1614 | set_bit(BTRFS_ROOT_IN_RADIX, &root->state); |
| 1610 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1615 | spin_unlock(&fs_info->fs_roots_radix_lock); |
| 1611 | radix_tree_preload_end(); | 1616 | radix_tree_preload_end(); |
| 1612 | 1617 | ||
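Much of the disk-io.c churn in this file is one mechanical conversion: the per-root int flags (ref_cows, track_dirty, in_radix, orphan_item_inserted, dummy_root) collapse into a single root->state word driven by set_bit()/test_bit(), whose atomicity makes concurrent flag updates safe. A compilable model of the conversion; the bit numbers here are assumptions, the real enum lives in ctree.h:

#include <stdio.h>

/* assumed bit numbers; the kernel defines these in an enum in ctree.h */
enum {
        BTRFS_ROOT_IN_RADIX,
        BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
        BTRFS_ROOT_DUMMY_ROOT,
        BTRFS_ROOT_REF_COWS,
        BTRFS_ROOT_TRACK_DIRTY,
};

struct root { unsigned long state; };

/* user-space stand-ins for the kernel's atomic bitops */
static void set_bit(int nr, unsigned long *addr)  { *addr |= 1UL << nr; }
static int  test_bit(int nr, const unsigned long *addr)
{
        return (*addr >> nr) & 1UL;
}

int main(void)
{
        struct root r = { 0 };

        set_bit(BTRFS_ROOT_TRACK_DIRTY, &r.state); /* was: root->track_dirty = 1 */
        printf("track_dirty=%d ref_cows=%d\n",
               test_bit(BTRFS_ROOT_TRACK_DIRTY, &r.state),
               test_bit(BTRFS_ROOT_REF_COWS, &r.state));
        return 0;
}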
| @@ -1662,7 +1667,7 @@ again: | |||
| 1662 | if (ret < 0) | 1667 | if (ret < 0) |
| 1663 | goto fail; | 1668 | goto fail; |
| 1664 | if (ret == 0) | 1669 | if (ret == 0) |
| 1665 | root->orphan_item_inserted = 1; | 1670 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
| 1666 | 1671 | ||
| 1667 | ret = btrfs_insert_fs_root(fs_info, root); | 1672 | ret = btrfs_insert_fs_root(fs_info, root); |
| 1668 | if (ret) { | 1673 | if (ret) { |
| @@ -2064,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | |||
| 2064 | btrfs_destroy_workqueue(fs_info->readahead_workers); | 2069 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
| 2065 | btrfs_destroy_workqueue(fs_info->flush_workers); | 2070 | btrfs_destroy_workqueue(fs_info->flush_workers); |
| 2066 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); | 2071 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
| 2072 | btrfs_destroy_workqueue(fs_info->extent_workers); | ||
| 2067 | } | 2073 | } |
| 2068 | 2074 | ||
| 2069 | static void free_root_extent_buffers(struct btrfs_root *root) | 2075 | static void free_root_extent_buffers(struct btrfs_root *root) |
| @@ -2090,7 +2096,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
| 2090 | free_root_extent_buffers(info->chunk_root); | 2096 | free_root_extent_buffers(info->chunk_root); |
| 2091 | } | 2097 | } |
| 2092 | 2098 | ||
| 2093 | static void del_fs_roots(struct btrfs_fs_info *fs_info) | 2099 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) |
| 2094 | { | 2100 | { |
| 2095 | int ret; | 2101 | int ret; |
| 2096 | struct btrfs_root *gang[8]; | 2102 | struct btrfs_root *gang[8]; |
| @@ -2101,7 +2107,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2101 | struct btrfs_root, root_list); | 2107 | struct btrfs_root, root_list); |
| 2102 | list_del(&gang[0]->root_list); | 2108 | list_del(&gang[0]->root_list); |
| 2103 | 2109 | ||
| 2104 | if (gang[0]->in_radix) { | 2110 | if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) { |
| 2105 | btrfs_drop_and_free_fs_root(fs_info, gang[0]); | 2111 | btrfs_drop_and_free_fs_root(fs_info, gang[0]); |
| 2106 | } else { | 2112 | } else { |
| 2107 | free_extent_buffer(gang[0]->node); | 2113 | free_extent_buffer(gang[0]->node); |
| @@ -2221,6 +2227,7 @@ int open_ctree(struct super_block *sb, | |||
| 2221 | spin_lock_init(&fs_info->free_chunk_lock); | 2227 | spin_lock_init(&fs_info->free_chunk_lock); |
| 2222 | spin_lock_init(&fs_info->tree_mod_seq_lock); | 2228 | spin_lock_init(&fs_info->tree_mod_seq_lock); |
| 2223 | spin_lock_init(&fs_info->super_lock); | 2229 | spin_lock_init(&fs_info->super_lock); |
| 2230 | spin_lock_init(&fs_info->qgroup_op_lock); | ||
| 2224 | spin_lock_init(&fs_info->buffer_lock); | 2231 | spin_lock_init(&fs_info->buffer_lock); |
| 2225 | rwlock_init(&fs_info->tree_mod_log_lock); | 2232 | rwlock_init(&fs_info->tree_mod_log_lock); |
| 2226 | mutex_init(&fs_info->reloc_mutex); | 2233 | mutex_init(&fs_info->reloc_mutex); |
| @@ -2246,6 +2253,7 @@ int open_ctree(struct super_block *sb, | |||
| 2246 | atomic_set(&fs_info->async_submit_draining, 0); | 2253 | atomic_set(&fs_info->async_submit_draining, 0); |
| 2247 | atomic_set(&fs_info->nr_async_bios, 0); | 2254 | atomic_set(&fs_info->nr_async_bios, 0); |
| 2248 | atomic_set(&fs_info->defrag_running, 0); | 2255 | atomic_set(&fs_info->defrag_running, 0); |
| 2256 | atomic_set(&fs_info->qgroup_op_seq, 0); | ||
| 2249 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2257 | atomic64_set(&fs_info->tree_mod_seq, 0); |
| 2250 | fs_info->sb = sb; | 2258 | fs_info->sb = sb; |
| 2251 | fs_info->max_inline = 8192 * 1024; | 2259 | fs_info->max_inline = 8192 * 1024; |
| @@ -2291,6 +2299,7 @@ int open_ctree(struct super_block *sb, | |||
| 2291 | atomic_set(&fs_info->balance_cancel_req, 0); | 2299 | atomic_set(&fs_info->balance_cancel_req, 0); |
| 2292 | fs_info->balance_ctl = NULL; | 2300 | fs_info->balance_ctl = NULL; |
| 2293 | init_waitqueue_head(&fs_info->balance_wait_q); | 2301 | init_waitqueue_head(&fs_info->balance_wait_q); |
| 2302 | btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); | ||
| 2294 | 2303 | ||
| 2295 | sb->s_blocksize = 4096; | 2304 | sb->s_blocksize = 4096; |
| 2296 | sb->s_blocksize_bits = blksize_bits(4096); | 2305 | sb->s_blocksize_bits = blksize_bits(4096); |
| @@ -2354,6 +2363,7 @@ int open_ctree(struct super_block *sb, | |||
| 2354 | spin_lock_init(&fs_info->qgroup_lock); | 2363 | spin_lock_init(&fs_info->qgroup_lock); |
| 2355 | mutex_init(&fs_info->qgroup_ioctl_lock); | 2364 | mutex_init(&fs_info->qgroup_ioctl_lock); |
| 2356 | fs_info->qgroup_tree = RB_ROOT; | 2365 | fs_info->qgroup_tree = RB_ROOT; |
| 2366 | fs_info->qgroup_op_tree = RB_ROOT; | ||
| 2357 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | 2367 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); |
| 2358 | fs_info->qgroup_seq = 1; | 2368 | fs_info->qgroup_seq = 1; |
| 2359 | fs_info->quota_enabled = 0; | 2369 | fs_info->quota_enabled = 0; |
| @@ -2577,6 +2587,10 @@ int open_ctree(struct super_block *sb, | |||
| 2577 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); | 2587 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
| 2578 | fs_info->qgroup_rescan_workers = | 2588 | fs_info->qgroup_rescan_workers = |
| 2579 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); | 2589 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
| 2590 | fs_info->extent_workers = | ||
| 2591 | btrfs_alloc_workqueue("extent-refs", flags, | ||
| 2592 | min_t(u64, fs_devices->num_devices, | ||
| 2593 | max_active), 8); | ||
| 2580 | 2594 | ||
| 2581 | if (!(fs_info->workers && fs_info->delalloc_workers && | 2595 | if (!(fs_info->workers && fs_info->delalloc_workers && |
| 2582 | fs_info->submit_workers && fs_info->flush_workers && | 2596 | fs_info->submit_workers && fs_info->flush_workers && |
| @@ -2586,6 +2600,7 @@ int open_ctree(struct super_block *sb, | |||
| 2586 | fs_info->endio_freespace_worker && fs_info->rmw_workers && | 2600 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
| 2587 | fs_info->caching_workers && fs_info->readahead_workers && | 2601 | fs_info->caching_workers && fs_info->readahead_workers && |
| 2588 | fs_info->fixup_workers && fs_info->delayed_workers && | 2602 | fs_info->fixup_workers && fs_info->delayed_workers && |
| 2603 | fs_info->extent_workers && | ||
| 2589 | fs_info->qgroup_rescan_workers)) { | 2604 | fs_info->qgroup_rescan_workers)) { |
| 2590 | err = -ENOMEM; | 2605 | err = -ENOMEM; |
| 2591 | goto fail_sb_buffer; | 2606 | goto fail_sb_buffer; |
| @@ -2693,7 +2708,7 @@ retry_root_backup: | |||
| 2693 | ret = PTR_ERR(extent_root); | 2708 | ret = PTR_ERR(extent_root); |
| 2694 | goto recovery_tree_root; | 2709 | goto recovery_tree_root; |
| 2695 | } | 2710 | } |
| 2696 | extent_root->track_dirty = 1; | 2711 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state); |
| 2697 | fs_info->extent_root = extent_root; | 2712 | fs_info->extent_root = extent_root; |
| 2698 | 2713 | ||
| 2699 | location.objectid = BTRFS_DEV_TREE_OBJECTID; | 2714 | location.objectid = BTRFS_DEV_TREE_OBJECTID; |
| @@ -2702,7 +2717,7 @@ retry_root_backup: | |||
| 2702 | ret = PTR_ERR(dev_root); | 2717 | ret = PTR_ERR(dev_root); |
| 2703 | goto recovery_tree_root; | 2718 | goto recovery_tree_root; |
| 2704 | } | 2719 | } |
| 2705 | dev_root->track_dirty = 1; | 2720 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state); |
| 2706 | fs_info->dev_root = dev_root; | 2721 | fs_info->dev_root = dev_root; |
| 2707 | btrfs_init_devices_late(fs_info); | 2722 | btrfs_init_devices_late(fs_info); |
| 2708 | 2723 | ||
| @@ -2712,13 +2727,13 @@ retry_root_backup: | |||
| 2712 | ret = PTR_ERR(csum_root); | 2727 | ret = PTR_ERR(csum_root); |
| 2713 | goto recovery_tree_root; | 2728 | goto recovery_tree_root; |
| 2714 | } | 2729 | } |
| 2715 | csum_root->track_dirty = 1; | 2730 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state); |
| 2716 | fs_info->csum_root = csum_root; | 2731 | fs_info->csum_root = csum_root; |
| 2717 | 2732 | ||
| 2718 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; | 2733 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; |
| 2719 | quota_root = btrfs_read_tree_root(tree_root, &location); | 2734 | quota_root = btrfs_read_tree_root(tree_root, &location); |
| 2720 | if (!IS_ERR(quota_root)) { | 2735 | if (!IS_ERR(quota_root)) { |
| 2721 | quota_root->track_dirty = 1; | 2736 | set_bit(BTRFS_ROOT_TRACK_DIRTY, "a_root->state); |
| 2722 | fs_info->quota_enabled = 1; | 2737 | fs_info->quota_enabled = 1; |
| 2723 | fs_info->pending_quota_state = 1; | 2738 | fs_info->pending_quota_state = 1; |
| 2724 | fs_info->quota_root = quota_root; | 2739 | fs_info->quota_root = quota_root; |
| @@ -2733,7 +2748,7 @@ retry_root_backup: | |||
| 2733 | create_uuid_tree = true; | 2748 | create_uuid_tree = true; |
| 2734 | check_uuid_tree = false; | 2749 | check_uuid_tree = false; |
| 2735 | } else { | 2750 | } else { |
| 2736 | uuid_root->track_dirty = 1; | 2751 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state); |
| 2737 | fs_info->uuid_root = uuid_root; | 2752 | fs_info->uuid_root = uuid_root; |
| 2738 | create_uuid_tree = false; | 2753 | create_uuid_tree = false; |
| 2739 | check_uuid_tree = | 2754 | check_uuid_tree = |
| @@ -2861,7 +2876,7 @@ retry_root_backup: | |||
| 2861 | printk(KERN_ERR "BTRFS: failed to read log tree\n"); | 2876 | printk(KERN_ERR "BTRFS: failed to read log tree\n"); |
| 2862 | free_extent_buffer(log_tree_root->node); | 2877 | free_extent_buffer(log_tree_root->node); |
| 2863 | kfree(log_tree_root); | 2878 | kfree(log_tree_root); |
| 2864 | goto fail_trans_kthread; | 2879 | goto fail_qgroup; |
| 2865 | } | 2880 | } |
| 2866 | /* returns with log_tree_root freed on success */ | 2881 | /* returns with log_tree_root freed on success */ |
| 2867 | ret = btrfs_recover_log_trees(log_tree_root); | 2882 | ret = btrfs_recover_log_trees(log_tree_root); |
| @@ -2870,24 +2885,24 @@ retry_root_backup: | |||
| 2870 | "Failed to recover log tree"); | 2885 | "Failed to recover log tree"); |
| 2871 | free_extent_buffer(log_tree_root->node); | 2886 | free_extent_buffer(log_tree_root->node); |
| 2872 | kfree(log_tree_root); | 2887 | kfree(log_tree_root); |
| 2873 | goto fail_trans_kthread; | 2888 | goto fail_qgroup; |
| 2874 | } | 2889 | } |
| 2875 | 2890 | ||
| 2876 | if (sb->s_flags & MS_RDONLY) { | 2891 | if (sb->s_flags & MS_RDONLY) { |
| 2877 | ret = btrfs_commit_super(tree_root); | 2892 | ret = btrfs_commit_super(tree_root); |
| 2878 | if (ret) | 2893 | if (ret) |
| 2879 | goto fail_trans_kthread; | 2894 | goto fail_qgroup; |
| 2880 | } | 2895 | } |
| 2881 | } | 2896 | } |
| 2882 | 2897 | ||
| 2883 | ret = btrfs_find_orphan_roots(tree_root); | 2898 | ret = btrfs_find_orphan_roots(tree_root); |
| 2884 | if (ret) | 2899 | if (ret) |
| 2885 | goto fail_trans_kthread; | 2900 | goto fail_qgroup; |
| 2886 | 2901 | ||
| 2887 | if (!(sb->s_flags & MS_RDONLY)) { | 2902 | if (!(sb->s_flags & MS_RDONLY)) { |
| 2888 | ret = btrfs_cleanup_fs_roots(fs_info); | 2903 | ret = btrfs_cleanup_fs_roots(fs_info); |
| 2889 | if (ret) | 2904 | if (ret) |
| 2890 | goto fail_trans_kthread; | 2905 | goto fail_qgroup; |
| 2891 | 2906 | ||
| 2892 | ret = btrfs_recover_relocation(tree_root); | 2907 | ret = btrfs_recover_relocation(tree_root); |
| 2893 | if (ret < 0) { | 2908 | if (ret < 0) { |
| @@ -2966,7 +2981,7 @@ fail_qgroup: | |||
| 2966 | fail_trans_kthread: | 2981 | fail_trans_kthread: |
| 2967 | kthread_stop(fs_info->transaction_kthread); | 2982 | kthread_stop(fs_info->transaction_kthread); |
| 2968 | btrfs_cleanup_transaction(fs_info->tree_root); | 2983 | btrfs_cleanup_transaction(fs_info->tree_root); |
| 2969 | del_fs_roots(fs_info); | 2984 | btrfs_free_fs_roots(fs_info); |
| 2970 | fail_cleaner: | 2985 | fail_cleaner: |
| 2971 | kthread_stop(fs_info->cleaner_kthread); | 2986 | kthread_stop(fs_info->cleaner_kthread); |
| 2972 | 2987 | ||
| @@ -3501,8 +3516,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
| 3501 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) | 3516 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
| 3502 | btrfs_free_log(NULL, root); | 3517 | btrfs_free_log(NULL, root); |
| 3503 | 3518 | ||
| 3504 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | 3519 | if (root->free_ino_pinned) |
| 3505 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | 3520 | __btrfs_remove_free_space_cache(root->free_ino_pinned); |
| 3521 | if (root->free_ino_ctl) | ||
| 3522 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | ||
| 3506 | free_fs_root(root); | 3523 | free_fs_root(root); |
| 3507 | } | 3524 | } |
| 3508 | 3525 | ||
| @@ -3533,28 +3550,51 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 3533 | { | 3550 | { |
| 3534 | u64 root_objectid = 0; | 3551 | u64 root_objectid = 0; |
| 3535 | struct btrfs_root *gang[8]; | 3552 | struct btrfs_root *gang[8]; |
| 3536 | int i; | 3553 | int i = 0; |
| 3537 | int ret; | 3554 | int err = 0; |
| 3555 | unsigned int ret = 0; | ||
| 3556 | int index; | ||
| 3538 | 3557 | ||
| 3539 | while (1) { | 3558 | while (1) { |
| 3559 | index = srcu_read_lock(&fs_info->subvol_srcu); | ||
| 3540 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | 3560 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, |
| 3541 | (void **)gang, root_objectid, | 3561 | (void **)gang, root_objectid, |
| 3542 | ARRAY_SIZE(gang)); | 3562 | ARRAY_SIZE(gang)); |
| 3543 | if (!ret) | 3563 | if (!ret) { |
| 3564 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 3544 | break; | 3565 | break; |
| 3545 | 3566 | } | |
| 3546 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 3567 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
| 3568 | |||
| 3547 | for (i = 0; i < ret; i++) { | 3569 | for (i = 0; i < ret; i++) { |
| 3548 | int err; | 3570 | /* Avoid grabbing roots in dead_roots */ |
| 3571 | if (btrfs_root_refs(&gang[i]->root_item) == 0) { | ||
| 3572 | gang[i] = NULL; | ||
| 3573 | continue; | ||
| 3574 | } | ||
| 3575 | /* grab all the search results for later use */ | ||
| 3576 | gang[i] = btrfs_grab_fs_root(gang[i]); | ||
| 3577 | } | ||
| 3578 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 3549 | 3579 | ||
| 3580 | for (i = 0; i < ret; i++) { | ||
| 3581 | if (!gang[i]) | ||
| 3582 | continue; | ||
| 3550 | root_objectid = gang[i]->root_key.objectid; | 3583 | root_objectid = gang[i]->root_key.objectid; |
| 3551 | err = btrfs_orphan_cleanup(gang[i]); | 3584 | err = btrfs_orphan_cleanup(gang[i]); |
| 3552 | if (err) | 3585 | if (err) |
| 3553 | return err; | 3586 | break; |
| 3587 | btrfs_put_fs_root(gang[i]); | ||
| 3554 | } | 3588 | } |
| 3555 | root_objectid++; | 3589 | root_objectid++; |
| 3556 | } | 3590 | } |
| 3557 | return 0; | 3591 | |
| 3592 | /* release the roots left uncleaned because of the error */ | ||
| 3593 | for (; i < ret; i++) { | ||
| 3594 | if (gang[i]) | ||
| 3595 | btrfs_put_fs_root(gang[i]); | ||
| 3596 | } | ||
| 3597 | return err; | ||
| 3558 | } | 3598 | } |
| 3559 | 3599 | ||
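The reworked btrfs_cleanup_fs_roots() is a three-phase pattern: pin every live root while the SRCU read lock protects the radix tree, run the slow orphan cleanup outside the lock, and drop whatever is still pinned if an error cut the loop short. A condensed user-space model of the reference discipline (grab/put stand in for btrfs_grab_fs_root()/btrfs_put_fs_root()):

#include <stdio.h>

struct root {
        unsigned long long objectid;
        int refs;       /* root_item refs: 0 means the root is dead */
        int grabbed;
};

static struct root *grab(struct root *r) { r->grabbed++; return r; }
static void put(struct root *r)          { r->grabbed--; }

static int cleanup_roots(struct root **gang, int n)
{
        int i, err = 0;

        /* phase 1: under the (modelled) SRCU read lock, skip dead
         * roots and pin the rest so they can't vanish later */
        for (i = 0; i < n; i++) {
                if (gang[i]->refs == 0) {
                        gang[i] = NULL;         /* dead root in dead_roots */
                        continue;
                }
                gang[i] = grab(gang[i]);
        }
        /* (read lock dropped here) */

        /* phase 2: do the slow orphan cleanup outside the lock */
        for (i = 0; i < n; i++) {
                if (!gang[i])
                        continue;
                /* err = btrfs_orphan_cleanup(gang[i]); */
                if (err)
                        break;
                put(gang[i]);
        }
        /* phase 3: on error, drop the references we still hold */
        for (; i < n; i++)
                if (gang[i])
                        put(gang[i]);
        return err;
}

int main(void)
{
        struct root a = { 256, 1, 0 }, b = { 257, 0, 0 };
        struct root *gang[2] = { &a, &b };
        printf("err=%d pinned=%d\n", cleanup_roots(gang, 2), a.grabbed);
        return 0;
}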
| 3560 | int btrfs_commit_super(struct btrfs_root *root) | 3600 | int btrfs_commit_super(struct btrfs_root *root) |
| @@ -3603,6 +3643,8 @@ int close_ctree(struct btrfs_root *root) | |||
| 3603 | /* clear out the rbtree of defraggable inodes */ | 3643 | /* clear out the rbtree of defraggable inodes */ |
| 3604 | btrfs_cleanup_defrag_inodes(fs_info); | 3644 | btrfs_cleanup_defrag_inodes(fs_info); |
| 3605 | 3645 | ||
| 3646 | cancel_work_sync(&fs_info->async_reclaim_work); | ||
| 3647 | |||
| 3606 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3648 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 3607 | ret = btrfs_commit_super(root); | 3649 | ret = btrfs_commit_super(root); |
| 3608 | if (ret) | 3650 | if (ret) |
| @@ -3627,12 +3669,17 @@ int close_ctree(struct btrfs_root *root) | |||
| 3627 | 3669 | ||
| 3628 | btrfs_sysfs_remove_one(fs_info); | 3670 | btrfs_sysfs_remove_one(fs_info); |
| 3629 | 3671 | ||
| 3630 | del_fs_roots(fs_info); | 3672 | btrfs_free_fs_roots(fs_info); |
| 3631 | 3673 | ||
| 3632 | btrfs_put_block_group_cache(fs_info); | 3674 | btrfs_put_block_group_cache(fs_info); |
| 3633 | 3675 | ||
| 3634 | btrfs_free_block_groups(fs_info); | 3676 | btrfs_free_block_groups(fs_info); |
| 3635 | 3677 | ||
| 3678 | /* | ||
| 3679 | * we must make sure there are no read requests left to | ||
| 3680 | * submit after we stop all workers. | ||
| 3681 | */ | ||
| 3682 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
| 3636 | btrfs_stop_all_workers(fs_info); | 3683 | btrfs_stop_all_workers(fs_info); |
| 3637 | 3684 | ||
| 3638 | free_root_pointers(fs_info, 1); | 3685 | free_root_pointers(fs_info, 1); |
| @@ -3709,6 +3756,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3709 | __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, | 3756 | __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, |
| 3710 | buf->len, | 3757 | buf->len, |
| 3711 | root->fs_info->dirty_metadata_batch); | 3758 | root->fs_info->dirty_metadata_batch); |
| 3759 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
| 3760 | if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { | ||
| 3761 | btrfs_print_leaf(root, buf); | ||
| 3762 | ASSERT(0); | ||
| 3763 | } | ||
| 3764 | #endif | ||
| 3712 | } | 3765 | } |
| 3713 | 3766 | ||
| 3714 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | 3767 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
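The new CONFIG_BTRFS_FS_CHECK_INTEGRITY hook validates a leaf at the moment it is dirtied, so corruption is caught while the offender is still on the stack rather than at the next read. The shape of the hook, as a sketch with toy stand-ins for check_leaf() and ASSERT():

#include <assert.h>
#include <stdio.h>

struct buf { int level; int nritems; };

/* toy stand-in for check_leaf(): nonzero means the leaf is bad */
static int check_leaf(const struct buf *b) { return b->nritems < 0; }

static void mark_buffer_dirty(struct buf *b)
{
        /* ... usual dirty-metadata accounting ... */
#ifdef CHECK_INTEGRITY  /* models CONFIG_BTRFS_FS_CHECK_INTEGRITY */
        if (b->level == 0 && check_leaf(b)) {
                fprintf(stderr, "corrupt leaf dirtied\n"); /* print_leaf() */
                assert(0);      /* models ASSERT(0) */
        }
#endif
}

int main(void)
{
        struct buf b = { 0, 3 };
        mark_buffer_dirty(&b);
        return 0;
}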
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 53059df350f8..23ce3ceba0a9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -68,6 +68,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
| 68 | int btrfs_init_fs_root(struct btrfs_root *root); | 68 | int btrfs_init_fs_root(struct btrfs_root *root); |
| 69 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | 69 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, |
| 70 | struct btrfs_root *root); | 70 | struct btrfs_root *root); |
| 71 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); | ||
| 71 | 72 | ||
| 72 | struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, | 73 | struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, |
| 73 | struct btrfs_key *key, | 74 | struct btrfs_key *key, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1306487c82cf..fafb3e53ecde 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -26,16 +26,16 @@ | |||
| 26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
| 27 | #include <linux/percpu_counter.h> | 27 | #include <linux/percpu_counter.h> |
| 28 | #include "hash.h" | 28 | #include "hash.h" |
| 29 | #include "ctree.h" | 29 | #include "tree-log.h" |
| 30 | #include "disk-io.h" | 30 | #include "disk-io.h" |
| 31 | #include "print-tree.h" | 31 | #include "print-tree.h" |
| 32 | #include "transaction.h" | ||
| 33 | #include "volumes.h" | 32 | #include "volumes.h" |
| 34 | #include "raid56.h" | 33 | #include "raid56.h" |
| 35 | #include "locking.h" | 34 | #include "locking.h" |
| 36 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
| 37 | #include "math.h" | 36 | #include "math.h" |
| 38 | #include "sysfs.h" | 37 | #include "sysfs.h" |
| 38 | #include "qgroup.h" | ||
| 39 | 39 | ||
| 40 | #undef SCRAMBLE_DELAYED_REFS | 40 | #undef SCRAMBLE_DELAYED_REFS |
| 41 | 41 | ||
| @@ -81,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 81 | u64 bytenr, u64 num_bytes, u64 parent, | 81 | u64 bytenr, u64 num_bytes, u64 parent, |
| 82 | u64 root_objectid, u64 owner_objectid, | 82 | u64 root_objectid, u64 owner_objectid, |
| 83 | u64 owner_offset, int refs_to_drop, | 83 | u64 owner_offset, int refs_to_drop, |
| 84 | struct btrfs_delayed_extent_op *extra_op); | 84 | struct btrfs_delayed_extent_op *extra_op, |
| 85 | int no_quota); | ||
| 85 | static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, | 86 | static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, |
| 86 | struct extent_buffer *leaf, | 87 | struct extent_buffer *leaf, |
| 87 | struct btrfs_extent_item *ei); | 88 | struct btrfs_extent_item *ei); |
| @@ -94,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 94 | struct btrfs_root *root, | 95 | struct btrfs_root *root, |
| 95 | u64 parent, u64 root_objectid, | 96 | u64 parent, u64 root_objectid, |
| 96 | u64 flags, struct btrfs_disk_key *key, | 97 | u64 flags, struct btrfs_disk_key *key, |
| 97 | int level, struct btrfs_key *ins); | 98 | int level, struct btrfs_key *ins, |
| 99 | int no_quota); | ||
| 98 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 100 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 99 | struct btrfs_root *extent_root, u64 flags, | 101 | struct btrfs_root *extent_root, u64 flags, |
| 100 | int force); | 102 | int force); |
| @@ -1271,7 +1273,7 @@ fail: | |||
| 1271 | static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, | 1273 | static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, |
| 1272 | struct btrfs_root *root, | 1274 | struct btrfs_root *root, |
| 1273 | struct btrfs_path *path, | 1275 | struct btrfs_path *path, |
| 1274 | int refs_to_drop) | 1276 | int refs_to_drop, int *last_ref) |
| 1275 | { | 1277 | { |
| 1276 | struct btrfs_key key; | 1278 | struct btrfs_key key; |
| 1277 | struct btrfs_extent_data_ref *ref1 = NULL; | 1279 | struct btrfs_extent_data_ref *ref1 = NULL; |
| @@ -1307,6 +1309,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, | |||
| 1307 | 1309 | ||
| 1308 | if (num_refs == 0) { | 1310 | if (num_refs == 0) { |
| 1309 | ret = btrfs_del_item(trans, root, path); | 1311 | ret = btrfs_del_item(trans, root, path); |
| 1312 | *last_ref = 1; | ||
| 1310 | } else { | 1313 | } else { |
| 1311 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) | 1314 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) |
| 1312 | btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); | 1315 | btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); |
| @@ -1542,6 +1545,7 @@ again: | |||
| 1542 | ret = 0; | 1545 | ret = 0; |
| 1543 | } | 1546 | } |
| 1544 | if (ret) { | 1547 | if (ret) { |
| 1548 | key.objectid = bytenr; | ||
| 1545 | key.type = BTRFS_EXTENT_ITEM_KEY; | 1549 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 1546 | key.offset = num_bytes; | 1550 | key.offset = num_bytes; |
| 1547 | btrfs_release_path(path); | 1551 | btrfs_release_path(path); |
| @@ -1763,7 +1767,8 @@ void update_inline_extent_backref(struct btrfs_root *root, | |||
| 1763 | struct btrfs_path *path, | 1767 | struct btrfs_path *path, |
| 1764 | struct btrfs_extent_inline_ref *iref, | 1768 | struct btrfs_extent_inline_ref *iref, |
| 1765 | int refs_to_mod, | 1769 | int refs_to_mod, |
| 1766 | struct btrfs_delayed_extent_op *extent_op) | 1770 | struct btrfs_delayed_extent_op *extent_op, |
| 1771 | int *last_ref) | ||
| 1767 | { | 1772 | { |
| 1768 | struct extent_buffer *leaf; | 1773 | struct extent_buffer *leaf; |
| 1769 | struct btrfs_extent_item *ei; | 1774 | struct btrfs_extent_item *ei; |
| @@ -1807,6 +1812,7 @@ void update_inline_extent_backref(struct btrfs_root *root, | |||
| 1807 | else | 1812 | else |
| 1808 | btrfs_set_shared_data_ref_count(leaf, sref, refs); | 1813 | btrfs_set_shared_data_ref_count(leaf, sref, refs); |
| 1809 | } else { | 1814 | } else { |
| 1815 | *last_ref = 1; | ||
| 1810 | size = btrfs_extent_inline_ref_size(type); | 1816 | size = btrfs_extent_inline_ref_size(type); |
| 1811 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | 1817 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
| 1812 | ptr = (unsigned long)iref; | 1818 | ptr = (unsigned long)iref; |
| @@ -1838,7 +1844,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, | |||
| 1838 | if (ret == 0) { | 1844 | if (ret == 0) { |
| 1839 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); | 1845 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); |
| 1840 | update_inline_extent_backref(root, path, iref, | 1846 | update_inline_extent_backref(root, path, iref, |
| 1841 | refs_to_add, extent_op); | 1847 | refs_to_add, extent_op, NULL); |
| 1842 | } else if (ret == -ENOENT) { | 1848 | } else if (ret == -ENOENT) { |
| 1843 | setup_inline_extent_backref(root, path, iref, parent, | 1849 | setup_inline_extent_backref(root, path, iref, parent, |
| 1844 | root_objectid, owner, offset, | 1850 | root_objectid, owner, offset, |
| @@ -1871,17 +1877,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
| 1871 | struct btrfs_root *root, | 1877 | struct btrfs_root *root, |
| 1872 | struct btrfs_path *path, | 1878 | struct btrfs_path *path, |
| 1873 | struct btrfs_extent_inline_ref *iref, | 1879 | struct btrfs_extent_inline_ref *iref, |
| 1874 | int refs_to_drop, int is_data) | 1880 | int refs_to_drop, int is_data, int *last_ref) |
| 1875 | { | 1881 | { |
| 1876 | int ret = 0; | 1882 | int ret = 0; |
| 1877 | 1883 | ||
| 1878 | BUG_ON(!is_data && refs_to_drop != 1); | 1884 | BUG_ON(!is_data && refs_to_drop != 1); |
| 1879 | if (iref) { | 1885 | if (iref) { |
| 1880 | update_inline_extent_backref(root, path, iref, | 1886 | update_inline_extent_backref(root, path, iref, |
| 1881 | -refs_to_drop, NULL); | 1887 | -refs_to_drop, NULL, last_ref); |
| 1882 | } else if (is_data) { | 1888 | } else if (is_data) { |
| 1883 | ret = remove_extent_data_ref(trans, root, path, refs_to_drop); | 1889 | ret = remove_extent_data_ref(trans, root, path, refs_to_drop, |
| 1890 | last_ref); | ||
| 1884 | } else { | 1891 | } else { |
| 1892 | *last_ref = 1; | ||
| 1885 | ret = btrfs_del_item(trans, root, path); | 1893 | ret = btrfs_del_item(trans, root, path); |
| 1886 | } | 1894 | } |
| 1887 | return ret; | 1895 | return ret; |
| @@ -1945,7 +1953,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1945 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1953 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
| 1946 | struct btrfs_root *root, | 1954 | struct btrfs_root *root, |
| 1947 | u64 bytenr, u64 num_bytes, u64 parent, | 1955 | u64 bytenr, u64 num_bytes, u64 parent, |
| 1948 | u64 root_objectid, u64 owner, u64 offset, int for_cow) | 1956 | u64 root_objectid, u64 owner, u64 offset, |
| 1957 | int no_quota) | ||
| 1949 | { | 1958 | { |
| 1950 | int ret; | 1959 | int ret; |
| 1951 | struct btrfs_fs_info *fs_info = root->fs_info; | 1960 | struct btrfs_fs_info *fs_info = root->fs_info; |
| @@ -1957,12 +1966,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
| 1957 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 1966 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
| 1958 | num_bytes, | 1967 | num_bytes, |
| 1959 | parent, root_objectid, (int)owner, | 1968 | parent, root_objectid, (int)owner, |
| 1960 | BTRFS_ADD_DELAYED_REF, NULL, for_cow); | 1969 | BTRFS_ADD_DELAYED_REF, NULL, no_quota); |
| 1961 | } else { | 1970 | } else { |
| 1962 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, | 1971 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
| 1963 | num_bytes, | 1972 | num_bytes, |
| 1964 | parent, root_objectid, owner, offset, | 1973 | parent, root_objectid, owner, offset, |
| 1965 | BTRFS_ADD_DELAYED_REF, NULL, for_cow); | 1974 | BTRFS_ADD_DELAYED_REF, NULL, no_quota); |
| 1966 | } | 1975 | } |
| 1967 | return ret; | 1976 | return ret; |
| 1968 | } | 1977 | } |
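btrfs_inc_extent_ref() routes on the owner argument: for metadata it carries the tree level, for data the inode objectid, and everything below BTRFS_FIRST_FREE_OBJECTID is treated as a level. A simplified, runnable version of the dispatch:

#include <stdio.h>

#define BTRFS_FIRST_FREE_OBJECTID 256ULL

/* metadata owners are levels (0..7); data owners are inode objectids,
 * which always sit at or above 256 */
static const char *route_ref(unsigned long long owner)
{
        if (owner < BTRFS_FIRST_FREE_OBJECTID)
                return "btrfs_add_delayed_tree_ref";
        return "btrfs_add_delayed_data_ref";
}

int main(void)
{
        printf("%s\n", route_ref(1));    /* a level-1 node */
        printf("%s\n", route_ref(257));  /* a file extent */
        return 0;
}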
| @@ -1972,31 +1981,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
| 1972 | u64 bytenr, u64 num_bytes, | 1981 | u64 bytenr, u64 num_bytes, |
| 1973 | u64 parent, u64 root_objectid, | 1982 | u64 parent, u64 root_objectid, |
| 1974 | u64 owner, u64 offset, int refs_to_add, | 1983 | u64 owner, u64 offset, int refs_to_add, |
| 1984 | int no_quota, | ||
| 1975 | struct btrfs_delayed_extent_op *extent_op) | 1985 | struct btrfs_delayed_extent_op *extent_op) |
| 1976 | { | 1986 | { |
| 1987 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 1977 | struct btrfs_path *path; | 1988 | struct btrfs_path *path; |
| 1978 | struct extent_buffer *leaf; | 1989 | struct extent_buffer *leaf; |
| 1979 | struct btrfs_extent_item *item; | 1990 | struct btrfs_extent_item *item; |
| 1991 | struct btrfs_key key; | ||
| 1980 | u64 refs; | 1992 | u64 refs; |
| 1981 | int ret; | 1993 | int ret; |
| 1994 | enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL; | ||
| 1982 | 1995 | ||
| 1983 | path = btrfs_alloc_path(); | 1996 | path = btrfs_alloc_path(); |
| 1984 | if (!path) | 1997 | if (!path) |
| 1985 | return -ENOMEM; | 1998 | return -ENOMEM; |
| 1986 | 1999 | ||
| 2000 | if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) | ||
| 2001 | no_quota = 1; | ||
| 2002 | |||
| 1987 | path->reada = 1; | 2003 | path->reada = 1; |
| 1988 | path->leave_spinning = 1; | 2004 | path->leave_spinning = 1; |
| 1989 | /* this will setup the path even if it fails to insert the back ref */ | 2005 | /* this will setup the path even if it fails to insert the back ref */ |
| 1990 | ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, | 2006 | ret = insert_inline_extent_backref(trans, fs_info->extent_root, path, |
| 1991 | path, bytenr, num_bytes, parent, | 2007 | bytenr, num_bytes, parent, |
| 1992 | root_objectid, owner, offset, | 2008 | root_objectid, owner, offset, |
| 1993 | refs_to_add, extent_op); | 2009 | refs_to_add, extent_op); |
| 1994 | if (ret != -EAGAIN) | 2010 | if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota)) |
| 2011 | goto out; | ||
| 2012 | /* | ||
| 2013 | * Ok we were able to insert an inline extent and it appears to be a new | ||
| 2014 | * reference, deal with the qgroup accounting. | ||
| 2015 | */ | ||
| 2016 | if (!ret && !no_quota) { | ||
| 2017 | ASSERT(root->fs_info->quota_enabled); | ||
| 2018 | leaf = path->nodes[0]; | ||
| 2019 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 2020 | item = btrfs_item_ptr(leaf, path->slots[0], | ||
| 2021 | struct btrfs_extent_item); | ||
| 2022 | if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add) | ||
| 2023 | type = BTRFS_QGROUP_OPER_ADD_SHARED; | ||
| 2024 | btrfs_release_path(path); | ||
| 2025 | |||
| 2026 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
| 2027 | bytenr, num_bytes, type, 0); | ||
| 1995 | goto out; | 2028 | goto out; |
| 2029 | } | ||
| 1996 | 2030 | ||
| 2031 | /* | ||
| 2032 | * Ok, we had -EAGAIN, which means we didn't have space to insert an | ||
| 2033 | * inline extent ref, so just update the reference count and add a | ||
| 2034 | * normal backref. | ||
| 2035 | */ | ||
| 1997 | leaf = path->nodes[0]; | 2036 | leaf = path->nodes[0]; |
| 2037 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 1998 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | 2038 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
| 1999 | refs = btrfs_extent_refs(leaf, item); | 2039 | refs = btrfs_extent_refs(leaf, item); |
| 2040 | if (refs) | ||
| 2041 | type = BTRFS_QGROUP_OPER_ADD_SHARED; | ||
| 2000 | btrfs_set_extent_refs(leaf, item, refs + refs_to_add); | 2042 | btrfs_set_extent_refs(leaf, item, refs + refs_to_add); |
| 2001 | if (extent_op) | 2043 | if (extent_op) |
| 2002 | __run_delayed_extent_op(extent_op, leaf, item); | 2044 | __run_delayed_extent_op(extent_op, leaf, item); |
| @@ -2004,9 +2046,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
| 2004 | btrfs_mark_buffer_dirty(leaf); | 2046 | btrfs_mark_buffer_dirty(leaf); |
| 2005 | btrfs_release_path(path); | 2047 | btrfs_release_path(path); |
| 2006 | 2048 | ||
| 2049 | if (!no_quota) { | ||
| 2050 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
| 2051 | bytenr, num_bytes, type, 0); | ||
| 2052 | if (ret) | ||
| 2053 | goto out; | ||
| 2054 | } | ||
| 2055 | |||
| 2007 | path->reada = 1; | 2056 | path->reada = 1; |
| 2008 | path->leave_spinning = 1; | 2057 | path->leave_spinning = 1; |
| 2009 | |||
| 2010 | /* now insert the actual backref */ | 2058 | /* now insert the actual backref */ |
| 2011 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | 2059 | ret = insert_extent_backref(trans, root->fs_info->extent_root, |
| 2012 | path, bytenr, parent, root_objectid, | 2060 | path, bytenr, parent, root_objectid, |
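The two qgroup classification sites above encode the same question, "did this extent already have owners?", read at different moments: after the inline insert the item count already includes refs_to_add, while the -EAGAIN fallback reads the count before bumping it. A small model of both tests:

#include <stdio.h>

typedef unsigned long long u64;

enum qgroup_op { OPER_ADD_EXCL, OPER_ADD_SHARED };

/* fast path: the inline backref went in, so the on-disk count already
 * counts our refs_to_add; anything above that means prior owners */
static enum qgroup_op after_insert(u64 refs_now, u64 refs_to_add)
{
        return refs_now > refs_to_add ? OPER_ADD_SHARED : OPER_ADD_EXCL;
}

/* -EAGAIN path: the count is read before we bump it */
static enum qgroup_op before_bump(u64 refs_before)
{
        return refs_before ? OPER_ADD_SHARED : OPER_ADD_EXCL;
}

int main(void)
{
        printf("%d %d\n", after_insert(1, 1), before_bump(2));
        return 0;
}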
| @@ -2040,8 +2088,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
| 2040 | 2088 | ||
| 2041 | if (node->type == BTRFS_SHARED_DATA_REF_KEY) | 2089 | if (node->type == BTRFS_SHARED_DATA_REF_KEY) |
| 2042 | parent = ref->parent; | 2090 | parent = ref->parent; |
| 2043 | else | 2091 | ref_root = ref->root; |
| 2044 | ref_root = ref->root; | ||
| 2045 | 2092 | ||
| 2046 | if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { | 2093 | if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { |
| 2047 | if (extent_op) | 2094 | if (extent_op) |
| @@ -2055,13 +2102,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
| 2055 | node->num_bytes, parent, | 2102 | node->num_bytes, parent, |
| 2056 | ref_root, ref->objectid, | 2103 | ref_root, ref->objectid, |
| 2057 | ref->offset, node->ref_mod, | 2104 | ref->offset, node->ref_mod, |
| 2058 | extent_op); | 2105 | node->no_quota, extent_op); |
| 2059 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { | 2106 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { |
| 2060 | ret = __btrfs_free_extent(trans, root, node->bytenr, | 2107 | ret = __btrfs_free_extent(trans, root, node->bytenr, |
| 2061 | node->num_bytes, parent, | 2108 | node->num_bytes, parent, |
| 2062 | ref_root, ref->objectid, | 2109 | ref_root, ref->objectid, |
| 2063 | ref->offset, node->ref_mod, | 2110 | ref->offset, node->ref_mod, |
| 2064 | extent_op); | 2111 | extent_op, node->no_quota); |
| 2065 | } else { | 2112 | } else { |
| 2066 | BUG(); | 2113 | BUG(); |
| 2067 | } | 2114 | } |
| @@ -2198,8 +2245,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
| 2198 | 2245 | ||
| 2199 | if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) | 2246 | if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) |
| 2200 | parent = ref->parent; | 2247 | parent = ref->parent; |
| 2201 | else | 2248 | ref_root = ref->root; |
| 2202 | ref_root = ref->root; | ||
| 2203 | 2249 | ||
| 2204 | ins.objectid = node->bytenr; | 2250 | ins.objectid = node->bytenr; |
| 2205 | if (skinny_metadata) { | 2251 | if (skinny_metadata) { |
| @@ -2217,15 +2263,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
| 2217 | parent, ref_root, | 2263 | parent, ref_root, |
| 2218 | extent_op->flags_to_set, | 2264 | extent_op->flags_to_set, |
| 2219 | &extent_op->key, | 2265 | &extent_op->key, |
| 2220 | ref->level, &ins); | 2266 | ref->level, &ins, |
| 2267 | node->no_quota); | ||
| 2221 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 2268 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
| 2222 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 2269 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
| 2223 | node->num_bytes, parent, ref_root, | 2270 | node->num_bytes, parent, ref_root, |
| 2224 | ref->level, 0, 1, extent_op); | 2271 | ref->level, 0, 1, node->no_quota, |
| 2272 | extent_op); | ||
| 2225 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { | 2273 | } else if (node->action == BTRFS_DROP_DELAYED_REF) { |
| 2226 | ret = __btrfs_free_extent(trans, root, node->bytenr, | 2274 | ret = __btrfs_free_extent(trans, root, node->bytenr, |
| 2227 | node->num_bytes, parent, ref_root, | 2275 | node->num_bytes, parent, ref_root, |
| 2228 | ref->level, 0, 1, extent_op); | 2276 | ref->level, 0, 1, extent_op, |
| 2277 | node->no_quota); | ||
| 2229 | } else { | 2278 | } else { |
| 2230 | BUG(); | 2279 | BUG(); |
| 2231 | } | 2280 | } |
| @@ -2573,42 +2622,6 @@ static u64 find_middle(struct rb_root *root) | |||
| 2573 | } | 2622 | } |
| 2574 | #endif | 2623 | #endif |
| 2575 | 2624 | ||
| 2576 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
| 2577 | struct btrfs_fs_info *fs_info) | ||
| 2578 | { | ||
| 2579 | struct qgroup_update *qgroup_update; | ||
| 2580 | int ret = 0; | ||
| 2581 | |||
| 2582 | if (list_empty(&trans->qgroup_ref_list) != | ||
| 2583 | !trans->delayed_ref_elem.seq) { | ||
| 2584 | /* list without seq or seq without list */ | ||
| 2585 | btrfs_err(fs_info, | ||
| 2586 | "qgroup accounting update error, list is%s empty, seq is %#x.%x", | ||
| 2587 | list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
| 2588 | (u32)(trans->delayed_ref_elem.seq >> 32), | ||
| 2589 | (u32)trans->delayed_ref_elem.seq); | ||
| 2590 | BUG(); | ||
| 2591 | } | ||
| 2592 | |||
| 2593 | if (!trans->delayed_ref_elem.seq) | ||
| 2594 | return 0; | ||
| 2595 | |||
| 2596 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
| 2597 | qgroup_update = list_first_entry(&trans->qgroup_ref_list, | ||
| 2598 | struct qgroup_update, list); | ||
| 2599 | list_del(&qgroup_update->list); | ||
| 2600 | if (!ret) | ||
| 2601 | ret = btrfs_qgroup_account_ref( | ||
| 2602 | trans, fs_info, qgroup_update->node, | ||
| 2603 | qgroup_update->extent_op); | ||
| 2604 | kfree(qgroup_update); | ||
| 2605 | } | ||
| 2606 | |||
| 2607 | btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
| 2608 | |||
| 2609 | return ret; | ||
| 2610 | } | ||
| 2611 | |||
| 2612 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) | 2625 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) |
| 2613 | { | 2626 | { |
| 2614 | u64 num_bytes; | 2627 | u64 num_bytes; |
| @@ -2661,15 +2674,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2661 | u64 num_entries = | 2674 | u64 num_entries = |
| 2662 | atomic_read(&trans->transaction->delayed_refs.num_entries); | 2675 | atomic_read(&trans->transaction->delayed_refs.num_entries); |
| 2663 | u64 avg_runtime; | 2676 | u64 avg_runtime; |
| 2677 | u64 val; | ||
| 2664 | 2678 | ||
| 2665 | smp_mb(); | 2679 | smp_mb(); |
| 2666 | avg_runtime = fs_info->avg_delayed_ref_runtime; | 2680 | avg_runtime = fs_info->avg_delayed_ref_runtime; |
| 2681 | val = num_entries * avg_runtime; | ||
| 2667 | if (num_entries * avg_runtime >= NSEC_PER_SEC) | 2682 | if (num_entries * avg_runtime >= NSEC_PER_SEC) |
| 2668 | return 1; | 2683 | return 1; |
| 2684 | if (val >= NSEC_PER_SEC / 2) | ||
| 2685 | return 2; | ||
| 2669 | 2686 | ||
| 2670 | return btrfs_check_space_for_delayed_refs(trans, root); | 2687 | return btrfs_check_space_for_delayed_refs(trans, root); |
| 2671 | } | 2688 | } |
| 2672 | 2689 | ||
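btrfs_should_throttle_delayed_refs() now grades the backlog: a full second of estimated work returns 1 and half a second returns 2. Judging by the caller in transaction.c, 1 appears to mean flush-and-wait while 2 means kick the async helper without waiting, though that interpretation is surrounding context, not shown in this hunk. The arithmetic, runnable:

#include <stdio.h>

typedef unsigned long long u64;

#define NSEC_PER_SEC 1000000000ULL

/* 0: no pressure; 1: heavy, flush synchronously;
 * 2: moderate, hand off to the async worker (assumed meanings) */
static int throttle_level(u64 num_entries, u64 avg_runtime_ns)
{
        u64 val = num_entries * avg_runtime_ns;

        if (val >= NSEC_PER_SEC)
                return 1;
        if (val >= NSEC_PER_SEC / 2)
                return 2;
        return 0;
}

int main(void)
{
        /* 20000 refs at 30us each is ~0.6s of estimated work */
        printf("%d\n", throttle_level(20000, 30000));   /* -> 2 */
        printf("%d\n", throttle_level(50000, 30000));   /* -> 1 */
        return 0;
}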
| 2690 | struct async_delayed_refs { | ||
| 2691 | struct btrfs_root *root; | ||
| 2692 | int count; | ||
| 2693 | int error; | ||
| 2694 | int sync; | ||
| 2695 | struct completion wait; | ||
| 2696 | struct btrfs_work work; | ||
| 2697 | }; | ||
| 2698 | |||
| 2699 | static void delayed_ref_async_start(struct btrfs_work *work) | ||
| 2700 | { | ||
| 2701 | struct async_delayed_refs *async; | ||
| 2702 | struct btrfs_trans_handle *trans; | ||
| 2703 | int ret; | ||
| 2704 | |||
| 2705 | async = container_of(work, struct async_delayed_refs, work); | ||
| 2706 | |||
| 2707 | trans = btrfs_join_transaction(async->root); | ||
| 2708 | if (IS_ERR(trans)) { | ||
| 2709 | async->error = PTR_ERR(trans); | ||
| 2710 | goto done; | ||
| 2711 | } | ||
| 2712 | |||
| 2713 | /* | ||
| 2714 | * trans->sync means that when we call end_transaction, we won't | ||
| 2715 | * wait on delayed refs | ||
| 2716 | */ | ||
| 2717 | trans->sync = true; | ||
| 2718 | ret = btrfs_run_delayed_refs(trans, async->root, async->count); | ||
| 2719 | if (ret) | ||
| 2720 | async->error = ret; | ||
| 2721 | |||
| 2722 | ret = btrfs_end_transaction(trans, async->root); | ||
| 2723 | if (ret && !async->error) | ||
| 2724 | async->error = ret; | ||
| 2725 | done: | ||
| 2726 | if (async->sync) | ||
| 2727 | complete(&async->wait); | ||
| 2728 | else | ||
| 2729 | kfree(async); | ||
| 2730 | } | ||
| 2731 | |||
| 2732 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, | ||
| 2733 | unsigned long count, int wait) | ||
| 2734 | { | ||
| 2735 | struct async_delayed_refs *async; | ||
| 2736 | int ret; | ||
| 2737 | |||
| 2738 | async = kmalloc(sizeof(*async), GFP_NOFS); | ||
| 2739 | if (!async) | ||
| 2740 | return -ENOMEM; | ||
| 2741 | |||
| 2742 | async->root = root->fs_info->tree_root; | ||
| 2743 | async->count = count; | ||
| 2744 | async->error = 0; | ||
| 2745 | if (wait) | ||
| 2746 | async->sync = 1; | ||
| 2747 | else | ||
| 2748 | async->sync = 0; | ||
| 2749 | init_completion(&async->wait); | ||
| 2750 | |||
| 2751 | btrfs_init_work(&async->work, delayed_ref_async_start, | ||
| 2752 | NULL, NULL); | ||
| 2753 | |||
| 2754 | btrfs_queue_work(root->fs_info->extent_workers, &async->work); | ||
| 2755 | |||
| 2756 | if (wait) { | ||
| 2757 | wait_for_completion(&async->wait); | ||
| 2758 | ret = async->error; | ||
| 2759 | kfree(async); | ||
| 2760 | return ret; | ||
| 2761 | } | ||
| 2762 | return 0; | ||
| 2763 | } | ||
| 2764 | |||
| 2673 | /* | 2765 | /* |
| 2674 | * this starts processing the delayed reference count updates and | 2766 | * this starts processing the delayed reference count updates and |
| 2675 | * extent insertions we have queued up so far. count can be | 2767 | * extent insertions we have queued up so far. count can be |
| @@ -2697,8 +2789,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2697 | if (root == root->fs_info->extent_root) | 2789 | if (root == root->fs_info->extent_root) |
| 2698 | root = root->fs_info->tree_root; | 2790 | root = root->fs_info->tree_root; |
| 2699 | 2791 | ||
| 2700 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 2701 | |||
| 2702 | delayed_refs = &trans->transaction->delayed_refs; | 2792 | delayed_refs = &trans->transaction->delayed_refs; |
| 2703 | if (count == 0) { | 2793 | if (count == 0) { |
| 2704 | count = atomic_read(&delayed_refs->num_entries) * 2; | 2794 | count = atomic_read(&delayed_refs->num_entries) * 2; |
| @@ -2757,6 +2847,9 @@ again: | |||
| 2757 | goto again; | 2847 | goto again; |
| 2758 | } | 2848 | } |
| 2759 | out: | 2849 | out: |
| 2850 | ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info); | ||
| 2851 | if (ret) | ||
| 2852 | return ret; | ||
| 2760 | assert_qgroups_uptodate(trans); | 2853 | assert_qgroups_uptodate(trans); |
| 2761 | return 0; | 2854 | return 0; |
| 2762 | } | 2855 | } |
| @@ -2963,7 +3056,7 @@ out: | |||
| 2963 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | 3056 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, |
| 2964 | struct btrfs_root *root, | 3057 | struct btrfs_root *root, |
| 2965 | struct extent_buffer *buf, | 3058 | struct extent_buffer *buf, |
| 2966 | int full_backref, int inc, int for_cow) | 3059 | int full_backref, int inc, int no_quota) |
| 2967 | { | 3060 | { |
| 2968 | u64 bytenr; | 3061 | u64 bytenr; |
| 2969 | u64 num_bytes; | 3062 | u64 num_bytes; |
| @@ -2978,11 +3071,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
| 2978 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 3071 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
| 2979 | u64, u64, u64, u64, u64, u64, int); | 3072 | u64, u64, u64, u64, u64, u64, int); |
| 2980 | 3073 | ||
| 3074 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 3075 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
| 3076 | return 0; | ||
| 3077 | #endif | ||
| 2981 | ref_root = btrfs_header_owner(buf); | 3078 | ref_root = btrfs_header_owner(buf); |
| 2982 | nritems = btrfs_header_nritems(buf); | 3079 | nritems = btrfs_header_nritems(buf); |
| 2983 | level = btrfs_header_level(buf); | 3080 | level = btrfs_header_level(buf); |
| 2984 | 3081 | ||
| 2985 | if (!root->ref_cows && level == 0) | 3082 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0) |
| 2986 | return 0; | 3083 | return 0; |
| 2987 | 3084 | ||
| 2988 | if (inc) | 3085 | if (inc) |
| @@ -3013,7 +3110,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
| 3013 | key.offset -= btrfs_file_extent_offset(buf, fi); | 3110 | key.offset -= btrfs_file_extent_offset(buf, fi); |
| 3014 | ret = process_func(trans, root, bytenr, num_bytes, | 3111 | ret = process_func(trans, root, bytenr, num_bytes, |
| 3015 | parent, ref_root, key.objectid, | 3112 | parent, ref_root, key.objectid, |
| 3016 | key.offset, for_cow); | 3113 | key.offset, no_quota); |
| 3017 | if (ret) | 3114 | if (ret) |
| 3018 | goto fail; | 3115 | goto fail; |
| 3019 | } else { | 3116 | } else { |
| @@ -3021,7 +3118,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
| 3021 | num_bytes = btrfs_level_size(root, level - 1); | 3118 | num_bytes = btrfs_level_size(root, level - 1); |
| 3022 | ret = process_func(trans, root, bytenr, num_bytes, | 3119 | ret = process_func(trans, root, bytenr, num_bytes, |
| 3023 | parent, ref_root, level - 1, 0, | 3120 | parent, ref_root, level - 1, 0, |
| 3024 | for_cow); | 3121 | no_quota); |
| 3025 | if (ret) | 3122 | if (ret) |
| 3026 | goto fail; | 3123 | goto fail; |
| 3027 | } | 3124 | } |
| @@ -3032,15 +3129,15 @@ fail: | |||
| 3032 | } | 3129 | } |
| 3033 | 3130 | ||
| 3034 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3131 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 3035 | struct extent_buffer *buf, int full_backref, int for_cow) | 3132 | struct extent_buffer *buf, int full_backref, int no_quota) |
| 3036 | { | 3133 | { |
| 3037 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); | 3134 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); |
| 3038 | } | 3135 | } |
| 3039 | 3136 | ||
| 3040 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3137 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 3041 | struct extent_buffer *buf, int full_backref, int for_cow) | 3138 | struct extent_buffer *buf, int full_backref, int no_quota) |
| 3042 | { | 3139 | { |
| 3043 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); | 3140 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); |
| 3044 | } | 3141 | } |
| 3045 | 3142 | ||
| 3046 | static int write_one_cache_group(struct btrfs_trans_handle *trans, | 3143 | static int write_one_cache_group(struct btrfs_trans_handle *trans, |
| @@ -3400,10 +3497,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 3400 | return ret; | 3497 | return ret; |
| 3401 | } | 3498 | } |
| 3402 | 3499 | ||
| 3403 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | 3500 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
| 3404 | INIT_LIST_HEAD(&found->block_groups[i]); | 3501 | INIT_LIST_HEAD(&found->block_groups[i]); |
| 3405 | kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype); | ||
| 3406 | } | ||
| 3407 | init_rwsem(&found->groups_sem); | 3502 | init_rwsem(&found->groups_sem); |
| 3408 | spin_lock_init(&found->lock); | 3503 | spin_lock_init(&found->lock); |
| 3409 | found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; | 3504 | found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; |
| @@ -3542,11 +3637,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
| 3542 | return extended_to_chunk(flags | tmp); | 3637 | return extended_to_chunk(flags | tmp); |
| 3543 | } | 3638 | } |
| 3544 | 3639 | ||
| 3545 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | 3640 | static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags) |
| 3546 | { | 3641 | { |
| 3547 | unsigned seq; | 3642 | unsigned seq; |
| 3643 | u64 flags; | ||
| 3548 | 3644 | ||
| 3549 | do { | 3645 | do { |
| 3646 | flags = orig_flags; | ||
| 3550 | seq = read_seqbegin(&root->fs_info->profiles_lock); | 3647 | seq = read_seqbegin(&root->fs_info->profiles_lock); |
| 3551 | 3648 | ||
| 3552 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3649 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
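The get_alloc_profile() change above is a subtle seqlock fix: the profile bits used to be OR'd into the same variable the retry loop re-read, so a second pass through read_seqbegin() started from already-mutated state. A minimal sketch of the read-side pattern the fix restores, with the reduce step from the real function omitted:

    static u64 read_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
    {
    	unsigned seq;
    	u64 flags;

    	do {
    		flags = orig_flags;	/* recompute from scratch on every retry */
    		seq = read_seqbegin(&fs_info->profiles_lock);
    		if (flags & BTRFS_BLOCK_GROUP_DATA)
    			flags |= fs_info->avail_data_alloc_bits;
    	} while (read_seqretry(&fs_info->profiles_lock, seq));

    	return flags;
    }

Any state derived inside a seqlock read section has to be treated as scratch: a concurrent writer can invalidate the whole pass and force it to run again.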
| @@ -4201,6 +4298,104 @@ static int flush_space(struct btrfs_root *root, | |||
| 4201 | 4298 | ||
| 4202 | return ret; | 4299 | return ret; |
| 4203 | } | 4300 | } |
| 4301 | |||
| 4302 | static inline u64 | ||
| 4303 | btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, | ||
| 4304 | struct btrfs_space_info *space_info) | ||
| 4305 | { | ||
| 4306 | u64 used; | ||
| 4307 | u64 expected; | ||
| 4308 | u64 to_reclaim; | ||
| 4309 | |||
| 4310 | to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024, | ||
| 4311 | 16 * 1024 * 1024); | ||
| 4312 | spin_lock(&space_info->lock); | ||
| 4313 | if (can_overcommit(root, space_info, to_reclaim, | ||
| 4314 | BTRFS_RESERVE_FLUSH_ALL)) { | ||
| 4315 | to_reclaim = 0; | ||
| 4316 | goto out; | ||
| 4317 | } | ||
| 4318 | |||
| 4319 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
| 4320 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 4321 | space_info->bytes_may_use; | ||
| 4322 | if (can_overcommit(root, space_info, 1024 * 1024, | ||
| 4323 | BTRFS_RESERVE_FLUSH_ALL)) | ||
| 4324 | expected = div_factor_fine(space_info->total_bytes, 95); | ||
| 4325 | else | ||
| 4326 | expected = div_factor_fine(space_info->total_bytes, 90); | ||
| 4327 | |||
| 4328 | if (used > expected) | ||
| 4329 | to_reclaim = used - expected; | ||
| 4330 | else | ||
| 4331 | to_reclaim = 0; | ||
| 4332 | to_reclaim = min(to_reclaim, space_info->bytes_may_use + | ||
| 4333 | space_info->bytes_reserved); | ||
| 4334 | out: | ||
| 4335 | spin_unlock(&space_info->lock); | ||
| 4336 | |||
| 4337 | return to_reclaim; | ||
| 4338 | } | ||
| 4339 | |||
| 4340 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | ||
| 4341 | struct btrfs_fs_info *fs_info, u64 used) | ||
| 4342 | { | ||
| 4343 | return (used >= div_factor_fine(space_info->total_bytes, 98) && | ||
| 4344 | !btrfs_fs_closing(fs_info) && | ||
| 4345 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | ||
| 4346 | } | ||
| 4347 | |||
| 4348 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, | ||
| 4349 | struct btrfs_fs_info *fs_info) | ||
| 4350 | { | ||
| 4351 | u64 used; | ||
| 4352 | |||
| 4353 | spin_lock(&space_info->lock); | ||
| 4354 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
| 4355 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 4356 | space_info->bytes_may_use; | ||
| 4357 | if (need_do_async_reclaim(space_info, fs_info, used)) { | ||
| 4358 | spin_unlock(&space_info->lock); | ||
| 4359 | return 1; | ||
| 4360 | } | ||
| 4361 | spin_unlock(&space_info->lock); | ||
| 4362 | |||
| 4363 | return 0; | ||
| 4364 | } | ||
| 4365 | |||
| 4366 | static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | ||
| 4367 | { | ||
| 4368 | struct btrfs_fs_info *fs_info; | ||
| 4369 | struct btrfs_space_info *space_info; | ||
| 4370 | u64 to_reclaim; | ||
| 4371 | int flush_state; | ||
| 4372 | |||
| 4373 | fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); | ||
| 4374 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
| 4375 | |||
| 4376 | to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, | ||
| 4377 | space_info); | ||
| 4378 | if (!to_reclaim) | ||
| 4379 | return; | ||
| 4380 | |||
| 4381 | flush_state = FLUSH_DELAYED_ITEMS_NR; | ||
| 4382 | do { | ||
| 4383 | flush_space(fs_info->fs_root, space_info, to_reclaim, | ||
| 4384 | to_reclaim, flush_state); | ||
| 4385 | flush_state++; | ||
| 4386 | if (!btrfs_need_do_async_reclaim(space_info, fs_info)) | ||
| 4387 | return; | ||
| 4388 | } while (flush_state <= COMMIT_TRANS); | ||
| 4389 | |||
| 4390 | if (btrfs_need_do_async_reclaim(space_info, fs_info)) | ||
| 4391 | queue_work(system_unbound_wq, work); | ||
| 4392 | } | ||
| 4393 | |||
| 4394 | void btrfs_init_async_reclaim_work(struct work_struct *work) | ||
| 4395 | { | ||
| 4396 | INIT_WORK(work, btrfs_async_reclaim_metadata_space); | ||
| 4397 | } | ||
| 4398 | |||
| 4204 | /** | 4399 | /** |
| 4205 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | 4400 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space |
| 4206 | * @root - the root we're allocating for | 4401 | * @root - the root we're allocating for |
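The four helpers added above form a self-contained async reclaim path: btrfs_calc_reclaim_metadata_size() sizes a target (capped at min(ncpus * 1M, 16M) and clamped to what could actually be freed), need_do_async_reclaim() gates on the metadata space info being 98% full, and the worker walks the flush states on system_unbound_wq, requeueing itself while pressure persists. A hedged sketch of the self-requeueing worker shape, with reclaim_needed()/reclaim_some() standing in for btrfs_need_do_async_reclaim() and flush_space():

    static bool reclaim_needed(struct btrfs_fs_info *fs_info);	/* stand-in */
    static void reclaim_some(struct btrfs_fs_info *fs_info);	/* stand-in */

    static void reclaim_worker(struct work_struct *work)
    {
    	struct btrfs_fs_info *fs_info =
    		container_of(work, struct btrfs_fs_info, async_reclaim_work);

    	if (!reclaim_needed(fs_info))
    		return;
    	reclaim_some(fs_info);
    	/* keep making progress without blocking the task under pressure */
    	if (reclaim_needed(fs_info))
    		queue_work(system_unbound_wq, work);
    }

btrfs_init_async_reclaim_work() exists so the INIT_WORK() happens once at mount time; the submit side (next hunk) guards with work_busy() so at most one instance is ever queued.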
| @@ -4308,8 +4503,13 @@ again: | |||
| 4308 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { | 4503 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { |
| 4309 | flushing = true; | 4504 | flushing = true; |
| 4310 | space_info->flush = 1; | 4505 | space_info->flush = 1; |
| 4506 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
| 4507 | used += orig_bytes; | ||
| 4508 | if (need_do_async_reclaim(space_info, root->fs_info, used) && | ||
| 4509 | !work_busy(&root->fs_info->async_reclaim_work)) | ||
| 4510 | queue_work(system_unbound_wq, | ||
| 4511 | &root->fs_info->async_reclaim_work); | ||
| 4311 | } | 4512 | } |
| 4312 | |||
| 4313 | spin_unlock(&space_info->lock); | 4513 | spin_unlock(&space_info->lock); |
| 4314 | 4514 | ||
| 4315 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) | 4515 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
| @@ -4366,7 +4566,7 @@ static struct btrfs_block_rsv *get_block_rsv( | |||
| 4366 | { | 4566 | { |
| 4367 | struct btrfs_block_rsv *block_rsv = NULL; | 4567 | struct btrfs_block_rsv *block_rsv = NULL; |
| 4368 | 4568 | ||
| 4369 | if (root->ref_cows) | 4569 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 4370 | block_rsv = trans->block_rsv; | 4570 | block_rsv = trans->block_rsv; |
| 4371 | 4571 | ||
| 4372 | if (root == root->fs_info->csum_root && trans->adding_csums) | 4572 | if (root == root->fs_info->csum_root && trans->adding_csums) |
| @@ -5618,7 +5818,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5618 | u64 bytenr, u64 num_bytes, u64 parent, | 5818 | u64 bytenr, u64 num_bytes, u64 parent, |
| 5619 | u64 root_objectid, u64 owner_objectid, | 5819 | u64 root_objectid, u64 owner_objectid, |
| 5620 | u64 owner_offset, int refs_to_drop, | 5820 | u64 owner_offset, int refs_to_drop, |
| 5621 | struct btrfs_delayed_extent_op *extent_op) | 5821 | struct btrfs_delayed_extent_op *extent_op, |
| 5822 | int no_quota) | ||
| 5622 | { | 5823 | { |
| 5623 | struct btrfs_key key; | 5824 | struct btrfs_key key; |
| 5624 | struct btrfs_path *path; | 5825 | struct btrfs_path *path; |
| @@ -5634,9 +5835,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5634 | int num_to_del = 1; | 5835 | int num_to_del = 1; |
| 5635 | u32 item_size; | 5836 | u32 item_size; |
| 5636 | u64 refs; | 5837 | u64 refs; |
| 5838 | int last_ref = 0; | ||
| 5839 | enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL; | ||
| 5637 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 5840 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
| 5638 | SKINNY_METADATA); | 5841 | SKINNY_METADATA); |
| 5639 | 5842 | ||
| 5843 | if (!info->quota_enabled || !is_fstree(root_objectid)) | ||
| 5844 | no_quota = 1; | ||
| 5845 | |||
| 5640 | path = btrfs_alloc_path(); | 5846 | path = btrfs_alloc_path(); |
| 5641 | if (!path) | 5847 | if (!path) |
| 5642 | return -ENOMEM; | 5848 | return -ENOMEM; |
| @@ -5684,7 +5890,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5684 | BUG_ON(iref); | 5890 | BUG_ON(iref); |
| 5685 | ret = remove_extent_backref(trans, extent_root, path, | 5891 | ret = remove_extent_backref(trans, extent_root, path, |
| 5686 | NULL, refs_to_drop, | 5892 | NULL, refs_to_drop, |
| 5687 | is_data); | 5893 | is_data, &last_ref); |
| 5688 | if (ret) { | 5894 | if (ret) { |
| 5689 | btrfs_abort_transaction(trans, extent_root, ret); | 5895 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5690 | goto out; | 5896 | goto out; |
| @@ -5719,6 +5925,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5719 | 5925 | ||
| 5720 | if (ret > 0 && skinny_metadata) { | 5926 | if (ret > 0 && skinny_metadata) { |
| 5721 | skinny_metadata = false; | 5927 | skinny_metadata = false; |
| 5928 | key.objectid = bytenr; | ||
| 5722 | key.type = BTRFS_EXTENT_ITEM_KEY; | 5929 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 5723 | key.offset = num_bytes; | 5930 | key.offset = num_bytes; |
| 5724 | btrfs_release_path(path); | 5931 | btrfs_release_path(path); |
| @@ -5802,7 +6009,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5802 | refs = btrfs_extent_refs(leaf, ei); | 6009 | refs = btrfs_extent_refs(leaf, ei); |
| 5803 | if (refs < refs_to_drop) { | 6010 | if (refs < refs_to_drop) { |
| 5804 | btrfs_err(info, "trying to drop %d refs but we only have %Lu " | 6011 | btrfs_err(info, "trying to drop %d refs but we only have %Lu " |
| 5805 | "for bytenr %Lu\n", refs_to_drop, refs, bytenr); | 6012 | "for bytenr %Lu", refs_to_drop, refs, bytenr); |
| 5806 | ret = -EINVAL; | 6013 | ret = -EINVAL; |
| 5807 | btrfs_abort_transaction(trans, extent_root, ret); | 6014 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5808 | goto out; | 6015 | goto out; |
| @@ -5810,6 +6017,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5810 | refs -= refs_to_drop; | 6017 | refs -= refs_to_drop; |
| 5811 | 6018 | ||
| 5812 | if (refs > 0) { | 6019 | if (refs > 0) { |
| 6020 | type = BTRFS_QGROUP_OPER_SUB_SHARED; | ||
| 5813 | if (extent_op) | 6021 | if (extent_op) |
| 5814 | __run_delayed_extent_op(extent_op, leaf, ei); | 6022 | __run_delayed_extent_op(extent_op, leaf, ei); |
| 5815 | /* | 6023 | /* |
| @@ -5825,7 +6033,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5825 | if (found_extent) { | 6033 | if (found_extent) { |
| 5826 | ret = remove_extent_backref(trans, extent_root, path, | 6034 | ret = remove_extent_backref(trans, extent_root, path, |
| 5827 | iref, refs_to_drop, | 6035 | iref, refs_to_drop, |
| 5828 | is_data); | 6036 | is_data, &last_ref); |
| 5829 | if (ret) { | 6037 | if (ret) { |
| 5830 | btrfs_abort_transaction(trans, extent_root, ret); | 6038 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5831 | goto out; | 6039 | goto out; |
| @@ -5846,6 +6054,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5846 | } | 6054 | } |
| 5847 | } | 6055 | } |
| 5848 | 6056 | ||
| 6057 | last_ref = 1; | ||
| 5849 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 6058 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
| 5850 | num_to_del); | 6059 | num_to_del); |
| 5851 | if (ret) { | 6060 | if (ret) { |
| @@ -5868,6 +6077,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5868 | goto out; | 6077 | goto out; |
| 5869 | } | 6078 | } |
| 5870 | } | 6079 | } |
| 6080 | btrfs_release_path(path); | ||
| 6081 | |||
| 6082 | /* Deal with the quota accounting */ | ||
| 6083 | if (!ret && last_ref && !no_quota) { | ||
| 6084 | int mod_seq = 0; | ||
| 6085 | |||
| 6086 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && | ||
| 6087 | type == BTRFS_QGROUP_OPER_SUB_SHARED) | ||
| 6088 | mod_seq = 1; | ||
| 6089 | |||
| 6090 | ret = btrfs_qgroup_record_ref(trans, info, root_objectid, | ||
| 6091 | bytenr, num_bytes, type, | ||
| 6092 | mod_seq); | ||
| 6093 | } | ||
| 5871 | out: | 6094 | out: |
| 5872 | btrfs_free_path(path); | 6095 | btrfs_free_path(path); |
| 5873 | return ret; | 6096 | return ret; |
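Worth spelling out the accounting decision the new code encodes: the operation type starts as BTRFS_QGROUP_OPER_SUB_EXCL and flips to SUB_SHARED the moment the extent item is found to still hold references after the drop, and the record is only emitted for the last backref this call removes. A minimal illustration (not a kernel helper) of the type choice:

    /* If references survive our drop, this root only gave up a shared
     * reference; if the extent item went away, the subtraction is
     * exclusive from the accounting pass's point of view. */
    static enum btrfs_qgroup_operation_type qgroup_sub_type(u64 refs_after_drop)
    {
    	return refs_after_drop ? BTRFS_QGROUP_OPER_SUB_SHARED :
    				 BTRFS_QGROUP_OPER_SUB_EXCL;
    }

mod_seq is raised only for shared data extents (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID with a SUB_SHARED drop), the one case where the accounting pass still has other owners to resolve.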
| @@ -6004,11 +6227,15 @@ out: | |||
| 6004 | /* Can return -ENOMEM */ | 6227 | /* Can return -ENOMEM */ |
| 6005 | int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 6228 | int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 6006 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 6229 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
| 6007 | u64 owner, u64 offset, int for_cow) | 6230 | u64 owner, u64 offset, int no_quota) |
| 6008 | { | 6231 | { |
| 6009 | int ret; | 6232 | int ret; |
| 6010 | struct btrfs_fs_info *fs_info = root->fs_info; | 6233 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 6011 | 6234 | ||
| 6235 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 6236 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
| 6237 | return 0; | ||
| 6238 | #endif | ||
| 6012 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | 6239 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); |
| 6013 | 6240 | ||
| 6014 | /* | 6241 | /* |
| @@ -6024,13 +6251,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 6024 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 6251 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
| 6025 | num_bytes, | 6252 | num_bytes, |
| 6026 | parent, root_objectid, (int)owner, | 6253 | parent, root_objectid, (int)owner, |
| 6027 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | 6254 | BTRFS_DROP_DELAYED_REF, NULL, no_quota); |
| 6028 | } else { | 6255 | } else { |
| 6029 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, | 6256 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
| 6030 | num_bytes, | 6257 | num_bytes, |
| 6031 | parent, root_objectid, owner, | 6258 | parent, root_objectid, owner, |
| 6032 | offset, BTRFS_DROP_DELAYED_REF, | 6259 | offset, BTRFS_DROP_DELAYED_REF, |
| 6033 | NULL, for_cow); | 6260 | NULL, no_quota); |
| 6034 | } | 6261 | } |
| 6035 | return ret; | 6262 | return ret; |
| 6036 | } | 6263 | } |
| @@ -6510,8 +6737,14 @@ loop: | |||
| 6510 | loop++; | 6737 | loop++; |
| 6511 | if (loop == LOOP_ALLOC_CHUNK) { | 6738 | if (loop == LOOP_ALLOC_CHUNK) { |
| 6512 | struct btrfs_trans_handle *trans; | 6739 | struct btrfs_trans_handle *trans; |
| 6740 | int exist = 0; | ||
| 6741 | |||
| 6742 | trans = current->journal_info; | ||
| 6743 | if (trans) | ||
| 6744 | exist = 1; | ||
| 6745 | else | ||
| 6746 | trans = btrfs_join_transaction(root); | ||
| 6513 | 6747 | ||
| 6514 | trans = btrfs_join_transaction(root); | ||
| 6515 | if (IS_ERR(trans)) { | 6748 | if (IS_ERR(trans)) { |
| 6516 | ret = PTR_ERR(trans); | 6749 | ret = PTR_ERR(trans); |
| 6517 | goto out; | 6750 | goto out; |
| @@ -6528,7 +6761,8 @@ loop: | |||
| 6528 | root, ret); | 6761 | root, ret); |
| 6529 | else | 6762 | else |
| 6530 | ret = 0; | 6763 | ret = 0; |
| 6531 | btrfs_end_transaction(trans, root); | 6764 | if (!exist) |
| 6765 | btrfs_end_transaction(trans, root); | ||
| 6532 | if (ret) | 6766 | if (ret) |
| 6533 | goto out; | 6767 | goto out; |
| 6534 | } | 6768 | } |
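The LOOP_ALLOC_CHUNK change fixes a nesting problem: the allocator can be reached while the task already holds a transaction handle, and unconditionally joining a second one from there risks deadlocking against the transaction the task is already part of. The idiom, as a sketch of the hunk above:

    struct btrfs_trans_handle *trans = current->journal_info;
    bool joined = false;

    if (!trans) {			/* no handle attached to this task */
    	trans = btrfs_join_transaction(root);
    	if (IS_ERR(trans))
    		return PTR_ERR(trans);
    	joined = true;
    }
    /* ... do the chunk allocation ... */
    if (joined)			/* only end what we ourselves joined */
    	btrfs_end_transaction(trans, root);

btrfs really does stash the running handle in current->journal_info, which is what makes the cheap reuse test possible.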
| @@ -6729,6 +6963,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 6729 | btrfs_mark_buffer_dirty(path->nodes[0]); | 6963 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 6730 | btrfs_free_path(path); | 6964 | btrfs_free_path(path); |
| 6731 | 6965 | ||
| 6966 | /* Always set parent to 0 here since it's exclusive anyway. */ | ||
| 6967 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
| 6968 | ins->objectid, ins->offset, | ||
| 6969 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
| 6970 | if (ret) | ||
| 6971 | return ret; | ||
| 6972 | |||
| 6732 | ret = update_block_group(root, ins->objectid, ins->offset, 1); | 6973 | ret = update_block_group(root, ins->objectid, ins->offset, 1); |
| 6733 | if (ret) { /* -ENOENT, logic error */ | 6974 | if (ret) { /* -ENOENT, logic error */ |
| 6734 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 6975 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
| @@ -6743,7 +6984,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 6743 | struct btrfs_root *root, | 6984 | struct btrfs_root *root, |
| 6744 | u64 parent, u64 root_objectid, | 6985 | u64 parent, u64 root_objectid, |
| 6745 | u64 flags, struct btrfs_disk_key *key, | 6986 | u64 flags, struct btrfs_disk_key *key, |
| 6746 | int level, struct btrfs_key *ins) | 6987 | int level, struct btrfs_key *ins, |
| 6988 | int no_quota) | ||
| 6747 | { | 6989 | { |
| 6748 | int ret; | 6990 | int ret; |
| 6749 | struct btrfs_fs_info *fs_info = root->fs_info; | 6991 | struct btrfs_fs_info *fs_info = root->fs_info; |
| @@ -6753,6 +6995,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 6753 | struct btrfs_path *path; | 6995 | struct btrfs_path *path; |
| 6754 | struct extent_buffer *leaf; | 6996 | struct extent_buffer *leaf; |
| 6755 | u32 size = sizeof(*extent_item) + sizeof(*iref); | 6997 | u32 size = sizeof(*extent_item) + sizeof(*iref); |
| 6998 | u64 num_bytes = ins->offset; | ||
| 6756 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 6999 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
| 6757 | SKINNY_METADATA); | 7000 | SKINNY_METADATA); |
| 6758 | 7001 | ||
| @@ -6786,6 +7029,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 6786 | 7029 | ||
| 6787 | if (skinny_metadata) { | 7030 | if (skinny_metadata) { |
| 6788 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); | 7031 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); |
| 7032 | num_bytes = root->leafsize; | ||
| 6789 | } else { | 7033 | } else { |
| 6790 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); | 7034 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); |
| 6791 | btrfs_set_tree_block_key(leaf, block_info, key); | 7035 | btrfs_set_tree_block_key(leaf, block_info, key); |
| @@ -6807,6 +7051,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 6807 | btrfs_mark_buffer_dirty(leaf); | 7051 | btrfs_mark_buffer_dirty(leaf); |
| 6808 | btrfs_free_path(path); | 7052 | btrfs_free_path(path); |
| 6809 | 7053 | ||
| 7054 | if (!no_quota) { | ||
| 7055 | ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, | ||
| 7056 | ins->objectid, num_bytes, | ||
| 7057 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
| 7058 | if (ret) | ||
| 7059 | return ret; | ||
| 7060 | } | ||
| 7061 | |||
| 6810 | ret = update_block_group(root, ins->objectid, root->leafsize, 1); | 7062 | ret = update_block_group(root, ins->objectid, root->leafsize, 1); |
| 6811 | if (ret) { /* -ENOENT, logic error */ | 7063 | if (ret) { /* -ENOENT, logic error */ |
| 6812 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 7064 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
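One detail that is easy to miss here: with SKINNY_METADATA the key is (bytenr, METADATA_ITEM_KEY, level), so ins->offset carries a tree level, not a byte count. That is why num_bytes is re-derived before the qgroup record; otherwise the accounting would credit a few bytes (the level) instead of one tree block. In sketch form:

    u64 num_bytes = skinny_metadata ? root->leafsize	/* one tree block */
    				    : ins->offset;	/* real byte count */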
| @@ -6990,6 +7242,15 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 6990 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 7242 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
| 6991 | SKINNY_METADATA); | 7243 | SKINNY_METADATA); |
| 6992 | 7244 | ||
| 7245 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 7246 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { | ||
| 7247 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, | ||
| 7248 | blocksize, level); | ||
| 7249 | if (!IS_ERR(buf)) | ||
| 7250 | root->alloc_bytenr += blocksize; | ||
| 7251 | return buf; | ||
| 7252 | } | ||
| 7253 | #endif | ||
| 6993 | block_rsv = use_block_rsv(trans, root, blocksize); | 7254 | block_rsv = use_block_rsv(trans, root, blocksize); |
| 6994 | if (IS_ERR(block_rsv)) | 7255 | if (IS_ERR(block_rsv)) |
| 6995 | return ERR_CAST(block_rsv); | 7256 | return ERR_CAST(block_rsv); |
| @@ -7731,7 +7992,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
| 7731 | } | 7992 | } |
| 7732 | } | 7993 | } |
| 7733 | 7994 | ||
| 7734 | if (root->in_radix) { | 7995 | if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { |
| 7735 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); | 7996 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); |
| 7736 | } else { | 7997 | } else { |
| 7737 | free_extent_buffer(root->node); | 7998 | free_extent_buffer(root->node); |
| @@ -8323,8 +8584,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 8323 | list_del(&space_info->list); | 8584 | list_del(&space_info->list); |
| 8324 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | 8585 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { |
| 8325 | struct kobject *kobj; | 8586 | struct kobject *kobj; |
| 8326 | kobj = &space_info->block_group_kobjs[i]; | 8587 | kobj = space_info->block_group_kobjs[i]; |
| 8327 | if (kobj->parent) { | 8588 | space_info->block_group_kobjs[i] = NULL; |
| 8589 | if (kobj) { | ||
| 8328 | kobject_del(kobj); | 8590 | kobject_del(kobj); |
| 8329 | kobject_put(kobj); | 8591 | kobject_put(kobj); |
| 8330 | } | 8592 | } |
| @@ -8348,17 +8610,26 @@ static void __link_block_group(struct btrfs_space_info *space_info, | |||
| 8348 | up_write(&space_info->groups_sem); | 8610 | up_write(&space_info->groups_sem); |
| 8349 | 8611 | ||
| 8350 | if (first) { | 8612 | if (first) { |
| 8351 | struct kobject *kobj = &space_info->block_group_kobjs[index]; | 8613 | struct raid_kobject *rkobj; |
| 8352 | int ret; | 8614 | int ret; |
| 8353 | 8615 | ||
| 8354 | kobject_get(&space_info->kobj); /* put in release */ | 8616 | rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); |
| 8355 | ret = kobject_add(kobj, &space_info->kobj, "%s", | 8617 | if (!rkobj) |
| 8356 | get_raid_name(index)); | 8618 | goto out_err; |
| 8619 | rkobj->raid_type = index; | ||
| 8620 | kobject_init(&rkobj->kobj, &btrfs_raid_ktype); | ||
| 8621 | ret = kobject_add(&rkobj->kobj, &space_info->kobj, | ||
| 8622 | "%s", get_raid_name(index)); | ||
| 8357 | if (ret) { | 8623 | if (ret) { |
| 8358 | pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); | 8624 | kobject_put(&rkobj->kobj); |
| 8359 | kobject_put(&space_info->kobj); | 8625 | goto out_err; |
| 8360 | } | 8626 | } |
| 8627 | space_info->block_group_kobjs[index] = &rkobj->kobj; | ||
| 8361 | } | 8628 | } |
| 8629 | |||
| 8630 | return; | ||
| 8631 | out_err: | ||
| 8632 | pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); | ||
| 8362 | } | 8633 | } |
| 8363 | 8634 | ||
| 8364 | static struct btrfs_block_group_cache * | 8635 | static struct btrfs_block_group_cache * |
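The sysfs rework above replaces kobjects embedded in btrfs_space_info (initialized eagerly for every RAID type, whether or not that type ever appeared) with raid_kobject wrappers allocated only when the first block group of a type is linked. A sketch of the wrapper and the release discipline that allocation implies; the struct fields match the hunk, while the release callback is an assumption about what btrfs_raid_ktype points at:

    struct raid_kobject {
    	int raid_type;
    	struct kobject kobj;
    };

    static void raid_kobject_release(struct kobject *kobj)
    {
    	/* called by the kobject core when the last reference drops */
    	kfree(container_of(kobj, struct raid_kobject, kobj));
    }

Dynamic allocation also simplifies the error path: a failed kobject_add() is handled by kobject_put(), which funnels into the release above instead of leaving a half-initialized embedded kobject behind.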
| @@ -8607,7 +8878,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 8607 | 8878 | ||
| 8608 | extent_root = root->fs_info->extent_root; | 8879 | extent_root = root->fs_info->extent_root; |
| 8609 | 8880 | ||
| 8610 | root->fs_info->last_trans_log_full_commit = trans->transid; | 8881 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 8611 | 8882 | ||
| 8612 | cache = btrfs_create_block_group_cache(root, chunk_offset, size); | 8883 | cache = btrfs_create_block_group_cache(root, chunk_offset, size); |
| 8613 | if (!cache) | 8884 | if (!cache) |
| @@ -8693,6 +8964,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 8693 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 8964 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
| 8694 | struct btrfs_key key; | 8965 | struct btrfs_key key; |
| 8695 | struct inode *inode; | 8966 | struct inode *inode; |
| 8967 | struct kobject *kobj = NULL; | ||
| 8696 | int ret; | 8968 | int ret; |
| 8697 | int index; | 8969 | int index; |
| 8698 | int factor; | 8970 | int factor; |
| @@ -8792,11 +9064,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 8792 | */ | 9064 | */ |
| 8793 | list_del_init(&block_group->list); | 9065 | list_del_init(&block_group->list); |
| 8794 | if (list_empty(&block_group->space_info->block_groups[index])) { | 9066 | if (list_empty(&block_group->space_info->block_groups[index])) { |
| 8795 | kobject_del(&block_group->space_info->block_group_kobjs[index]); | 9067 | kobj = block_group->space_info->block_group_kobjs[index]; |
| 8796 | kobject_put(&block_group->space_info->block_group_kobjs[index]); | 9068 | block_group->space_info->block_group_kobjs[index] = NULL; |
| 8797 | clear_avail_alloc_bits(root->fs_info, block_group->flags); | 9069 | clear_avail_alloc_bits(root->fs_info, block_group->flags); |
| 8798 | } | 9070 | } |
| 8799 | up_write(&block_group->space_info->groups_sem); | 9071 | up_write(&block_group->space_info->groups_sem); |
| 9072 | if (kobj) { | ||
| 9073 | kobject_del(kobj); | ||
| 9074 | kobject_put(kobj); | ||
| 9075 | } | ||
| 8800 | 9076 | ||
| 8801 | if (block_group->cached == BTRFS_CACHE_STARTED) | 9077 | if (block_group->cached == BTRFS_CACHE_STARTED) |
| 8802 | wait_block_group_cache_done(block_group); | 9078 | wait_block_group_cache_done(block_group); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3955e475ceec..a389820d158b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -1693,6 +1693,7 @@ again: | |||
| 1693 | * shortening the size of the delalloc range we're searching | 1693 | * shortening the size of the delalloc range we're searching |
| 1694 | */ | 1694 | */ |
| 1695 | free_extent_state(cached_state); | 1695 | free_extent_state(cached_state); |
| 1696 | cached_state = NULL; | ||
| 1696 | if (!loops) { | 1697 | if (!loops) { |
| 1697 | max_bytes = PAGE_CACHE_SIZE; | 1698 | max_bytes = PAGE_CACHE_SIZE; |
| 1698 | loops = 1; | 1699 | loops = 1; |
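The single added line is a use-after-free fix, not a cleanup: find_lock_delalloc_range() jumps back to its 'again' label after shrinking max_bytes, and the retry passes &cached_state back into the tree search. The defensive idiom:

    free_extent_state(cached_state);
    cached_state = NULL;	/* the retry path reuses this pointer */

Any pointer that is freed but remains reachable from a retry loop needs to be cleared at the same time it is released.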
| @@ -2353,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
| 2353 | { | 2354 | { |
| 2354 | int uptodate = (err == 0); | 2355 | int uptodate = (err == 0); |
| 2355 | struct extent_io_tree *tree; | 2356 | struct extent_io_tree *tree; |
| 2356 | int ret; | 2357 | int ret = 0; |
| 2357 | 2358 | ||
| 2358 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2359 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 2359 | 2360 | ||
| @@ -2367,6 +2368,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
| 2367 | if (!uptodate) { | 2368 | if (!uptodate) { |
| 2368 | ClearPageUptodate(page); | 2369 | ClearPageUptodate(page); |
| 2369 | SetPageError(page); | 2370 | SetPageError(page); |
| 2371 | ret = ret < 0 ? ret : -EIO; | ||
| 2372 | mapping_set_error(page->mapping, ret); | ||
| 2370 | } | 2373 | } |
| 2371 | return 0; | 2374 | return 0; |
| 2372 | } | 2375 | } |
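Besides flagging the page, the failure now gets latched on the address_space: mapping_set_error() sets AS_EIO (or AS_ENOSPC for -ENOSPC), and that bit is what a later fsync()/filemap_fdatawait() reports back to userspace. A sketch of the reporting chain the two added lines complete:

    SetPageError(page);				/* per-page state, for readers */
    mapping_set_error(page->mapping, -EIO);	/* per-file state, for fsync() */

Without the second call a writeback failure could complete silently, because the writer has typically returned long before the bio fails.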
| @@ -3098,143 +3101,130 @@ static noinline void update_nr_written(struct page *page, | |||
| 3098 | } | 3101 | } |
| 3099 | 3102 | ||
| 3100 | /* | 3103 | /* |
| 3101 | * the writepage semantics are similar to regular writepage. extent | 3104 | * helper for __extent_writepage, doing all of the delayed allocation setup. |
| 3102 | * records are inserted to lock ranges in the tree, and as dirty areas | 3105 | * |
| 3103 | * are found, they are marked writeback. Then the lock bits are removed | 3106 | * This returns 1 if our fill_delalloc function did all the work required |
| 3104 | * and the end_io handler clears the writeback ranges | 3107 | * to write the page (copy into inline extent). In this case the IO has |
| 3108 | * been started and the page is already unlocked. | ||
| 3109 | * | ||
| 3110 | * This returns 0 if all went well (page still locked) | ||
| 3111 | * This returns < 0 if there were errors (page still locked) | ||
| 3105 | */ | 3112 | */ |
| 3106 | static int __extent_writepage(struct page *page, struct writeback_control *wbc, | 3113 | static noinline_for_stack int writepage_delalloc(struct inode *inode, |
| 3107 | void *data) | 3114 | struct page *page, struct writeback_control *wbc, |
| 3115 | struct extent_page_data *epd, | ||
| 3116 | u64 delalloc_start, | ||
| 3117 | unsigned long *nr_written) | ||
| 3118 | { | ||
| 3119 | struct extent_io_tree *tree = epd->tree; | ||
| 3120 | u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1; | ||
| 3121 | u64 nr_delalloc; | ||
| 3122 | u64 delalloc_to_write = 0; | ||
| 3123 | u64 delalloc_end = 0; | ||
| 3124 | int ret; | ||
| 3125 | int page_started = 0; | ||
| 3126 | |||
| 3127 | if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc) | ||
| 3128 | return 0; | ||
| 3129 | |||
| 3130 | while (delalloc_end < page_end) { | ||
| 3131 | nr_delalloc = find_lock_delalloc_range(inode, tree, | ||
| 3132 | page, | ||
| 3133 | &delalloc_start, | ||
| 3134 | &delalloc_end, | ||
| 3135 | 128 * 1024 * 1024); | ||
| 3136 | if (nr_delalloc == 0) { | ||
| 3137 | delalloc_start = delalloc_end + 1; | ||
| 3138 | continue; | ||
| 3139 | } | ||
| 3140 | ret = tree->ops->fill_delalloc(inode, page, | ||
| 3141 | delalloc_start, | ||
| 3142 | delalloc_end, | ||
| 3143 | &page_started, | ||
| 3144 | nr_written); | ||
| 3145 | /* File system has been set read-only */ | ||
| 3146 | if (ret) { | ||
| 3147 | SetPageError(page); | ||
| 3148 | /* fill_delalloc should return < 0 for error | ||
| 3149 | * but just in case, we use > 0 here meaning the | ||
| 3150 | * IO is started, so we don't want to return > 0 | ||
| 3151 | * unless things are going well. | ||
| 3152 | */ | ||
| 3153 | ret = ret < 0 ? ret : -EIO; | ||
| 3154 | goto done; | ||
| 3155 | } | ||
| 3156 | /* | ||
| 3157 | * delalloc_end is already one less than the total | ||
| 3158 | * length, so we don't subtract one from | ||
| 3159 | * PAGE_CACHE_SIZE | ||
| 3160 | */ | ||
| 3161 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
| 3162 | PAGE_CACHE_SIZE) >> | ||
| 3163 | PAGE_CACHE_SHIFT; | ||
| 3164 | delalloc_start = delalloc_end + 1; | ||
| 3165 | } | ||
| 3166 | if (wbc->nr_to_write < delalloc_to_write) { | ||
| 3167 | int thresh = 8192; | ||
| 3168 | |||
| 3169 | if (delalloc_to_write < thresh * 2) | ||
| 3170 | thresh = delalloc_to_write; | ||
| 3171 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
| 3172 | thresh); | ||
| 3173 | } | ||
| 3174 | |||
| 3175 | /* did the fill delalloc function already unlock and start | ||
| 3176 | * the IO? | ||
| 3177 | */ | ||
| 3178 | if (page_started) { | ||
| 3179 | /* | ||
| 3180 | * we've unlocked the page, so we can't update | ||
| 3181 | * the mapping's writeback index, just update | ||
| 3182 | * nr_to_write. | ||
| 3183 | */ | ||
| 3184 | wbc->nr_to_write -= *nr_written; | ||
| 3185 | return 1; | ||
| 3186 | } | ||
| 3187 | |||
| 3188 | ret = 0; | ||
| 3189 | |||
| 3190 | done: | ||
| 3191 | return ret; | ||
| 3192 | } | ||
| 3193 | |||
| 3194 | /* | ||
| 3195 | * helper for __extent_writepage. This calls the writepage start hooks, | ||
| 3196 | * and does the loop to map the page into extents and bios. | ||
| 3197 | * | ||
| 3198 | * We return 1 if the IO is started and the page is unlocked, | ||
| 3199 | * 0 if all went well (page still locked) | ||
| 3200 | * < 0 if there were errors (page still locked) | ||
| 3201 | */ | ||
| 3202 | static noinline_for_stack int __extent_writepage_io(struct inode *inode, | ||
| 3203 | struct page *page, | ||
| 3204 | struct writeback_control *wbc, | ||
| 3205 | struct extent_page_data *epd, | ||
| 3206 | loff_t i_size, | ||
| 3207 | unsigned long nr_written, | ||
| 3208 | int write_flags, int *nr_ret) | ||
| 3108 | { | 3209 | { |
| 3109 | struct inode *inode = page->mapping->host; | ||
| 3110 | struct extent_page_data *epd = data; | ||
| 3111 | struct extent_io_tree *tree = epd->tree; | 3210 | struct extent_io_tree *tree = epd->tree; |
| 3112 | u64 start = page_offset(page); | 3211 | u64 start = page_offset(page); |
| 3113 | u64 delalloc_start; | ||
| 3114 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | 3212 | u64 page_end = start + PAGE_CACHE_SIZE - 1; |
| 3115 | u64 end; | 3213 | u64 end; |
| 3116 | u64 cur = start; | 3214 | u64 cur = start; |
| 3117 | u64 extent_offset; | 3215 | u64 extent_offset; |
| 3118 | u64 last_byte = i_size_read(inode); | ||
| 3119 | u64 block_start; | 3216 | u64 block_start; |
| 3120 | u64 iosize; | 3217 | u64 iosize; |
| 3121 | sector_t sector; | 3218 | sector_t sector; |
| 3122 | struct extent_state *cached_state = NULL; | 3219 | struct extent_state *cached_state = NULL; |
| 3123 | struct extent_map *em; | 3220 | struct extent_map *em; |
| 3124 | struct block_device *bdev; | 3221 | struct block_device *bdev; |
| 3125 | int ret; | ||
| 3126 | int nr = 0; | ||
| 3127 | size_t pg_offset = 0; | 3222 | size_t pg_offset = 0; |
| 3128 | size_t blocksize; | 3223 | size_t blocksize; |
| 3129 | loff_t i_size = i_size_read(inode); | 3224 | int ret = 0; |
| 3130 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | 3225 | int nr = 0; |
| 3131 | u64 nr_delalloc; | 3226 | bool compressed; |
| 3132 | u64 delalloc_end; | ||
| 3133 | int page_started; | ||
| 3134 | int compressed; | ||
| 3135 | int write_flags; | ||
| 3136 | unsigned long nr_written = 0; | ||
| 3137 | bool fill_delalloc = true; | ||
| 3138 | |||
| 3139 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
| 3140 | write_flags = WRITE_SYNC; | ||
| 3141 | else | ||
| 3142 | write_flags = WRITE; | ||
| 3143 | |||
| 3144 | trace___extent_writepage(page, inode, wbc); | ||
| 3145 | |||
| 3146 | WARN_ON(!PageLocked(page)); | ||
| 3147 | |||
| 3148 | ClearPageError(page); | ||
| 3149 | |||
| 3150 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | ||
| 3151 | if (page->index > end_index || | ||
| 3152 | (page->index == end_index && !pg_offset)) { | ||
| 3153 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | ||
| 3154 | unlock_page(page); | ||
| 3155 | return 0; | ||
| 3156 | } | ||
| 3157 | |||
| 3158 | if (page->index == end_index) { | ||
| 3159 | char *userpage; | ||
| 3160 | |||
| 3161 | userpage = kmap_atomic(page); | ||
| 3162 | memset(userpage + pg_offset, 0, | ||
| 3163 | PAGE_CACHE_SIZE - pg_offset); | ||
| 3164 | kunmap_atomic(userpage); | ||
| 3165 | flush_dcache_page(page); | ||
| 3166 | } | ||
| 3167 | pg_offset = 0; | ||
| 3168 | |||
| 3169 | set_page_extent_mapped(page); | ||
| 3170 | |||
| 3171 | if (!tree->ops || !tree->ops->fill_delalloc) | ||
| 3172 | fill_delalloc = false; | ||
| 3173 | |||
| 3174 | delalloc_start = start; | ||
| 3175 | delalloc_end = 0; | ||
| 3176 | page_started = 0; | ||
| 3177 | if (!epd->extent_locked && fill_delalloc) { | ||
| 3178 | u64 delalloc_to_write = 0; | ||
| 3179 | /* | ||
| 3180 | * make sure the wbc mapping index is at least updated | ||
| 3181 | * to this page. | ||
| 3182 | */ | ||
| 3183 | update_nr_written(page, wbc, 0); | ||
| 3184 | |||
| 3185 | while (delalloc_end < page_end) { | ||
| 3186 | nr_delalloc = find_lock_delalloc_range(inode, tree, | ||
| 3187 | page, | ||
| 3188 | &delalloc_start, | ||
| 3189 | &delalloc_end, | ||
| 3190 | 128 * 1024 * 1024); | ||
| 3191 | if (nr_delalloc == 0) { | ||
| 3192 | delalloc_start = delalloc_end + 1; | ||
| 3193 | continue; | ||
| 3194 | } | ||
| 3195 | ret = tree->ops->fill_delalloc(inode, page, | ||
| 3196 | delalloc_start, | ||
| 3197 | delalloc_end, | ||
| 3198 | &page_started, | ||
| 3199 | &nr_written); | ||
| 3200 | /* File system has been set read-only */ | ||
| 3201 | if (ret) { | ||
| 3202 | SetPageError(page); | ||
| 3203 | goto done; | ||
| 3204 | } | ||
| 3205 | /* | ||
| 3206 | * delalloc_end is already one less than the total | ||
| 3207 | * length, so we don't subtract one from | ||
| 3208 | * PAGE_CACHE_SIZE | ||
| 3209 | */ | ||
| 3210 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
| 3211 | PAGE_CACHE_SIZE) >> | ||
| 3212 | PAGE_CACHE_SHIFT; | ||
| 3213 | delalloc_start = delalloc_end + 1; | ||
| 3214 | } | ||
| 3215 | if (wbc->nr_to_write < delalloc_to_write) { | ||
| 3216 | int thresh = 8192; | ||
| 3217 | |||
| 3218 | if (delalloc_to_write < thresh * 2) | ||
| 3219 | thresh = delalloc_to_write; | ||
| 3220 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
| 3221 | thresh); | ||
| 3222 | } | ||
| 3223 | 3227 | ||
| 3224 | /* did the fill delalloc function already unlock and start | ||
| 3225 | * the IO? | ||
| 3226 | */ | ||
| 3227 | if (page_started) { | ||
| 3228 | ret = 0; | ||
| 3229 | /* | ||
| 3230 | * we've unlocked the page, so we can't update | ||
| 3231 | * the mapping's writeback index, just update | ||
| 3232 | * nr_to_write. | ||
| 3233 | */ | ||
| 3234 | wbc->nr_to_write -= nr_written; | ||
| 3235 | goto done_unlocked; | ||
| 3236 | } | ||
| 3237 | } | ||
| 3238 | if (tree->ops && tree->ops->writepage_start_hook) { | 3228 | if (tree->ops && tree->ops->writepage_start_hook) { |
| 3239 | ret = tree->ops->writepage_start_hook(page, start, | 3229 | ret = tree->ops->writepage_start_hook(page, start, |
| 3240 | page_end); | 3230 | page_end); |
| @@ -3244,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3244 | wbc->pages_skipped++; | 3234 | wbc->pages_skipped++; |
| 3245 | else | 3235 | else |
| 3246 | redirty_page_for_writepage(wbc, page); | 3236 | redirty_page_for_writepage(wbc, page); |
| 3237 | |||
| 3247 | update_nr_written(page, wbc, nr_written); | 3238 | update_nr_written(page, wbc, nr_written); |
| 3248 | unlock_page(page); | 3239 | unlock_page(page); |
| 3249 | ret = 0; | 3240 | ret = 1; |
| 3250 | goto done_unlocked; | 3241 | goto done_unlocked; |
| 3251 | } | 3242 | } |
| 3252 | } | 3243 | } |
| @@ -3258,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3258 | update_nr_written(page, wbc, nr_written + 1); | 3249 | update_nr_written(page, wbc, nr_written + 1); |
| 3259 | 3250 | ||
| 3260 | end = page_end; | 3251 | end = page_end; |
| 3261 | if (last_byte <= start) { | 3252 | if (i_size <= start) { |
| 3262 | if (tree->ops && tree->ops->writepage_end_io_hook) | 3253 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 3263 | tree->ops->writepage_end_io_hook(page, start, | 3254 | tree->ops->writepage_end_io_hook(page, start, |
| 3264 | page_end, NULL, 1); | 3255 | page_end, NULL, 1); |
| @@ -3268,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3268 | blocksize = inode->i_sb->s_blocksize; | 3259 | blocksize = inode->i_sb->s_blocksize; |
| 3269 | 3260 | ||
| 3270 | while (cur <= end) { | 3261 | while (cur <= end) { |
| 3271 | if (cur >= last_byte) { | 3262 | u64 em_end; |
| 3263 | if (cur >= i_size) { | ||
| 3272 | if (tree->ops && tree->ops->writepage_end_io_hook) | 3264 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 3273 | tree->ops->writepage_end_io_hook(page, cur, | 3265 | tree->ops->writepage_end_io_hook(page, cur, |
| 3274 | page_end, NULL, 1); | 3266 | page_end, NULL, 1); |
| @@ -3278,13 +3270,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3278 | end - cur + 1, 1); | 3270 | end - cur + 1, 1); |
| 3279 | if (IS_ERR_OR_NULL(em)) { | 3271 | if (IS_ERR_OR_NULL(em)) { |
| 3280 | SetPageError(page); | 3272 | SetPageError(page); |
| 3273 | ret = PTR_ERR_OR_ZERO(em); | ||
| 3281 | break; | 3274 | break; |
| 3282 | } | 3275 | } |
| 3283 | 3276 | ||
| 3284 | extent_offset = cur - em->start; | 3277 | extent_offset = cur - em->start; |
| 3285 | BUG_ON(extent_map_end(em) <= cur); | 3278 | em_end = extent_map_end(em); |
| 3279 | BUG_ON(em_end <= cur); | ||
| 3286 | BUG_ON(end < cur); | 3280 | BUG_ON(end < cur); |
| 3287 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 3281 | iosize = min(em_end - cur, end - cur + 1); |
| 3288 | iosize = ALIGN(iosize, blocksize); | 3282 | iosize = ALIGN(iosize, blocksize); |
| 3289 | sector = (em->block_start + extent_offset) >> 9; | 3283 | sector = (em->block_start + extent_offset) >> 9; |
| 3290 | bdev = em->bdev; | 3284 | bdev = em->bdev; |
| @@ -3320,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3320 | pg_offset += iosize; | 3314 | pg_offset += iosize; |
| 3321 | continue; | 3315 | continue; |
| 3322 | } | 3316 | } |
| 3323 | /* leave this out until we have a page_mkwrite call */ | ||
| 3324 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | ||
| 3325 | EXTENT_DIRTY, 0, NULL)) { | ||
| 3326 | cur = cur + iosize; | ||
| 3327 | pg_offset += iosize; | ||
| 3328 | continue; | ||
| 3329 | } | ||
| 3330 | 3317 | ||
| 3331 | if (tree->ops && tree->ops->writepage_io_hook) { | 3318 | if (tree->ops && tree->ops->writepage_io_hook) { |
| 3332 | ret = tree->ops->writepage_io_hook(page, cur, | 3319 | ret = tree->ops->writepage_io_hook(page, cur, |
| @@ -3337,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3337 | if (ret) { | 3324 | if (ret) { |
| 3338 | SetPageError(page); | 3325 | SetPageError(page); |
| 3339 | } else { | 3326 | } else { |
| 3340 | unsigned long max_nr = end_index + 1; | 3327 | unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1; |
| 3341 | 3328 | ||
| 3342 | set_range_writeback(tree, cur, cur + iosize - 1); | 3329 | set_range_writeback(tree, cur, cur + iosize - 1); |
| 3343 | if (!PageWriteback(page)) { | 3330 | if (!PageWriteback(page)) { |
| @@ -3359,17 +3346,94 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 3359 | nr++; | 3346 | nr++; |
| 3360 | } | 3347 | } |
| 3361 | done: | 3348 | done: |
| 3349 | *nr_ret = nr; | ||
| 3350 | |||
| 3351 | done_unlocked: | ||
| 3352 | |||
| 3353 | /* drop our reference on any cached states */ | ||
| 3354 | free_extent_state(cached_state); | ||
| 3355 | return ret; | ||
| 3356 | } | ||
| 3357 | |||
| 3358 | /* | ||
| 3359 | * the writepage semantics are similar to regular writepage. extent | ||
| 3360 | * records are inserted to lock ranges in the tree, and as dirty areas | ||
| 3361 | * are found, they are marked writeback. Then the lock bits are removed | ||
| 3362 | * and the end_io handler clears the writeback ranges | ||
| 3363 | */ | ||
| 3364 | static int __extent_writepage(struct page *page, struct writeback_control *wbc, | ||
| 3365 | void *data) | ||
| 3366 | { | ||
| 3367 | struct inode *inode = page->mapping->host; | ||
| 3368 | struct extent_page_data *epd = data; | ||
| 3369 | u64 start = page_offset(page); | ||
| 3370 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | ||
| 3371 | int ret; | ||
| 3372 | int nr = 0; | ||
| 3373 | size_t pg_offset = 0; | ||
| 3374 | loff_t i_size = i_size_read(inode); | ||
| 3375 | unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; | ||
| 3376 | int write_flags; | ||
| 3377 | unsigned long nr_written = 0; | ||
| 3378 | |||
| 3379 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
| 3380 | write_flags = WRITE_SYNC; | ||
| 3381 | else | ||
| 3382 | write_flags = WRITE; | ||
| 3383 | |||
| 3384 | trace___extent_writepage(page, inode, wbc); | ||
| 3385 | |||
| 3386 | WARN_ON(!PageLocked(page)); | ||
| 3387 | |||
| 3388 | ClearPageError(page); | ||
| 3389 | |||
| 3390 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | ||
| 3391 | if (page->index > end_index || | ||
| 3392 | (page->index == end_index && !pg_offset)) { | ||
| 3393 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | ||
| 3394 | unlock_page(page); | ||
| 3395 | return 0; | ||
| 3396 | } | ||
| 3397 | |||
| 3398 | if (page->index == end_index) { | ||
| 3399 | char *userpage; | ||
| 3400 | |||
| 3401 | userpage = kmap_atomic(page); | ||
| 3402 | memset(userpage + pg_offset, 0, | ||
| 3403 | PAGE_CACHE_SIZE - pg_offset); | ||
| 3404 | kunmap_atomic(userpage); | ||
| 3405 | flush_dcache_page(page); | ||
| 3406 | } | ||
| 3407 | |||
| 3408 | pg_offset = 0; | ||
| 3409 | |||
| 3410 | set_page_extent_mapped(page); | ||
| 3411 | |||
| 3412 | ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written); | ||
| 3413 | if (ret == 1) | ||
| 3414 | goto done_unlocked; | ||
| 3415 | if (ret) | ||
| 3416 | goto done; | ||
| 3417 | |||
| 3418 | ret = __extent_writepage_io(inode, page, wbc, epd, | ||
| 3419 | i_size, nr_written, write_flags, &nr); | ||
| 3420 | if (ret == 1) | ||
| 3421 | goto done_unlocked; | ||
| 3422 | |||
| 3423 | done: | ||
| 3362 | if (nr == 0) { | 3424 | if (nr == 0) { |
| 3363 | /* make sure the mapping tag for page dirty gets cleared */ | 3425 | /* make sure the mapping tag for page dirty gets cleared */ |
| 3364 | set_page_writeback(page); | 3426 | set_page_writeback(page); |
| 3365 | end_page_writeback(page); | 3427 | end_page_writeback(page); |
| 3366 | } | 3428 | } |
| 3429 | if (PageError(page)) { | ||
| 3430 | ret = ret < 0 ? ret : -EIO; | ||
| 3431 | end_extent_writepage(page, ret, start, page_end); | ||
| 3432 | } | ||
| 3367 | unlock_page(page); | 3433 | unlock_page(page); |
| 3434 | return ret; | ||
| 3368 | 3435 | ||
| 3369 | done_unlocked: | 3436 | done_unlocked: |
| 3370 | |||
| 3371 | /* drop our reference on any cached states */ | ||
| 3372 | free_extent_state(cached_state); | ||
| 3373 | return 0; | 3437 | return 0; |
| 3374 | } | 3438 | } |
| 3375 | 3439 | ||
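After the split, __extent_writepage() is a thin dispatcher over writepage_delalloc() and __extent_writepage_io(), both of which follow the same three-way convention stated in their headers. A sketch of the contract as the dispatcher consumes it:

    /*
     *  1   IO already started, page already unlocked: hand off, return
     *  0   success, page still locked: fall through, unlock normally
     * <0   error, page still locked: mark PageError, report through
     *      end_extent_writepage(), then unlock
     */
    ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
    if (ret == 1)
    	goto done_unlocked;

Factoring the two stages out, and marking them noinline_for_stack, also caps the stack depth of what used to be one very large frame.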
| @@ -3385,9 +3449,10 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) | |||
| 3385 | TASK_UNINTERRUPTIBLE); | 3449 | TASK_UNINTERRUPTIBLE); |
| 3386 | } | 3450 | } |
| 3387 | 3451 | ||
| 3388 | static int lock_extent_buffer_for_io(struct extent_buffer *eb, | 3452 | static noinline_for_stack int |
| 3389 | struct btrfs_fs_info *fs_info, | 3453 | lock_extent_buffer_for_io(struct extent_buffer *eb, |
| 3390 | struct extent_page_data *epd) | 3454 | struct btrfs_fs_info *fs_info, |
| 3455 | struct extent_page_data *epd) | ||
| 3391 | { | 3456 | { |
| 3392 | unsigned long i, num_pages; | 3457 | unsigned long i, num_pages; |
| 3393 | int flush = 0; | 3458 | int flush = 0; |
| @@ -3458,7 +3523,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
| 3458 | static void end_extent_buffer_writeback(struct extent_buffer *eb) | 3523 | static void end_extent_buffer_writeback(struct extent_buffer *eb) |
| 3459 | { | 3524 | { |
| 3460 | clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); | 3525 | clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); |
| 3461 | smp_mb__after_clear_bit(); | 3526 | smp_mb__after_atomic(); |
| 3462 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); | 3527 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); |
| 3463 | } | 3528 | } |
| 3464 | 3529 | ||
| @@ -3492,7 +3557,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) | |||
| 3492 | bio_put(bio); | 3557 | bio_put(bio); |
| 3493 | } | 3558 | } |
| 3494 | 3559 | ||
| 3495 | static int write_one_eb(struct extent_buffer *eb, | 3560 | static noinline_for_stack int write_one_eb(struct extent_buffer *eb, |
| 3496 | struct btrfs_fs_info *fs_info, | 3561 | struct btrfs_fs_info *fs_info, |
| 3497 | struct writeback_control *wbc, | 3562 | struct writeback_control *wbc, |
| 3498 | struct extent_page_data *epd) | 3563 | struct extent_page_data *epd) |
| @@ -3690,6 +3755,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 3690 | struct inode *inode = mapping->host; | 3755 | struct inode *inode = mapping->host; |
| 3691 | int ret = 0; | 3756 | int ret = 0; |
| 3692 | int done = 0; | 3757 | int done = 0; |
| 3758 | int err = 0; | ||
| 3693 | int nr_to_write_done = 0; | 3759 | int nr_to_write_done = 0; |
| 3694 | struct pagevec pvec; | 3760 | struct pagevec pvec; |
| 3695 | int nr_pages; | 3761 | int nr_pages; |
| @@ -3776,8 +3842,8 @@ retry: | |||
| 3776 | unlock_page(page); | 3842 | unlock_page(page); |
| 3777 | ret = 0; | 3843 | ret = 0; |
| 3778 | } | 3844 | } |
| 3779 | if (ret) | 3845 | if (!err && ret < 0) |
| 3780 | done = 1; | 3846 | err = ret; |
| 3781 | 3847 | ||
| 3782 | /* | 3848 | /* |
| 3783 | * the filesystem may choose to bump up nr_to_write. | 3849 | * the filesystem may choose to bump up nr_to_write. |
| @@ -3789,7 +3855,7 @@ retry: | |||
| 3789 | pagevec_release(&pvec); | 3855 | pagevec_release(&pvec); |
| 3790 | cond_resched(); | 3856 | cond_resched(); |
| 3791 | } | 3857 | } |
| 3792 | if (!scanned && !done) { | 3858 | if (!scanned && !done && !err) { |
| 3793 | /* | 3859 | /* |
| 3794 | * We hit the last page and there is more work to be done: wrap | 3860 | * We hit the last page and there is more work to be done: wrap |
| 3795 | * back to the start of the file | 3861 | * back to the start of the file |
| @@ -3799,7 +3865,7 @@ retry: | |||
| 3799 | goto retry; | 3865 | goto retry; |
| 3800 | } | 3866 | } |
| 3801 | btrfs_add_delayed_iput(inode); | 3867 | btrfs_add_delayed_iput(inode); |
| 3802 | return ret; | 3868 | return err; |
| 3803 | } | 3869 | } |
| 3804 | 3870 | ||
| 3805 | static void flush_epd_write_bio(struct extent_page_data *epd) | 3871 | static void flush_epd_write_bio(struct extent_page_data *epd) |
| @@ -4510,7 +4576,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) | |||
| 4510 | spin_unlock(&eb->refs_lock); | 4576 | spin_unlock(&eb->refs_lock); |
| 4511 | } | 4577 | } |
| 4512 | 4578 | ||
| 4513 | static void mark_extent_buffer_accessed(struct extent_buffer *eb) | 4579 | static void mark_extent_buffer_accessed(struct extent_buffer *eb, |
| 4580 | struct page *accessed) | ||
| 4514 | { | 4581 | { |
| 4515 | unsigned long num_pages, i; | 4582 | unsigned long num_pages, i; |
| 4516 | 4583 | ||
| @@ -4519,7 +4586,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) | |||
| 4519 | num_pages = num_extent_pages(eb->start, eb->len); | 4586 | num_pages = num_extent_pages(eb->start, eb->len); |
| 4520 | for (i = 0; i < num_pages; i++) { | 4587 | for (i = 0; i < num_pages; i++) { |
| 4521 | struct page *p = extent_buffer_page(eb, i); | 4588 | struct page *p = extent_buffer_page(eb, i); |
| 4522 | mark_page_accessed(p); | 4589 | if (p != accessed) |
| 4590 | mark_page_accessed(p); | ||
| 4523 | } | 4591 | } |
| 4524 | } | 4592 | } |
| 4525 | 4593 | ||
| @@ -4533,7 +4601,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, | |||
| 4533 | start >> PAGE_CACHE_SHIFT); | 4601 | start >> PAGE_CACHE_SHIFT); |
| 4534 | if (eb && atomic_inc_not_zero(&eb->refs)) { | 4602 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
| 4535 | rcu_read_unlock(); | 4603 | rcu_read_unlock(); |
| 4536 | mark_extent_buffer_accessed(eb); | 4604 | mark_extent_buffer_accessed(eb, NULL); |
| 4537 | return eb; | 4605 | return eb; |
| 4538 | } | 4606 | } |
| 4539 | rcu_read_unlock(); | 4607 | rcu_read_unlock(); |
| @@ -4541,6 +4609,53 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, | |||
| 4541 | return NULL; | 4609 | return NULL; |
| 4542 | } | 4610 | } |
| 4543 | 4611 | ||
| 4612 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 4613 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | ||
| 4614 | u64 start, unsigned long len) | ||
| 4615 | { | ||
| 4616 | struct extent_buffer *eb, *exists = NULL; | ||
| 4617 | int ret; | ||
| 4618 | |||
| 4619 | eb = find_extent_buffer(fs_info, start); | ||
| 4620 | if (eb) | ||
| 4621 | return eb; | ||
| 4622 | eb = alloc_dummy_extent_buffer(start, len); | ||
| 4623 | if (!eb) | ||
| 4624 | return NULL; | ||
| 4625 | eb->fs_info = fs_info; | ||
| 4626 | again: | ||
| 4627 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
| 4628 | if (ret) | ||
| 4629 | goto free_eb; | ||
| 4630 | spin_lock(&fs_info->buffer_lock); | ||
| 4631 | ret = radix_tree_insert(&fs_info->buffer_radix, | ||
| 4632 | start >> PAGE_CACHE_SHIFT, eb); | ||
| 4633 | spin_unlock(&fs_info->buffer_lock); | ||
| 4634 | radix_tree_preload_end(); | ||
| 4635 | if (ret == -EEXIST) { | ||
| 4636 | exists = find_extent_buffer(fs_info, start); | ||
| 4637 | if (exists) | ||
| 4638 | goto free_eb; | ||
| 4639 | else | ||
| 4640 | goto again; | ||
| 4641 | } | ||
| 4642 | check_buffer_tree_ref(eb); | ||
| 4643 | set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); | ||
| 4644 | |||
| 4645 | /* | ||
| 4646 | * We will free dummy extent buffer's if they come into | ||
| 4647 | * free_extent_buffer with a ref count of 2, but if we are using this we | ||
| 4648 | * want the buffers to stay in memory until we're done with them, so | ||
| 4649 | * bump the ref count again. | ||
| 4650 | */ | ||
| 4651 | atomic_inc(&eb->refs); | ||
| 4652 | return eb; | ||
| 4653 | free_eb: | ||
| 4654 | btrfs_release_extent_buffer(eb); | ||
| 4655 | return exists; | ||
| 4656 | } | ||
| 4657 | #endif | ||
| 4658 | |||
| 4544 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | 4659 | struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, |
| 4545 | u64 start, unsigned long len) | 4660 | u64 start, unsigned long len) |
| 4546 | { | 4661 | { |
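alloc_test_extent_buffer() reuses the standard radix-tree insertion dance from the non-test path: preload with GFP_NOFS, insert under the spinlock, and on -EEXIST look up the winner, retrying if it disappeared before a reference could be taken. The generic shape, with illustrative names:

    static struct extent_buffer *insert_or_find(struct btrfs_fs_info *fs_info,
    					    unsigned long index,
    					    struct extent_buffer *new)
    {
    	struct extent_buffer *exists;
    	int ret;
    again:
    	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
    	if (ret)
    		return NULL;		/* caller frees 'new' */
    	spin_lock(&fs_info->buffer_lock);
    	ret = radix_tree_insert(&fs_info->buffer_radix, index, new);
    	spin_unlock(&fs_info->buffer_lock);
    	radix_tree_preload_end();
    	if (ret == -EEXIST) {
    		exists = find_extent_buffer(fs_info,
    					    index << PAGE_CACHE_SHIFT);
    		if (!exists)
    			goto again;	/* winner vanished; try to insert again */
    		return exists;
    	}
    	return new;
    }

The extra atomic_inc() at the end of the test variant is deliberate: dummy buffers are torn down when their refcount falls to 2, so the caller holds one more reference to keep them alive for the duration of the test.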
| @@ -4581,7 +4696,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | |||
| 4581 | spin_unlock(&mapping->private_lock); | 4696 | spin_unlock(&mapping->private_lock); |
| 4582 | unlock_page(p); | 4697 | unlock_page(p); |
| 4583 | page_cache_release(p); | 4698 | page_cache_release(p); |
| 4584 | mark_extent_buffer_accessed(exists); | 4699 | mark_extent_buffer_accessed(exists, p); |
| 4585 | goto free_eb; | 4700 | goto free_eb; |
| 4586 | } | 4701 | } |
| 4587 | 4702 | ||
| @@ -4596,7 +4711,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | |||
| 4596 | attach_extent_buffer_page(eb, p); | 4711 | attach_extent_buffer_page(eb, p); |
| 4597 | spin_unlock(&mapping->private_lock); | 4712 | spin_unlock(&mapping->private_lock); |
| 4598 | WARN_ON(PageDirty(p)); | 4713 | WARN_ON(PageDirty(p)); |
| 4599 | mark_page_accessed(p); | ||
| 4600 | eb->pages[i] = p; | 4714 | eb->pages[i] = p; |
| 4601 | if (!PageUptodate(p)) | 4715 | if (!PageUptodate(p)) |
| 4602 | uptodate = 0; | 4716 | uptodate = 0; |
| @@ -4954,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
| 4954 | } | 5068 | } |
| 4955 | } | 5069 | } |
| 4956 | 5070 | ||
| 5071 | int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, | ||
| 5072 | unsigned long start, | ||
| 5073 | unsigned long len) | ||
| 5074 | { | ||
| 5075 | size_t cur; | ||
| 5076 | size_t offset; | ||
| 5077 | struct page *page; | ||
| 5078 | char *kaddr; | ||
| 5079 | char __user *dst = (char __user *)dstv; | ||
| 5080 | size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 5081 | unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; | ||
| 5082 | int ret = 0; | ||
| 5083 | |||
| 5084 | WARN_ON(start > eb->len); | ||
| 5085 | WARN_ON(start + len > eb->start + eb->len); | ||
| 5086 | |||
| 5087 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | ||
| 5088 | |||
| 5089 | while (len > 0) { | ||
| 5090 | page = extent_buffer_page(eb, i); | ||
| 5091 | |||
| 5092 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | ||
| 5093 | kaddr = page_address(page); | ||
| 5094 | if (copy_to_user(dst, kaddr + offset, cur)) { | ||
| 5095 | ret = -EFAULT; | ||
| 5096 | break; | ||
| 5097 | } | ||
| 5098 | |||
| 5099 | dst += cur; | ||
| 5100 | len -= cur; | ||
| 5101 | offset = 0; | ||
| 5102 | i++; | ||
| 5103 | } | ||
| 5104 | |||
| 5105 | return ret; | ||
| 5106 | } | ||
| 5107 | |||
| 4957 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 5108 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
| 4958 | unsigned long min_len, char **map, | 5109 | unsigned long min_len, char **map, |
| 4959 | unsigned long *map_start, | 5110 | unsigned long *map_start, |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c488b45237bf..15ce5f2a2b62 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
| 304 | void read_extent_buffer(struct extent_buffer *eb, void *dst, | 304 | void read_extent_buffer(struct extent_buffer *eb, void *dst, |
| 305 | unsigned long start, | 305 | unsigned long start, |
| 306 | unsigned long len); | 306 | unsigned long len); |
| 307 | int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, | ||
| 308 | unsigned long start, | ||
| 309 | unsigned long len); | ||
| 307 | void write_extent_buffer(struct extent_buffer *eb, const void *src, | 310 | void write_extent_buffer(struct extent_buffer *eb, const void *src, |
| 308 | unsigned long start, unsigned long len); | 311 | unsigned long start, unsigned long len); |
| 309 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | 312 | void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, |
| @@ -350,5 +353,7 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
| 350 | struct extent_io_tree *tree, | 353 | struct extent_io_tree *tree, |
| 351 | struct page *locked_page, u64 *start, | 354 | struct page *locked_page, u64 *start, |
| 352 | u64 *end, u64 max_bytes); | 355 | u64 *end, u64 max_bytes); |
| 356 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | ||
| 357 | u64 start, unsigned long len); | ||
| 353 | #endif | 358 | #endif |
| 354 | #endif | 359 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 127555b29f58..f46cfe45d686 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -281,10 +281,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
| 281 | found: | 281 | found: |
| 282 | csum += count * csum_size; | 282 | csum += count * csum_size; |
| 283 | nblocks -= count; | 283 | nblocks -= count; |
| 284 | bio_index += count; | ||
| 284 | while (count--) { | 285 | while (count--) { |
| 285 | disk_bytenr += bvec->bv_len; | 286 | disk_bytenr += bvec->bv_len; |
| 286 | offset += bvec->bv_len; | 287 | offset += bvec->bv_len; |
| 287 | bio_index++; | ||
| 288 | bvec++; | 288 | bvec++; |
| 289 | } | 289 | } |
| 290 | } | 290 | } |
| @@ -750,7 +750,7 @@ again: | |||
| 750 | int slot = path->slots[0] + 1; | 750 | int slot = path->slots[0] + 1; |
| 751 | /* we didn't find a csum item, insert one */ | 751 | /* we didn't find a csum item, insert one */ |
| 752 | nritems = btrfs_header_nritems(path->nodes[0]); | 752 | nritems = btrfs_header_nritems(path->nodes[0]); |
| 753 | if (path->slots[0] >= nritems - 1) { | 753 | if (!nritems || (path->slots[0] >= nritems - 1)) { |
| 754 | ret = btrfs_next_leaf(root, path); | 754 | ret = btrfs_next_leaf(root, path); |
| 755 | if (ret == 1) | 755 | if (ret == 1) |
| 756 | found_next = 1; | 756 | found_next = 1; |
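The new !nritems test guards an unsigned wraparound: nritems is a u32, so on an empty leaf the old comparison was made against 0 - 1, i.e. UINT_MAX, and could never be true, meaning btrfs_next_leaf() was never tried. A standalone userspace demo of the comparison (illustrative values only):

	#include <stdio.h>

	int main(void)
	{
		unsigned int nritems = 0;	/* empty leaf */
		unsigned int slot = 0;

		/* old test: 0 >= 0xffffffff is false */
		printf("old: %d\n", slot >= nritems - 1);
		/* new test short-circuits on the empty leaf */
		printf("new: %d\n", !nritems || slot >= nritems - 1);
		return 0;
	}
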
| @@ -885,3 +885,79 @@ out: | |||
| 885 | fail_unlock: | 885 | fail_unlock: |
| 886 | goto out; | 886 | goto out; |
| 887 | } | 887 | } |
| 888 | |||
| 889 | void btrfs_extent_item_to_extent_map(struct inode *inode, | ||
| 890 | const struct btrfs_path *path, | ||
| 891 | struct btrfs_file_extent_item *fi, | ||
| 892 | const bool new_inline, | ||
| 893 | struct extent_map *em) | ||
| 894 | { | ||
| 895 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 896 | struct extent_buffer *leaf = path->nodes[0]; | ||
| 897 | const int slot = path->slots[0]; | ||
| 898 | struct btrfs_key key; | ||
| 899 | u64 extent_start, extent_end; | ||
| 900 | u64 bytenr; | ||
| 901 | u8 type = btrfs_file_extent_type(leaf, fi); | ||
| 902 | int compress_type = btrfs_file_extent_compression(leaf, fi); | ||
| 903 | |||
| 904 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 905 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 906 | extent_start = key.offset; | ||
| 907 | |||
| 908 | if (type == BTRFS_FILE_EXTENT_REG || | ||
| 909 | type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 910 | extent_end = extent_start + | ||
| 911 | btrfs_file_extent_num_bytes(leaf, fi); | ||
| 912 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 913 | size_t size; | ||
| 914 | size = btrfs_file_extent_inline_len(leaf, slot, fi); | ||
| 915 | extent_end = ALIGN(extent_start + size, root->sectorsize); | ||
| 916 | } | ||
| 917 | |||
| 918 | em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
| 919 | if (type == BTRFS_FILE_EXTENT_REG || | ||
| 920 | type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 921 | em->start = extent_start; | ||
| 922 | em->len = extent_end - extent_start; | ||
| 923 | em->orig_start = extent_start - | ||
| 924 | btrfs_file_extent_offset(leaf, fi); | ||
| 925 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 926 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 927 | if (bytenr == 0) { | ||
| 928 | em->block_start = EXTENT_MAP_HOLE; | ||
| 929 | return; | ||
| 930 | } | ||
| 931 | if (compress_type != BTRFS_COMPRESS_NONE) { | ||
| 932 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 933 | em->compress_type = compress_type; | ||
| 934 | em->block_start = bytenr; | ||
| 935 | em->block_len = em->orig_block_len; | ||
| 936 | } else { | ||
| 937 | bytenr += btrfs_file_extent_offset(leaf, fi); | ||
| 938 | em->block_start = bytenr; | ||
| 939 | em->block_len = em->len; | ||
| 940 | if (type == BTRFS_FILE_EXTENT_PREALLOC) | ||
| 941 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 942 | } | ||
| 943 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 944 | em->block_start = EXTENT_MAP_INLINE; | ||
| 945 | em->start = extent_start; | ||
| 946 | em->len = extent_end - extent_start; | ||
| 947 | /* | ||
| 948 | * Initialize orig_start and block_len with the same values | ||
| 949 | * as in inode.c:btrfs_get_extent(). | ||
| 950 | */ | ||
| 951 | em->orig_start = EXTENT_MAP_HOLE; | ||
| 952 | em->block_len = (u64)-1; | ||
| 953 | if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) { | ||
| 954 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 955 | em->compress_type = compress_type; | ||
| 956 | } | ||
| 957 | } else { | ||
| 958 | btrfs_err(root->fs_info, | ||
| 959 | "unknown file extent item type %d, inode %llu, offset %llu, root %llu", | ||
| 960 | type, btrfs_ino(inode), extent_start, | ||
| 961 | root->root_key.objectid); | ||
| 962 | } | ||
| 963 | } | ||
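btrfs_extent_item_to_extent_map() is a new shared helper for filling an extent_map from an on-disk file extent item, matching the translation btrfs_get_extent() does in inode.c. A hedged caller sketch, assuming path already points at a file extent item of inode and with error handling elided:

	struct btrfs_file_extent_item *fi;
	struct extent_map *em;

	em = alloc_extent_map();
	if (!em)
		return -ENOMEM;
	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_extent_item_to_extent_map(inode, path, fi,
					false /* new_inline */, em);
	/* em->block_start now encodes hole/inline/regular per the branches above */
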
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb742c07e7a4..1f2b99cb55ea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
| 41 | #include "locking.h" | 41 | #include "locking.h" |
| 42 | #include "volumes.h" | 42 | #include "volumes.h" |
| 43 | #include "qgroup.h" | ||
| 43 | 44 | ||
| 44 | static struct kmem_cache *btrfs_inode_defrag_cachep; | 45 | static struct kmem_cache *btrfs_inode_defrag_cachep; |
| 45 | /* | 46 | /* |
| @@ -447,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
| 447 | write_bytes -= copied; | 448 | write_bytes -= copied; |
| 448 | total_copied += copied; | 449 | total_copied += copied; |
| 449 | 450 | ||
| 450 | /* Return to btrfs_file_aio_write to fault page */ | 451 | /* Return to btrfs_file_write_iter to fault page */ |
| 451 | if (unlikely(copied == 0)) | 452 | if (unlikely(copied == 0)) |
| 452 | break; | 453 | break; |
| 453 | 454 | ||
| @@ -470,11 +471,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
| 470 | for (i = 0; i < num_pages; i++) { | 471 | for (i = 0; i < num_pages; i++) { |
| 471 | /* page checked is some magic around finding pages that | 472 | /* page checked is some magic around finding pages that |
| 472 | * have been modified without going through btrfs_set_page_dirty | 473 | * have been modified without going through btrfs_set_page_dirty |
| 473 | * clear it here | 474 | * clear it here. There should be no need to mark the pages |
| 475 | * accessed, as prepare_pages should have already marked them | ||
| 476 | * accessed via find_or_create_page() | ||
| 474 | */ | 477 | */ |
| 475 | ClearPageChecked(pages[i]); | 478 | ClearPageChecked(pages[i]); |
| 476 | unlock_page(pages[i]); | 479 | unlock_page(pages[i]); |
| 477 | mark_page_accessed(pages[i]); | ||
| 478 | page_cache_release(pages[i]); | 480 | page_cache_release(pages[i]); |
| 479 | } | 481 | } |
| 480 | } | 482 | } |
| @@ -714,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 714 | int recow; | 716 | int recow; |
| 715 | int ret; | 717 | int ret; |
| 716 | int modify_tree = -1; | 718 | int modify_tree = -1; |
| 717 | int update_refs = (root->ref_cows || root == root->fs_info->tree_root); | 719 | int update_refs; |
| 718 | int found = 0; | 720 | int found = 0; |
| 719 | int leafs_visited = 0; | 721 | int leafs_visited = 0; |
| 720 | 722 | ||
| @@ -724,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 724 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) | 726 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
| 725 | modify_tree = 0; | 727 | modify_tree = 0; |
| 726 | 728 | ||
| 729 | update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | ||
| 730 | root == root->fs_info->tree_root); | ||
| 727 | while (1) { | 731 | while (1) { |
| 728 | recow = 0; | 732 | recow = 0; |
| 729 | ret = btrfs_lookup_file_extent(trans, root, path, ino, | 733 | ret = btrfs_lookup_file_extent(trans, root, path, ino, |
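The update_refs rewrite above is one instance of a conversion running through this series: the boolean ref_cows field of struct btrfs_root becomes the BTRFS_ROOT_REF_COWS bit in an atomic root->state bitmask (BTRFS_ROOT_ORPHAN_ITEM_INSERTED in the inode.c hunks below follows the same pattern). A minimal sketch of the before/after; root_ref_cows() is an invented wrapper, the diff open-codes test_bit() at each call site:

	static inline bool root_ref_cows(struct btrfs_root *root)
	{
		/* was: return root->ref_cows != 0; */
		return test_bit(BTRFS_ROOT_REF_COWS, &root->state);
	}
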
| @@ -780,6 +784,18 @@ next_slot: | |||
| 780 | extent_end = search_start; | 784 | extent_end = search_start; |
| 781 | } | 785 | } |
| 782 | 786 | ||
| 787 | /* | ||
| 788 | * Don't skip extent items representing 0-byte lengths. They | ||
| 789 | * used to be created (due to a bug) when punching holes hit an | ||
| 790 | * -ENOSPC condition. So if we find one here, just ensure we | ||
| 791 | * delete it, otherwise we would insert a new file extent item | ||
| 792 | * with the same key (offset) as that 0-byte length file | ||
| 793 | * extent item in the call to setup_items_for_insert() later | ||
| 794 | * in this function. | ||
| 795 | */ | ||
| 796 | if (extent_end == key.offset && extent_end >= search_start) | ||
| 797 | goto delete_extent_item; | ||
| 798 | |||
| 783 | if (extent_end <= search_start) { | 799 | if (extent_end <= search_start) { |
| 784 | path->slots[0]++; | 800 | path->slots[0]++; |
| 785 | goto next_slot; | 801 | goto next_slot; |
| @@ -800,7 +816,7 @@ next_slot: | |||
| 800 | if (start > key.offset && end < extent_end) { | 816 | if (start > key.offset && end < extent_end) { |
| 801 | BUG_ON(del_nr > 0); | 817 | BUG_ON(del_nr > 0); |
| 802 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 818 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 803 | ret = -EINVAL; | 819 | ret = -EOPNOTSUPP; |
| 804 | break; | 820 | break; |
| 805 | } | 821 | } |
| 806 | 822 | ||
| @@ -835,7 +851,7 @@ next_slot: | |||
| 835 | disk_bytenr, num_bytes, 0, | 851 | disk_bytenr, num_bytes, 0, |
| 836 | root->root_key.objectid, | 852 | root->root_key.objectid, |
| 837 | new_key.objectid, | 853 | new_key.objectid, |
| 838 | start - extent_offset, 0); | 854 | start - extent_offset, 1); |
| 839 | BUG_ON(ret); /* -ENOMEM */ | 855 | BUG_ON(ret); /* -ENOMEM */ |
| 840 | } | 856 | } |
| 841 | key.offset = start; | 857 | key.offset = start; |
| @@ -846,7 +862,7 @@ next_slot: | |||
| 846 | */ | 862 | */ |
| 847 | if (start <= key.offset && end < extent_end) { | 863 | if (start <= key.offset && end < extent_end) { |
| 848 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 864 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 849 | ret = -EINVAL; | 865 | ret = -EOPNOTSUPP; |
| 850 | break; | 866 | break; |
| 851 | } | 867 | } |
| 852 | 868 | ||
| @@ -872,7 +888,7 @@ next_slot: | |||
| 872 | if (start > key.offset && end >= extent_end) { | 888 | if (start > key.offset && end >= extent_end) { |
| 873 | BUG_ON(del_nr > 0); | 889 | BUG_ON(del_nr > 0); |
| 874 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 890 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 875 | ret = -EINVAL; | 891 | ret = -EOPNOTSUPP; |
| 876 | break; | 892 | break; |
| 877 | } | 893 | } |
| 878 | 894 | ||
| @@ -893,6 +909,7 @@ next_slot: | |||
| 893 | * | ------ extent ------ | | 909 | * | ------ extent ------ | |
| 894 | */ | 910 | */ |
| 895 | if (start <= key.offset && end >= extent_end) { | 911 | if (start <= key.offset && end >= extent_end) { |
| 912 | delete_extent_item: | ||
| 896 | if (del_nr == 0) { | 913 | if (del_nr == 0) { |
| 897 | del_slot = path->slots[0]; | 914 | del_slot = path->slots[0]; |
| 898 | del_nr = 1; | 915 | del_nr = 1; |
| @@ -1191,7 +1208,7 @@ again: | |||
| 1191 | 1208 | ||
| 1192 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, | 1209 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
| 1193 | root->root_key.objectid, | 1210 | root->root_key.objectid, |
| 1194 | ino, orig_offset, 0); | 1211 | ino, orig_offset, 1); |
| 1195 | BUG_ON(ret); /* -ENOMEM */ | 1212 | BUG_ON(ret); /* -ENOMEM */ |
| 1196 | 1213 | ||
| 1197 | if (split == start) { | 1214 | if (split == start) { |
| @@ -1658,27 +1675,22 @@ again: | |||
| 1658 | } | 1675 | } |
| 1659 | 1676 | ||
| 1660 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | 1677 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, |
| 1661 | const struct iovec *iov, | 1678 | struct iov_iter *from, |
| 1662 | unsigned long nr_segs, loff_t pos, | 1679 | loff_t pos) |
| 1663 | size_t count, size_t ocount) | ||
| 1664 | { | 1680 | { |
| 1665 | struct file *file = iocb->ki_filp; | 1681 | struct file *file = iocb->ki_filp; |
| 1666 | struct iov_iter i; | ||
| 1667 | ssize_t written; | 1682 | ssize_t written; |
| 1668 | ssize_t written_buffered; | 1683 | ssize_t written_buffered; |
| 1669 | loff_t endbyte; | 1684 | loff_t endbyte; |
| 1670 | int err; | 1685 | int err; |
| 1671 | 1686 | ||
| 1672 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, | 1687 | written = generic_file_direct_write(iocb, from, pos); |
| 1673 | count, ocount); | ||
| 1674 | 1688 | ||
| 1675 | if (written < 0 || written == count) | 1689 | if (written < 0 || !iov_iter_count(from)) |
| 1676 | return written; | 1690 | return written; |
| 1677 | 1691 | ||
| 1678 | pos += written; | 1692 | pos += written; |
| 1679 | count -= written; | 1693 | written_buffered = __btrfs_buffered_write(file, from, pos); |
| 1680 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
| 1681 | written_buffered = __btrfs_buffered_write(file, &i, pos); | ||
| 1682 | if (written_buffered < 0) { | 1694 | if (written_buffered < 0) { |
| 1683 | err = written_buffered; | 1695 | err = written_buffered; |
| 1684 | goto out; | 1696 | goto out; |
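The fallback path can reuse from directly because an iov_iter carries its own progress: generic_file_direct_write() advances the iterator by however much it wrote, so the buffered retry no longer rebuilds an iovec from iov/nr_segs/written by hand. A toy userspace analogue of that property (names invented, not kernel API):

	#include <stdio.h>

	struct toy_iter {
		const char *buf;
		size_t count;
	};

	static size_t toy_write(struct toy_iter *it, size_t max)
	{
		size_t n = it->count < max ? it->count : max;

		it->buf += n;		/* the iterator advances itself... */
		it->count -= n;		/* ...so callers never recompute offsets */
		return n;
	}

	int main(void)
	{
		struct toy_iter it = { "hello world", 11 };
		size_t direct = toy_write(&it, 5);	/* partial "direct" write */

		/* the fallback just keeps consuming the same iterator */
		printf("direct=%zu remaining=%zu rest=%s\n",
		       direct, it.count, it.buf);
		return 0;
	}
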
| @@ -1713,9 +1725,8 @@ static void update_time_for_write(struct inode *inode) | |||
| 1713 | inode_inc_iversion(inode); | 1725 | inode_inc_iversion(inode); |
| 1714 | } | 1726 | } |
| 1715 | 1727 | ||
| 1716 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 1728 | static ssize_t btrfs_file_write_iter(struct kiocb *iocb, |
| 1717 | const struct iovec *iov, | 1729 | struct iov_iter *from) |
| 1718 | unsigned long nr_segs, loff_t pos) | ||
| 1719 | { | 1730 | { |
| 1720 | struct file *file = iocb->ki_filp; | 1731 | struct file *file = iocb->ki_filp; |
| 1721 | struct inode *inode = file_inode(file); | 1732 | struct inode *inode = file_inode(file); |
| @@ -1724,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1724 | u64 end_pos; | 1735 | u64 end_pos; |
| 1725 | ssize_t num_written = 0; | 1736 | ssize_t num_written = 0; |
| 1726 | ssize_t err = 0; | 1737 | ssize_t err = 0; |
| 1727 | size_t count, ocount; | 1738 | size_t count = iov_iter_count(from); |
| 1728 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); | 1739 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); |
| 1740 | loff_t pos = iocb->ki_pos; | ||
| 1729 | 1741 | ||
| 1730 | mutex_lock(&inode->i_mutex); | 1742 | mutex_lock(&inode->i_mutex); |
| 1731 | 1743 | ||
| 1732 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
| 1733 | if (err) { | ||
| 1734 | mutex_unlock(&inode->i_mutex); | ||
| 1735 | goto out; | ||
| 1736 | } | ||
| 1737 | count = ocount; | ||
| 1738 | |||
| 1739 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 1744 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 1740 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 1745 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 1741 | if (err) { | 1746 | if (err) { |
| @@ -1748,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1748 | goto out; | 1753 | goto out; |
| 1749 | } | 1754 | } |
| 1750 | 1755 | ||
| 1756 | iov_iter_truncate(from, count); | ||
| 1757 | |||
| 1751 | err = file_remove_suid(file); | 1758 | err = file_remove_suid(file); |
| 1752 | if (err) { | 1759 | if (err) { |
| 1753 | mutex_unlock(&inode->i_mutex); | 1760 | mutex_unlock(&inode->i_mutex); |
| @@ -1777,7 +1784,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1777 | start_pos = round_down(pos, root->sectorsize); | 1784 | start_pos = round_down(pos, root->sectorsize); |
| 1778 | if (start_pos > i_size_read(inode)) { | 1785 | if (start_pos > i_size_read(inode)) { |
| 1779 | /* Expand hole size to cover write data, preventing empty gap */ | 1786 | /* Expand hole size to cover write data, preventing empty gap */ |
| 1780 | end_pos = round_up(pos + iov->iov_len, root->sectorsize); | 1787 | end_pos = round_up(pos + count, root->sectorsize); |
| 1781 | err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); | 1788 | err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); |
| 1782 | if (err) { | 1789 | if (err) { |
| 1783 | mutex_unlock(&inode->i_mutex); | 1790 | mutex_unlock(&inode->i_mutex); |
| @@ -1789,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1789 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1796 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
| 1790 | 1797 | ||
| 1791 | if (unlikely(file->f_flags & O_DIRECT)) { | 1798 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 1792 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1799 | num_written = __btrfs_direct_write(iocb, from, pos); |
| 1793 | pos, count, ocount); | ||
| 1794 | } else { | 1800 | } else { |
| 1795 | struct iov_iter i; | 1801 | num_written = __btrfs_buffered_write(file, from, pos); |
| 1796 | |||
| 1797 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
| 1798 | |||
| 1799 | num_written = __btrfs_buffered_write(file, &i, pos); | ||
| 1800 | if (num_written > 0) | 1802 | if (num_written > 0) |
| 1801 | iocb->ki_pos = pos + num_written; | 1803 | iocb->ki_pos = pos + num_written; |
| 1802 | } | 1804 | } |
| @@ -2009,8 +2011,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 2009 | if (!full_sync) { | 2011 | if (!full_sync) { |
| 2010 | ret = btrfs_wait_ordered_range(inode, start, | 2012 | ret = btrfs_wait_ordered_range(inode, start, |
| 2011 | end - start + 1); | 2013 | end - start + 1); |
| 2012 | if (ret) | 2014 | if (ret) { |
| 2015 | btrfs_end_transaction(trans, root); | ||
| 2013 | goto out; | 2016 | goto out; |
| 2017 | } | ||
| 2014 | } | 2018 | } |
| 2015 | ret = btrfs_commit_transaction(trans, root); | 2019 | ret = btrfs_commit_transaction(trans, root); |
| 2016 | } else { | 2020 | } else { |
| @@ -2168,6 +2172,37 @@ out: | |||
| 2168 | return 0; | 2172 | return 0; |
| 2169 | } | 2173 | } |
| 2170 | 2174 | ||
| 2175 | /* | ||
| 2176 | * Find a hole extent on the given inode and change start/len to the end of | ||
| 2177 | * the hole extent (a hole/vacuum extent is one whose em->start <= start && | ||
| 2178 | * em->start + em->len > start). | ||
| 2179 | * When a hole extent is found, return 1 and modify start/len. | ||
| 2180 | */ | ||
| 2181 | static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) | ||
| 2182 | { | ||
| 2183 | struct extent_map *em; | ||
| 2184 | int ret = 0; | ||
| 2185 | |||
| 2186 | em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0); | ||
| 2187 | if (IS_ERR_OR_NULL(em)) { | ||
| 2188 | if (!em) | ||
| 2189 | ret = -ENOMEM; | ||
| 2190 | else | ||
| 2191 | ret = PTR_ERR(em); | ||
| 2192 | return ret; | ||
| 2193 | } | ||
| 2194 | |||
| 2195 | /* Hole or vacuum extent (only exists in no-hole mode) */ | ||
| 2196 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
| 2197 | ret = 1; | ||
| 2198 | *len = em->start + em->len > *start + *len ? | ||
| 2199 | 0 : *start + *len - em->start - em->len; | ||
| 2200 | *start = em->start + em->len; | ||
| 2201 | } | ||
| 2202 | free_extent_map(em); | ||
| 2203 | return ret; | ||
| 2204 | } | ||
| 2205 | |||
| 2171 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | 2206 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) |
| 2172 | { | 2207 | { |
| 2173 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2208 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -2175,25 +2210,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2175 | struct btrfs_path *path; | 2210 | struct btrfs_path *path; |
| 2176 | struct btrfs_block_rsv *rsv; | 2211 | struct btrfs_block_rsv *rsv; |
| 2177 | struct btrfs_trans_handle *trans; | 2212 | struct btrfs_trans_handle *trans; |
| 2178 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | 2213 | u64 lockstart; |
| 2179 | u64 lockend = round_down(offset + len, | 2214 | u64 lockend; |
| 2180 | BTRFS_I(inode)->root->sectorsize) - 1; | 2215 | u64 tail_start; |
| 2181 | u64 cur_offset = lockstart; | 2216 | u64 tail_len; |
| 2217 | u64 orig_start = offset; | ||
| 2218 | u64 cur_offset; | ||
| 2182 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 2219 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 2183 | u64 drop_end; | 2220 | u64 drop_end; |
| 2184 | int ret = 0; | 2221 | int ret = 0; |
| 2185 | int err = 0; | 2222 | int err = 0; |
| 2186 | int rsv_count; | 2223 | int rsv_count; |
| 2187 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2224 | bool same_page; |
| 2188 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
| 2189 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2225 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
| 2190 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | 2226 | u64 ino_size; |
| 2191 | 2227 | ||
| 2192 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2228 | ret = btrfs_wait_ordered_range(inode, offset, len); |
| 2193 | if (ret) | 2229 | if (ret) |
| 2194 | return ret; | 2230 | return ret; |
| 2195 | 2231 | ||
| 2196 | mutex_lock(&inode->i_mutex); | 2232 | mutex_lock(&inode->i_mutex); |
| 2233 | ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
| 2234 | ret = find_first_non_hole(inode, &offset, &len); | ||
| 2235 | if (ret < 0) | ||
| 2236 | goto out_only_mutex; | ||
| 2237 | if (ret && !len) { | ||
| 2238 | /* Already in a large hole */ | ||
| 2239 | ret = 0; | ||
| 2240 | goto out_only_mutex; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | ||
| 2244 | lockend = round_down(offset + len, | ||
| 2245 | BTRFS_I(inode)->root->sectorsize) - 1; | ||
| 2246 | same_page = ((offset >> PAGE_CACHE_SHIFT) == | ||
| 2247 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
| 2248 | |||
| 2197 | /* | 2249 | /* |
| 2198 | * We needn't truncate any page which is beyond the end of the file | 2250 | * We needn't truncate any page which is beyond the end of the file |
| 2199 | * because we are sure there is no data there. | 2251 | * because we are sure there is no data there. |
| @@ -2205,8 +2257,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2205 | if (same_page && len < PAGE_CACHE_SIZE) { | 2257 | if (same_page && len < PAGE_CACHE_SIZE) { |
| 2206 | if (offset < ino_size) | 2258 | if (offset < ino_size) |
| 2207 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2259 | ret = btrfs_truncate_page(inode, offset, len, 0); |
| 2208 | mutex_unlock(&inode->i_mutex); | 2260 | goto out_only_mutex; |
| 2209 | return ret; | ||
| 2210 | } | 2261 | } |
| 2211 | 2262 | ||
| 2212 | /* zero back part of the first page */ | 2263 | /* zero back part of the first page */ |
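The lockstart/lockend/same_page computation above decides how much of the punched range can be handled by extent removal versus page zeroing: only whole sectors between lockstart and lockend are dropped, while ragged head and tail bytes are zeroed through the page cache. A standalone model of the math, assuming 4K sectors and pages for illustration:

	#include <stdio.h>
	#include <stdint.h>

	#define SECTORSIZE 4096ULL
	#define PAGE_SHIFT 12

	static uint64_t round_up4k(uint64_t x)
	{
		return (x + SECTORSIZE - 1) & ~(SECTORSIZE - 1);
	}

	static uint64_t round_down4k(uint64_t x)
	{
		return x & ~(SECTORSIZE - 1);
	}

	int main(void)
	{
		uint64_t offset = 3000, len = 10000;	/* invented values */
		uint64_t lockstart = round_up4k(offset);
		uint64_t lockend = round_down4k(offset + len) - 1;
		int same_page = (offset >> PAGE_SHIFT) ==
				((offset + len - 1) >> PAGE_SHIFT);

		/* prints lockstart=4096 lockend=12287 same_page=0 */
		printf("lockstart=%llu lockend=%llu same_page=%d\n",
		       (unsigned long long)lockstart,
		       (unsigned long long)lockend, same_page);
		return 0;
	}
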
| @@ -2218,12 +2269,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2218 | } | 2269 | } |
| 2219 | } | 2270 | } |
| 2220 | 2271 | ||
| 2221 | /* zero the front end of the last page */ | 2272 | /* Check the aligned pages after the first unaligned page, |
| 2222 | if (offset + len < ino_size) { | 2273 | * unless offset != orig_start, in which case the first unaligned page |
| 2223 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2274 | * and several following pages are already holes and |
| 2224 | if (ret) { | 2275 | * the extra check can be skipped */
| 2225 | mutex_unlock(&inode->i_mutex); | 2276 | if (offset == orig_start) { |
| 2226 | return ret; | 2277 | /* after truncate page, check hole again */ |
| 2278 | len = offset + len - lockstart; | ||
| 2279 | offset = lockstart; | ||
| 2280 | ret = find_first_non_hole(inode, &offset, &len); | ||
| 2281 | if (ret < 0) | ||
| 2282 | goto out_only_mutex; | ||
| 2283 | if (ret && !len) { | ||
| 2284 | ret = 0; | ||
| 2285 | goto out_only_mutex; | ||
| 2286 | } | ||
| 2287 | lockstart = offset; | ||
| 2288 | } | ||
| 2289 | |||
| 2290 | /* Check the tail unaligned part is in a hole */ | ||
| 2291 | tail_start = lockend + 1; | ||
| 2292 | tail_len = offset + len - tail_start; | ||
| 2293 | if (tail_len) { | ||
| 2294 | ret = find_first_non_hole(inode, &tail_start, &tail_len); | ||
| 2295 | if (unlikely(ret < 0)) | ||
| 2296 | goto out_only_mutex; | ||
| 2297 | if (!ret) { | ||
| 2298 | /* zero the front end of the last page */ | ||
| 2299 | if (tail_start + tail_len < ino_size) { | ||
| 2300 | ret = btrfs_truncate_page(inode, | ||
| 2301 | tail_start + tail_len, 0, 1); | ||
| 2302 | if (ret) | ||
| 2303 | goto out_only_mutex; | ||
| 2304 | } | ||
| 2227 | } | 2305 | } |
| 2228 | } | 2306 | } |
| 2229 | 2307 | ||
| @@ -2249,9 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2249 | if ((!ordered || | 2327 | if ((!ordered || |
| 2250 | (ordered->file_offset + ordered->len <= lockstart || | 2328 | (ordered->file_offset + ordered->len <= lockstart || |
| 2251 | ordered->file_offset > lockend)) && | 2329 | ordered->file_offset > lockend)) && |
| 2252 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | 2330 | !btrfs_page_exists_in_range(inode, lockstart, lockend)) { |
| 2253 | lockend, EXTENT_UPTODATE, 0, | ||
| 2254 | cached_state)) { | ||
| 2255 | if (ordered) | 2331 | if (ordered) |
| 2256 | btrfs_put_ordered_extent(ordered); | 2332 | btrfs_put_ordered_extent(ordered); |
| 2257 | break; | 2333 | break; |
| @@ -2299,6 +2375,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2299 | BUG_ON(ret); | 2375 | BUG_ON(ret); |
| 2300 | trans->block_rsv = rsv; | 2376 | trans->block_rsv = rsv; |
| 2301 | 2377 | ||
| 2378 | cur_offset = lockstart; | ||
| 2379 | len = lockend - cur_offset; | ||
| 2302 | while (cur_offset < lockend) { | 2380 | while (cur_offset < lockend) { |
| 2303 | ret = __btrfs_drop_extents(trans, root, inode, path, | 2381 | ret = __btrfs_drop_extents(trans, root, inode, path, |
| 2304 | cur_offset, lockend + 1, | 2382 | cur_offset, lockend + 1, |
| @@ -2339,6 +2417,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2339 | rsv, min_size); | 2417 | rsv, min_size); |
| 2340 | BUG_ON(ret); /* shouldn't happen */ | 2418 | BUG_ON(ret); /* shouldn't happen */ |
| 2341 | trans->block_rsv = rsv; | 2419 | trans->block_rsv = rsv; |
| 2420 | |||
| 2421 | ret = find_first_non_hole(inode, &cur_offset, &len); | ||
| 2422 | if (unlikely(ret < 0)) | ||
| 2423 | break; | ||
| 2424 | if (ret && !len) { | ||
| 2425 | ret = 0; | ||
| 2426 | break; | ||
| 2427 | } | ||
| 2342 | } | 2428 | } |
| 2343 | 2429 | ||
| 2344 | if (ret) { | 2430 | if (ret) { |
| @@ -2347,7 +2433,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2347 | } | 2433 | } |
| 2348 | 2434 | ||
| 2349 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2435 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2350 | if (cur_offset < ino_size) { | 2436 | /* |
| 2437 | * Don't insert a file hole extent item if it's for a range beyond eof | ||
| 2438 | * (because it's useless) or if it represents a 0-byte range (when | ||
| 2439 | * cur_offset == drop_end). | ||
| 2440 | */ | ||
| 2441 | if (cur_offset < ino_size && cur_offset < drop_end) { | ||
| 2351 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2442 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
| 2352 | if (ret) { | 2443 | if (ret) { |
| 2353 | err = ret; | 2444 | err = ret; |
| @@ -2372,6 +2463,7 @@ out_free: | |||
| 2372 | out: | 2463 | out: |
| 2373 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2464 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
| 2374 | &cached_state, GFP_NOFS); | 2465 | &cached_state, GFP_NOFS); |
| 2466 | out_only_mutex: | ||
| 2375 | mutex_unlock(&inode->i_mutex); | 2467 | mutex_unlock(&inode->i_mutex); |
| 2376 | if (ret && !err) | 2468 | if (ret && !err) |
| 2377 | err = ret; | 2469 | err = ret; |
| @@ -2633,11 +2725,11 @@ out: | |||
| 2633 | 2725 | ||
| 2634 | const struct file_operations btrfs_file_operations = { | 2726 | const struct file_operations btrfs_file_operations = { |
| 2635 | .llseek = btrfs_file_llseek, | 2727 | .llseek = btrfs_file_llseek, |
| 2636 | .read = do_sync_read, | 2728 | .read = new_sync_read, |
| 2637 | .write = do_sync_write, | 2729 | .write = new_sync_write, |
| 2638 | .aio_read = generic_file_aio_read, | 2730 | .read_iter = generic_file_read_iter, |
| 2639 | .splice_read = generic_file_splice_read, | 2731 | .splice_read = generic_file_splice_read, |
| 2640 | .aio_write = btrfs_file_aio_write, | 2732 | .write_iter = btrfs_file_write_iter, |
| 2641 | .mmap = btrfs_file_mmap, | 2733 | .mmap = btrfs_file_mmap, |
| 2642 | .open = generic_file_open, | 2734 | .open = generic_file_open, |
| 2643 | .release = btrfs_release_file, | 2735 | .release = btrfs_release_file, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 73f3de7a083c..372b05ff1943 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -831,7 +831,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
| 831 | 831 | ||
| 832 | if (!matched) { | 832 | if (!matched) { |
| 833 | __btrfs_remove_free_space_cache(ctl); | 833 | __btrfs_remove_free_space_cache(ctl); |
| 834 | btrfs_err(fs_info, "block group %llu has wrong amount of free space", | 834 | btrfs_warn(fs_info, "block group %llu has wrong amount of free space", |
| 835 | block_group->key.objectid); | 835 | block_group->key.objectid); |
| 836 | ret = -1; | 836 | ret = -1; |
| 837 | } | 837 | } |
| @@ -843,7 +843,7 @@ out: | |||
| 843 | spin_unlock(&block_group->lock); | 843 | spin_unlock(&block_group->lock); |
| 844 | ret = 0; | 844 | ret = 0; |
| 845 | 845 | ||
| 846 | btrfs_err(fs_info, "failed to load free space cache for block group %llu", | 846 | btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuilding it now", |
| 847 | block_group->key.objectid); | 847 | block_group->key.objectid); |
| 848 | } | 848 | } |
| 849 | 849 | ||
| @@ -851,90 +851,44 @@ out: | |||
| 851 | return ret; | 851 | return ret; |
| 852 | } | 852 | } |
| 853 | 853 | ||
| 854 | /** | 854 | static noinline_for_stack |
| 855 | * __btrfs_write_out_cache - write out cached info to an inode | 855 | int write_cache_extent_entries(struct io_ctl *io_ctl, |
| 856 | * @root - the root the inode belongs to | 856 | struct btrfs_free_space_ctl *ctl, |
| 857 | * @ctl - the free space cache we are going to write out | 857 | struct btrfs_block_group_cache *block_group, |
| 858 | * @block_group - the block_group for this cache if it belongs to a block_group | 858 | int *entries, int *bitmaps, |
| 859 | * @trans - the trans handle | 859 | struct list_head *bitmap_list) |
| 860 | * @path - the path to use | ||
| 861 | * @offset - the offset for the key we'll insert | ||
| 862 | * | ||
| 863 | * This function writes out a free space cache struct to disk for quick recovery | ||
| 864 | * on mount. This will return 0 if it was successful in writing the cache out, | ||
| 865 | * and -1 if it was not. | ||
| 866 | */ | ||
| 867 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | ||
| 868 | struct btrfs_free_space_ctl *ctl, | ||
| 869 | struct btrfs_block_group_cache *block_group, | ||
| 870 | struct btrfs_trans_handle *trans, | ||
| 871 | struct btrfs_path *path, u64 offset) | ||
| 872 | { | 860 | { |
| 873 | struct btrfs_free_space_header *header; | ||
| 874 | struct extent_buffer *leaf; | ||
| 875 | struct rb_node *node; | ||
| 876 | struct list_head *pos, *n; | ||
| 877 | struct extent_state *cached_state = NULL; | ||
| 878 | struct btrfs_free_cluster *cluster = NULL; | ||
| 879 | struct extent_io_tree *unpin = NULL; | ||
| 880 | struct io_ctl io_ctl; | ||
| 881 | struct list_head bitmap_list; | ||
| 882 | struct btrfs_key key; | ||
| 883 | u64 start, extent_start, extent_end, len; | ||
| 884 | int entries = 0; | ||
| 885 | int bitmaps = 0; | ||
| 886 | int ret; | 861 | int ret; |
| 887 | int err = -1; | 862 | struct btrfs_free_cluster *cluster = NULL; |
| 888 | 863 | struct rb_node *node = rb_first(&ctl->free_space_offset); | |
| 889 | INIT_LIST_HEAD(&bitmap_list); | ||
| 890 | |||
| 891 | if (!i_size_read(inode)) | ||
| 892 | return -1; | ||
| 893 | |||
| 894 | ret = io_ctl_init(&io_ctl, inode, root); | ||
| 895 | if (ret) | ||
| 896 | return -1; | ||
| 897 | 864 | ||
| 898 | /* Get the cluster for this block_group if it exists */ | 865 | /* Get the cluster for this block_group if it exists */ |
| 899 | if (block_group && !list_empty(&block_group->cluster_list)) | 866 | if (block_group && !list_empty(&block_group->cluster_list)) { |
| 900 | cluster = list_entry(block_group->cluster_list.next, | 867 | cluster = list_entry(block_group->cluster_list.next, |
| 901 | struct btrfs_free_cluster, | 868 | struct btrfs_free_cluster, |
| 902 | block_group_list); | 869 | block_group_list); |
| 870 | } | ||
| 903 | 871 | ||
| 904 | /* Lock all pages first so we can lock the extent safely. */ | ||
| 905 | io_ctl_prepare_pages(&io_ctl, inode, 0); | ||
| 906 | |||
| 907 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | ||
| 908 | 0, &cached_state); | ||
| 909 | |||
| 910 | node = rb_first(&ctl->free_space_offset); | ||
| 911 | if (!node && cluster) { | 872 | if (!node && cluster) { |
| 912 | node = rb_first(&cluster->root); | 873 | node = rb_first(&cluster->root); |
| 913 | cluster = NULL; | 874 | cluster = NULL; |
| 914 | } | 875 | } |
| 915 | 876 | ||
| 916 | /* Make sure we can fit our crcs into the first page */ | ||
| 917 | if (io_ctl.check_crcs && | ||
| 918 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | ||
| 919 | goto out_nospc; | ||
| 920 | |||
| 921 | io_ctl_set_generation(&io_ctl, trans->transid); | ||
| 922 | |||
| 923 | /* Write out the extent entries */ | 877 | /* Write out the extent entries */ |
| 924 | while (node) { | 878 | while (node) { |
| 925 | struct btrfs_free_space *e; | 879 | struct btrfs_free_space *e; |
| 926 | 880 | ||
| 927 | e = rb_entry(node, struct btrfs_free_space, offset_index); | 881 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
| 928 | entries++; | 882 | *entries += 1; |
| 929 | 883 | ||
| 930 | ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, | 884 | ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, |
| 931 | e->bitmap); | 885 | e->bitmap); |
| 932 | if (ret) | 886 | if (ret) |
| 933 | goto out_nospc; | 887 | goto fail; |
| 934 | 888 | ||
| 935 | if (e->bitmap) { | 889 | if (e->bitmap) { |
| 936 | list_add_tail(&e->list, &bitmap_list); | 890 | list_add_tail(&e->list, bitmap_list); |
| 937 | bitmaps++; | 891 | *bitmaps += 1; |
| 938 | } | 892 | } |
| 939 | node = rb_next(node); | 893 | node = rb_next(node); |
| 940 | if (!node && cluster) { | 894 | if (!node && cluster) { |
| @@ -942,13 +896,84 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 942 | cluster = NULL; | 896 | cluster = NULL; |
| 943 | } | 897 | } |
| 944 | } | 898 | } |
| 899 | return 0; | ||
| 900 | fail: | ||
| 901 | return -ENOSPC; | ||
| 902 | } | ||
| 903 | |||
| 904 | static noinline_for_stack int | ||
| 905 | update_cache_item(struct btrfs_trans_handle *trans, | ||
| 906 | struct btrfs_root *root, | ||
| 907 | struct inode *inode, | ||
| 908 | struct btrfs_path *path, u64 offset, | ||
| 909 | int entries, int bitmaps) | ||
| 910 | { | ||
| 911 | struct btrfs_key key; | ||
| 912 | struct btrfs_free_space_header *header; | ||
| 913 | struct extent_buffer *leaf; | ||
| 914 | int ret; | ||
| 915 | |||
| 916 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
| 917 | key.offset = offset; | ||
| 918 | key.type = 0; | ||
| 919 | |||
| 920 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 921 | if (ret < 0) { | ||
| 922 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | ||
| 923 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | ||
| 924 | GFP_NOFS); | ||
| 925 | goto fail; | ||
| 926 | } | ||
| 927 | leaf = path->nodes[0]; | ||
| 928 | if (ret > 0) { | ||
| 929 | struct btrfs_key found_key; | ||
| 930 | ASSERT(path->slots[0]); | ||
| 931 | path->slots[0]--; | ||
| 932 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
| 933 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | ||
| 934 | found_key.offset != offset) { | ||
| 935 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, | ||
| 936 | inode->i_size - 1, | ||
| 937 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, | ||
| 938 | NULL, GFP_NOFS); | ||
| 939 | btrfs_release_path(path); | ||
| 940 | goto fail; | ||
| 941 | } | ||
| 942 | } | ||
| 943 | |||
| 944 | BTRFS_I(inode)->generation = trans->transid; | ||
| 945 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
| 946 | struct btrfs_free_space_header); | ||
| 947 | btrfs_set_free_space_entries(leaf, header, entries); | ||
| 948 | btrfs_set_free_space_bitmaps(leaf, header, bitmaps); | ||
| 949 | btrfs_set_free_space_generation(leaf, header, trans->transid); | ||
| 950 | btrfs_mark_buffer_dirty(leaf); | ||
| 951 | btrfs_release_path(path); | ||
| 952 | |||
| 953 | return 0; | ||
| 954 | |||
| 955 | fail: | ||
| 956 | return -1; | ||
| 957 | } | ||
| 958 | |||
| 959 | static noinline_for_stack int | ||
| 960 | add_ioctl_entries(struct btrfs_root *root, | ||
| 961 | struct inode *inode, | ||
| 962 | struct btrfs_block_group_cache *block_group, | ||
| 963 | struct io_ctl *io_ctl, | ||
| 964 | struct extent_state **cached_state, | ||
| 965 | struct list_head *bitmap_list, | ||
| 966 | int *entries) | ||
| 967 | { | ||
| 968 | u64 start, extent_start, extent_end, len; | ||
| 969 | struct list_head *pos, *n; | ||
| 970 | struct extent_io_tree *unpin = NULL; | ||
| 971 | int ret; | ||
| 945 | 972 | ||
| 946 | /* | 973 | /* |
| 947 | * We want to add any pinned extents to our free space cache | 974 | * We want to add any pinned extents to our free space cache |
| 948 | * so we don't leak the space | 975 | * so we don't leak the space |
| 949 | */ | 976 | * |
| 950 | |||
| 951 | /* | ||
| 952 | * We shouldn't have switched the pinned extents yet so this is the | 977 | * We shouldn't have switched the pinned extents yet so this is the |
| 953 | * right one | 978 | * right one |
| 954 | */ | 979 | */ |
| @@ -977,8 +1002,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 977 | block_group->key.offset, extent_end + 1); | 1002 | block_group->key.offset, extent_end + 1); |
| 978 | len = extent_end - extent_start; | 1003 | len = extent_end - extent_start; |
| 979 | 1004 | ||
| 980 | entries++; | 1005 | *entries += 1; |
| 981 | ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL); | 1006 | ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); |
| 982 | if (ret) | 1007 | if (ret) |
| 983 | goto out_nospc; | 1008 | goto out_nospc; |
| 984 | 1009 | ||
| @@ -986,74 +1011,129 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 986 | } | 1011 | } |
| 987 | 1012 | ||
| 988 | /* Write out the bitmaps */ | 1013 | /* Write out the bitmaps */ |
| 989 | list_for_each_safe(pos, n, &bitmap_list) { | 1014 | list_for_each_safe(pos, n, bitmap_list) { |
| 990 | struct btrfs_free_space *entry = | 1015 | struct btrfs_free_space *entry = |
| 991 | list_entry(pos, struct btrfs_free_space, list); | 1016 | list_entry(pos, struct btrfs_free_space, list); |
| 992 | 1017 | ||
| 993 | ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); | 1018 | ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); |
| 994 | if (ret) | 1019 | if (ret) |
| 995 | goto out_nospc; | 1020 | goto out_nospc; |
| 996 | list_del_init(&entry->list); | 1021 | list_del_init(&entry->list); |
| 997 | } | 1022 | } |
| 998 | 1023 | ||
| 999 | /* Zero out the rest of the pages just to make sure */ | 1024 | /* Zero out the rest of the pages just to make sure */ |
| 1000 | io_ctl_zero_remaining_pages(&io_ctl); | 1025 | io_ctl_zero_remaining_pages(io_ctl); |
| 1001 | 1026 | ||
| 1002 | ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, | 1027 | ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, |
| 1003 | 0, i_size_read(inode), &cached_state); | 1028 | 0, i_size_read(inode), cached_state); |
| 1004 | io_ctl_drop_pages(&io_ctl); | 1029 | io_ctl_drop_pages(io_ctl); |
| 1005 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 1030 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
| 1006 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 1031 | i_size_read(inode) - 1, cached_state, GFP_NOFS); |
| 1007 | 1032 | ||
| 1008 | if (ret) | 1033 | if (ret) |
| 1009 | goto out; | 1034 | goto fail; |
| 1010 | 1035 | ||
| 1011 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1036 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); |
| 1012 | if (ret) { | 1037 | if (ret) { |
| 1013 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | 1038 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, |
| 1014 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | 1039 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, |
| 1015 | GFP_NOFS); | 1040 | GFP_NOFS); |
| 1016 | goto out; | 1041 | goto fail; |
| 1017 | } | 1042 | } |
| 1043 | return 0; | ||
| 1018 | 1044 | ||
| 1019 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 1045 | fail: |
| 1020 | key.offset = offset; | 1046 | return -1; |
| 1021 | key.type = 0; | ||
| 1022 | 1047 | ||
| 1023 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 1048 | out_nospc: |
| 1024 | if (ret < 0) { | 1049 | return -ENOSPC; |
| 1025 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, | 1050 | } |
| 1026 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, | 1051 | |
| 1027 | GFP_NOFS); | 1052 | static void noinline_for_stack |
| 1028 | goto out; | 1053 | cleanup_write_cache_enospc(struct inode *inode, |
| 1029 | } | 1054 | struct io_ctl *io_ctl, |
| 1030 | leaf = path->nodes[0]; | 1055 | struct extent_state **cached_state, |
| 1031 | if (ret > 0) { | 1056 | struct list_head *bitmap_list) |
| 1032 | struct btrfs_key found_key; | 1057 | { |
| 1033 | ASSERT(path->slots[0]); | 1058 | struct list_head *pos, *n; |
| 1034 | path->slots[0]--; | 1059 | list_for_each_safe(pos, n, bitmap_list) { |
| 1035 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 1060 | struct btrfs_free_space *entry = |
| 1036 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | 1061 | list_entry(pos, struct btrfs_free_space, list); |
| 1037 | found_key.offset != offset) { | 1062 | list_del_init(&entry->list); |
| 1038 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, | ||
| 1039 | inode->i_size - 1, | ||
| 1040 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, | ||
| 1041 | NULL, GFP_NOFS); | ||
| 1042 | btrfs_release_path(path); | ||
| 1043 | goto out; | ||
| 1044 | } | ||
| 1045 | } | 1063 | } |
| 1064 | io_ctl_drop_pages(io_ctl); | ||
| 1065 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
| 1066 | i_size_read(inode) - 1, cached_state, | ||
| 1067 | GFP_NOFS); | ||
| 1068 | } | ||
| 1046 | 1069 | ||
| 1047 | BTRFS_I(inode)->generation = trans->transid; | 1070 | /** |
| 1048 | header = btrfs_item_ptr(leaf, path->slots[0], | 1071 | * __btrfs_write_out_cache - write out cached info to an inode |
| 1049 | struct btrfs_free_space_header); | 1072 | * @root - the root the inode belongs to |
| 1050 | btrfs_set_free_space_entries(leaf, header, entries); | 1073 | * @ctl - the free space cache we are going to write out |
| 1051 | btrfs_set_free_space_bitmaps(leaf, header, bitmaps); | 1074 | * @block_group - the block_group for this cache if it belongs to a block_group |
| 1052 | btrfs_set_free_space_generation(leaf, header, trans->transid); | 1075 | * @trans - the trans handle |
| 1053 | btrfs_mark_buffer_dirty(leaf); | 1076 | * @path - the path to use |
| 1054 | btrfs_release_path(path); | 1077 | * @offset - the offset for the key we'll insert |
| 1078 | * | ||
| 1079 | * This function writes out a free space cache struct to disk for quick recovery | ||
| 1080 | * on mount. This will return 0 if it was successful in writing the cache out, | ||
| 1081 | * and -1 if it was not. | ||
| 1082 | */ | ||
| 1083 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | ||
| 1084 | struct btrfs_free_space_ctl *ctl, | ||
| 1085 | struct btrfs_block_group_cache *block_group, | ||
| 1086 | struct btrfs_trans_handle *trans, | ||
| 1087 | struct btrfs_path *path, u64 offset) | ||
| 1088 | { | ||
| 1089 | struct extent_state *cached_state = NULL; | ||
| 1090 | struct io_ctl io_ctl; | ||
| 1091 | struct list_head bitmap_list; | ||
| 1092 | int entries = 0; | ||
| 1093 | int bitmaps = 0; | ||
| 1094 | int ret; | ||
| 1095 | int err = -1; | ||
| 1096 | |||
| 1097 | INIT_LIST_HEAD(&bitmap_list); | ||
| 1098 | |||
| 1099 | if (!i_size_read(inode)) | ||
| 1100 | return -1; | ||
| 1101 | |||
| 1102 | ret = io_ctl_init(&io_ctl, inode, root); | ||
| 1103 | if (ret) | ||
| 1104 | return -1; | ||
| 1105 | |||
| 1106 | /* Lock all pages first so we can lock the extent safely. */ | ||
| 1107 | io_ctl_prepare_pages(&io_ctl, inode, 0); | ||
| 1108 | |||
| 1109 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | ||
| 1110 | 0, &cached_state); | ||
| 1111 | |||
| 1112 | |||
| 1113 | /* Make sure we can fit our crcs into the first page */ | ||
| 1114 | if (io_ctl.check_crcs && | ||
| 1115 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | ||
| 1116 | goto out_nospc; | ||
| 1117 | |||
| 1118 | io_ctl_set_generation(&io_ctl, trans->transid); | ||
| 1119 | |||
| 1120 | ret = write_cache_extent_entries(&io_ctl, ctl, | ||
| 1121 | block_group, &entries, &bitmaps, | ||
| 1122 | &bitmap_list); | ||
| 1123 | if (ret) | ||
| 1124 | goto out_nospc; | ||
| 1125 | |||
| 1126 | ret = add_ioctl_entries(root, inode, block_group, &io_ctl, | ||
| 1127 | &cached_state, &bitmap_list, &entries); | ||
| 1128 | |||
| 1129 | if (ret == -ENOSPC) | ||
| 1130 | goto out_nospc; | ||
| 1131 | else if (ret) | ||
| 1132 | goto out; | ||
| 1133 | |||
| 1134 | err = update_cache_item(trans, root, inode, path, offset, | ||
| 1135 | entries, bitmaps); | ||
| 1055 | 1136 | ||
| 1056 | err = 0; | ||
| 1057 | out: | 1137 | out: |
| 1058 | io_ctl_free(&io_ctl); | 1138 | io_ctl_free(&io_ctl); |
| 1059 | if (err) { | 1139 | if (err) { |
| @@ -1064,14 +1144,8 @@ out: | |||
| 1064 | return err; | 1144 | return err; |
| 1065 | 1145 | ||
| 1066 | out_nospc: | 1146 | out_nospc: |
| 1067 | list_for_each_safe(pos, n, &bitmap_list) { | 1147 | |
| 1068 | struct btrfs_free_space *entry = | 1148 | cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); |
| 1069 | list_entry(pos, struct btrfs_free_space, list); | ||
| 1070 | list_del_init(&entry->list); | ||
| 1071 | } | ||
| 1072 | io_ctl_drop_pages(&io_ctl); | ||
| 1073 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
| 1074 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | ||
| 1075 | goto out; | 1149 | goto out; |
| 1076 | } | 1150 | } |
| 1077 | 1151 | ||
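Net effect of the free-space-cache refactor above: __btrfs_write_out_cache() becomes a short driver around three helpers, each marked noinline_for_stack so individual frames stay small on btrfs's deep call chains. A condensed sketch of the resulting flow (cleanup labels and the free paths elided):

	if (io_ctl_init(&io_ctl, inode, root))
		return -1;
	io_ctl_prepare_pages(&io_ctl, inode, 0);
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0,
			 i_size_read(inode) - 1, 0, &cached_state);
	io_ctl_set_generation(&io_ctl, trans->transid);

	ret = write_cache_extent_entries(&io_ctl, ctl, block_group,
					 &entries, &bitmaps, &bitmap_list);
	if (!ret)
		ret = add_ioctl_entries(root, inode, block_group, &io_ctl,
					&cached_state, &bitmap_list, &entries);
	if (ret == -ENOSPC)
		/* unlock the pages/extent range and drop queued bitmaps */
		cleanup_write_cache_enospc(inode, &io_ctl, &cached_state,
					   &bitmap_list);
	else if (!ret)
		err = update_cache_item(trans, root, inode, path, offset,
					entries, bitmaps);
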
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc8ca193d830..888fbe19079f 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -174,9 +174,13 @@ static void start_caching(struct btrfs_root *root) | |||
| 174 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); | 174 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); |
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", | 177 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu", |
| 178 | root->root_key.objectid); | 178 | root->root_key.objectid); |
| 179 | BUG_ON(IS_ERR(tsk)); /* -ENOMEM */ | 179 | if (IS_ERR(tsk)) { |
| 180 | btrfs_warn(root->fs_info, "failed to start inode caching task"); | ||
| 181 | btrfs_clear_and_info(root, CHANGE_INODE_CACHE, | ||
| 182 | "disabling inode map caching"); | ||
| 183 | } | ||
| 180 | } | 184 | } |
| 181 | 185 | ||
| 182 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) | 186 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) |
| @@ -205,24 +209,14 @@ again: | |||
| 205 | 209 | ||
| 206 | void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | 210 | void btrfs_return_ino(struct btrfs_root *root, u64 objectid) |
| 207 | { | 211 | { |
| 208 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
| 209 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | 212 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; |
| 210 | 213 | ||
| 211 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | 214 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
| 212 | return; | 215 | return; |
| 213 | |||
| 214 | again: | 216 | again: |
| 215 | if (root->cached == BTRFS_CACHE_FINISHED) { | 217 | if (root->cached == BTRFS_CACHE_FINISHED) { |
| 216 | __btrfs_add_free_space(ctl, objectid, 1); | 218 | __btrfs_add_free_space(pinned, objectid, 1); |
| 217 | } else { | 219 | } else { |
| 218 | /* | ||
| 219 | * If we are in the process of caching free ino chunks, | ||
| 220 | * to avoid adding the same inode number to the free_ino | ||
| 221 | * tree twice due to cross transaction, we'll leave it | ||
| 222 | * in the pinned tree until a transaction is committed | ||
| 223 | * or the caching work is done. | ||
| 224 | */ | ||
| 225 | |||
| 226 | down_write(&root->fs_info->commit_root_sem); | 220 | down_write(&root->fs_info->commit_root_sem); |
| 227 | spin_lock(&root->cache_lock); | 221 | spin_lock(&root->cache_lock); |
| 228 | if (root->cached == BTRFS_CACHE_FINISHED) { | 222 | if (root->cached == BTRFS_CACHE_FINISHED) { |
| @@ -234,11 +228,7 @@ again: | |||
| 234 | 228 | ||
| 235 | start_caching(root); | 229 | start_caching(root); |
| 236 | 230 | ||
| 237 | if (objectid <= root->cache_progress || | 231 | __btrfs_add_free_space(pinned, objectid, 1); |
| 238 | objectid >= root->highest_objectid) | ||
| 239 | __btrfs_add_free_space(ctl, objectid, 1); | ||
| 240 | else | ||
| 241 | __btrfs_add_free_space(pinned, objectid, 1); | ||
| 242 | 232 | ||
| 243 | up_write(&root->fs_info->commit_root_sem); | 233 | up_write(&root->fs_info->commit_root_sem); |
| 244 | } | 234 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f805bc944fa..8925f66a1411 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -125,7 +125,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | |||
| 125 | * the btree. The caller should have done a btrfs_drop_extents so that | 125 | * the btree. The caller should have done a btrfs_drop_extents so that |
| 126 | * no overlapping inline items exist in the btree | 126 | * no overlapping inline items exist in the btree |
| 127 | */ | 127 | */ |
| 128 | static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | 128 | static int insert_inline_extent(struct btrfs_trans_handle *trans, |
| 129 | struct btrfs_path *path, int extent_inserted, | 129 | struct btrfs_path *path, int extent_inserted, |
| 130 | struct btrfs_root *root, struct inode *inode, | 130 | struct btrfs_root *root, struct inode *inode, |
| 131 | u64 start, size_t size, size_t compressed_size, | 131 | u64 start, size_t size, size_t compressed_size, |
| @@ -2678,6 +2678,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 2678 | trans = NULL; | 2678 | trans = NULL; |
| 2679 | goto out_unlock; | 2679 | goto out_unlock; |
| 2680 | } | 2680 | } |
| 2681 | |||
| 2681 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 2682 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
| 2682 | 2683 | ||
| 2683 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 2684 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
| @@ -2947,14 +2948,15 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | |||
| 2947 | root->orphan_block_rsv = NULL; | 2948 | root->orphan_block_rsv = NULL; |
| 2948 | spin_unlock(&root->orphan_lock); | 2949 | spin_unlock(&root->orphan_lock); |
| 2949 | 2950 | ||
| 2950 | if (root->orphan_item_inserted && | 2951 | if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) && |
| 2951 | btrfs_root_refs(&root->root_item) > 0) { | 2952 | btrfs_root_refs(&root->root_item) > 0) { |
| 2952 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | 2953 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, |
| 2953 | root->root_key.objectid); | 2954 | root->root_key.objectid); |
| 2954 | if (ret) | 2955 | if (ret) |
| 2955 | btrfs_abort_transaction(trans, root, ret); | 2956 | btrfs_abort_transaction(trans, root, ret); |
| 2956 | else | 2957 | else |
| 2957 | root->orphan_item_inserted = 0; | 2958 | clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, |
| 2959 | &root->state); | ||
| 2958 | } | 2960 | } |
| 2959 | 2961 | ||
| 2960 | if (block_rsv) { | 2962 | if (block_rsv) { |
| @@ -3271,7 +3273,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 3271 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | 3273 | btrfs_block_rsv_release(root, root->orphan_block_rsv, |
| 3272 | (u64)-1); | 3274 | (u64)-1); |
| 3273 | 3275 | ||
| 3274 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 3276 | if (root->orphan_block_rsv || |
| 3277 | test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) { | ||
| 3275 | trans = btrfs_join_transaction(root); | 3278 | trans = btrfs_join_transaction(root); |
| 3276 | if (!IS_ERR(trans)) | 3279 | if (!IS_ERR(trans)) |
| 3277 | btrfs_end_transaction(trans, root); | 3280 | btrfs_end_transaction(trans, root); |
| @@ -3473,7 +3476,7 @@ cache_acl: | |||
| 3473 | ret = btrfs_load_inode_props(inode, path); | 3476 | ret = btrfs_load_inode_props(inode, path); |
| 3474 | if (ret) | 3477 | if (ret) |
| 3475 | btrfs_err(root->fs_info, | 3478 | btrfs_err(root->fs_info, |
| 3476 | "error loading props for ino %llu (root %llu): %d\n", | 3479 | "error loading props for ino %llu (root %llu): %d", |
| 3477 | btrfs_ino(inode), | 3480 | btrfs_ino(inode), |
| 3478 | root->root_key.objectid, ret); | 3481 | root->root_key.objectid, ret); |
| 3479 | } | 3482 | } |
| @@ -3998,7 +4001,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3998 | * not block aligned since we will be keeping the last block of the | 4001 | * not block aligned since we will be keeping the last block of the |
| 3999 | * extent just the way it is. | 4002 | * extent just the way it is. |
| 4000 | */ | 4003 | */ |
| 4001 | if (root->ref_cows || root == root->fs_info->tree_root) | 4004 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
| 4005 | root == root->fs_info->tree_root) | ||
| 4002 | btrfs_drop_extent_cache(inode, ALIGN(new_size, | 4006 | btrfs_drop_extent_cache(inode, ALIGN(new_size, |
| 4003 | root->sectorsize), (u64)-1, 0); | 4007 | root->sectorsize), (u64)-1, 0); |
| 4004 | 4008 | ||
| @@ -4091,7 +4095,9 @@ search_again: | |||
| 4091 | extent_num_bytes); | 4095 | extent_num_bytes); |
| 4092 | num_dec = (orig_num_bytes - | 4096 | num_dec = (orig_num_bytes - |
| 4093 | extent_num_bytes); | 4097 | extent_num_bytes); |
| 4094 | if (root->ref_cows && extent_start != 0) | 4098 | if (test_bit(BTRFS_ROOT_REF_COWS, |
| 4099 | &root->state) && | ||
| 4100 | extent_start != 0) | ||
| 4095 | inode_sub_bytes(inode, num_dec); | 4101 | inode_sub_bytes(inode, num_dec); |
| 4096 | btrfs_mark_buffer_dirty(leaf); | 4102 | btrfs_mark_buffer_dirty(leaf); |
| 4097 | } else { | 4103 | } else { |
| @@ -4105,7 +4111,8 @@ search_again: | |||
| 4105 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); | 4111 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); |
| 4106 | if (extent_start != 0) { | 4112 | if (extent_start != 0) { |
| 4107 | found_extent = 1; | 4113 | found_extent = 1; |
| 4108 | if (root->ref_cows) | 4114 | if (test_bit(BTRFS_ROOT_REF_COWS, |
| 4115 | &root->state)) | ||
| 4109 | inode_sub_bytes(inode, num_dec); | 4116 | inode_sub_bytes(inode, num_dec); |
| 4110 | } | 4117 | } |
| 4111 | } | 4118 | } |
| @@ -4120,10 +4127,9 @@ search_again: | |||
| 4120 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { | 4127 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { |
| 4121 | u32 size = new_size - found_key.offset; | 4128 | u32 size = new_size - found_key.offset; |
| 4122 | 4129 | ||
| 4123 | if (root->ref_cows) { | 4130 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 4124 | inode_sub_bytes(inode, item_end + 1 - | 4131 | inode_sub_bytes(inode, item_end + 1 - |
| 4125 | new_size); | 4132 | new_size); |
| 4126 | } | ||
| 4127 | 4133 | ||
| 4128 | /* | 4134 | /* |
| 4129 | * update the ram bytes to properly reflect | 4135 | * update the ram bytes to properly reflect |
| @@ -4133,7 +4139,8 @@ search_again: | |||
| 4133 | size = | 4139 | size = |
| 4134 | btrfs_file_extent_calc_inline_size(size); | 4140 | btrfs_file_extent_calc_inline_size(size); |
| 4135 | btrfs_truncate_item(root, path, size, 1); | 4141 | btrfs_truncate_item(root, path, size, 1); |
| 4136 | } else if (root->ref_cows) { | 4142 | } else if (test_bit(BTRFS_ROOT_REF_COWS, |
| 4143 | &root->state)) { | ||
| 4137 | inode_sub_bytes(inode, item_end + 1 - | 4144 | inode_sub_bytes(inode, item_end + 1 - |
| 4138 | found_key.offset); | 4145 | found_key.offset); |
| 4139 | } | 4146 | } |
| @@ -4155,8 +4162,9 @@ delete: | |||
| 4155 | } else { | 4162 | } else { |
| 4156 | break; | 4163 | break; |
| 4157 | } | 4164 | } |
| 4158 | if (found_extent && (root->ref_cows || | 4165 | if (found_extent && |
| 4159 | root == root->fs_info->tree_root)) { | 4166 | (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
| 4167 | root == root->fs_info->tree_root)) { | ||
| 4160 | btrfs_set_path_blocking(path); | 4168 | btrfs_set_path_blocking(path); |
| 4161 | ret = btrfs_free_extent(trans, root, extent_start, | 4169 | ret = btrfs_free_extent(trans, root, extent_start, |
| 4162 | extent_num_bytes, 0, | 4170 | extent_num_bytes, 0, |
| @@ -5168,8 +5176,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry) | |||
| 5168 | 5176 | ||
| 5169 | static void btrfs_dentry_release(struct dentry *dentry) | 5177 | static void btrfs_dentry_release(struct dentry *dentry) |
| 5170 | { | 5178 | { |
| 5171 | if (dentry->d_fsdata) | 5179 | kfree(dentry->d_fsdata); |
| 5172 | kfree(dentry->d_fsdata); | ||
| 5173 | } | 5180 | } |
| 5174 | 5181 | ||
| 5175 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 5182 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
| @@ -5553,6 +5560,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 5553 | struct btrfs_inode_ref *ref; | 5560 | struct btrfs_inode_ref *ref; |
| 5554 | struct btrfs_key key[2]; | 5561 | struct btrfs_key key[2]; |
| 5555 | u32 sizes[2]; | 5562 | u32 sizes[2]; |
| 5563 | int nitems = name ? 2 : 1; | ||
| 5556 | unsigned long ptr; | 5564 | unsigned long ptr; |
| 5557 | int ret; | 5565 | int ret; |
| 5558 | 5566 | ||
| @@ -5572,7 +5580,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 5572 | */ | 5580 | */ |
| 5573 | inode->i_ino = objectid; | 5581 | inode->i_ino = objectid; |
| 5574 | 5582 | ||
| 5575 | if (dir) { | 5583 | if (dir && name) { |
| 5576 | trace_btrfs_inode_request(dir); | 5584 | trace_btrfs_inode_request(dir); |
| 5577 | 5585 | ||
| 5578 | ret = btrfs_set_inode_index(dir, index); | 5586 | ret = btrfs_set_inode_index(dir, index); |
| @@ -5581,6 +5589,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 5581 | iput(inode); | 5589 | iput(inode); |
| 5582 | return ERR_PTR(ret); | 5590 | return ERR_PTR(ret); |
| 5583 | } | 5591 | } |
| 5592 | } else if (dir) { | ||
| 5593 | *index = 0; | ||
| 5584 | } | 5594 | } |
| 5585 | /* | 5595 | /* |
| 5586 | * index_cnt is ignored for everything but a dir, | 5596 | * index_cnt is ignored for everything but a dir, |
| @@ -5605,21 +5615,24 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 5605 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 5615 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
| 5606 | key[0].offset = 0; | 5616 | key[0].offset = 0; |
| 5607 | 5617 | ||
| 5608 | /* | ||
| 5609 | * Start new inodes with an inode_ref. This is slightly more | ||
| 5610 | * efficient for small numbers of hard links since they will | ||
| 5611 | * be packed into one item. Extended refs will kick in if we | ||
| 5612 | * add more hard links than can fit in the ref item. | ||
| 5613 | */ | ||
| 5614 | key[1].objectid = objectid; | ||
| 5615 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | ||
| 5616 | key[1].offset = ref_objectid; | ||
| 5617 | |||
| 5618 | sizes[0] = sizeof(struct btrfs_inode_item); | 5618 | sizes[0] = sizeof(struct btrfs_inode_item); |
| 5619 | sizes[1] = name_len + sizeof(*ref); | 5619 | |
| 5620 | if (name) { | ||
| 5621 | /* | ||
| 5622 | * Start new inodes with an inode_ref. This is slightly more | ||
| 5623 | * efficient for small numbers of hard links since they will | ||
| 5624 | * be packed into one item. Extended refs will kick in if we | ||
| 5625 | * add more hard links than can fit in the ref item. | ||
| 5626 | */ | ||
| 5627 | key[1].objectid = objectid; | ||
| 5628 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | ||
| 5629 | key[1].offset = ref_objectid; | ||
| 5630 | |||
| 5631 | sizes[1] = name_len + sizeof(*ref); | ||
| 5632 | } | ||
| 5620 | 5633 | ||
| 5621 | path->leave_spinning = 1; | 5634 | path->leave_spinning = 1; |
| 5622 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); | 5635 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); |
| 5623 | if (ret != 0) | 5636 | if (ret != 0) |
| 5624 | goto fail; | 5637 | goto fail; |
| 5625 | 5638 | ||
| @@ -5632,12 +5645,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 5632 | sizeof(*inode_item)); | 5645 | sizeof(*inode_item)); |
| 5633 | fill_inode_item(trans, path->nodes[0], inode_item, inode); | 5646 | fill_inode_item(trans, path->nodes[0], inode_item, inode); |
| 5634 | 5647 | ||
| 5635 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 5648 | if (name) { |
| 5636 | struct btrfs_inode_ref); | 5649 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, |
| 5637 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); | 5650 | struct btrfs_inode_ref); |
| 5638 | btrfs_set_inode_ref_index(path->nodes[0], ref, *index); | 5651 | btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); |
| 5639 | ptr = (unsigned long)(ref + 1); | 5652 | btrfs_set_inode_ref_index(path->nodes[0], ref, *index); |
| 5640 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | 5653 | ptr = (unsigned long)(ref + 1); |
| 5654 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
| 5655 | } | ||
| 5641 | 5656 | ||
| 5642 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5657 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 5643 | btrfs_free_path(path); | 5658 | btrfs_free_path(path); |
| @@ -5673,7 +5688,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 5673 | 5688 | ||
| 5674 | return inode; | 5689 | return inode; |
| 5675 | fail: | 5690 | fail: |
| 5676 | if (dir) | 5691 | if (dir && name) |
| 5677 | BTRFS_I(dir)->index_cnt--; | 5692 | BTRFS_I(dir)->index_cnt--; |
| 5678 | btrfs_free_path(path); | 5693 | btrfs_free_path(path); |
| 5679 | iput(inode); | 5694 | iput(inode); |
| @@ -5958,6 +5973,15 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5958 | err = btrfs_update_inode(trans, root, inode); | 5973 | err = btrfs_update_inode(trans, root, inode); |
| 5959 | if (err) | 5974 | if (err) |
| 5960 | goto fail; | 5975 | goto fail; |
| 5976 | if (inode->i_nlink == 1) { | ||
| 5977 | /* | ||
| 5978 | * If new hard link count is 1, it's a file created | ||
| 5979 | * with open(2) O_TMPFILE flag. | ||
| 5980 | */ | ||
| 5981 | err = btrfs_orphan_del(trans, inode); | ||
| 5982 | if (err) | ||
| 5983 | goto fail; | ||
| 5984 | } | ||
| 5961 | d_instantiate(dentry, inode); | 5985 | d_instantiate(dentry, inode); |
| 5962 | btrfs_log_new_name(trans, inode, NULL, parent); | 5986 | btrfs_log_new_name(trans, inode, NULL, parent); |
| 5963 | } | 5987 | } |
| @@ -6086,16 +6110,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
| 6086 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 6110 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
| 6087 | ret = btrfs_decompress(compress_type, tmp, page, | 6111 | ret = btrfs_decompress(compress_type, tmp, page, |
| 6088 | extent_offset, inline_size, max_size); | 6112 | extent_offset, inline_size, max_size); |
| 6089 | if (ret) { | ||
| 6090 | char *kaddr = kmap_atomic(page); | ||
| 6091 | unsigned long copy_size = min_t(u64, | ||
| 6092 | PAGE_CACHE_SIZE - pg_offset, | ||
| 6093 | max_size - extent_offset); | ||
| 6094 | memset(kaddr + pg_offset, 0, copy_size); | ||
| 6095 | kunmap_atomic(kaddr); | ||
| 6096 | } | ||
| 6097 | kfree(tmp); | 6113 | kfree(tmp); |
| 6098 | return 0; | 6114 | return ret; |
| 6099 | } | 6115 | } |
| 6100 | 6116 | ||
| 6101 | /* | 6117 | /* |
| @@ -6113,7 +6129,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 6113 | { | 6129 | { |
| 6114 | int ret; | 6130 | int ret; |
| 6115 | int err = 0; | 6131 | int err = 0; |
| 6116 | u64 bytenr; | ||
| 6117 | u64 extent_start = 0; | 6132 | u64 extent_start = 0; |
| 6118 | u64 extent_end = 0; | 6133 | u64 extent_end = 0; |
| 6119 | u64 objectid = btrfs_ino(inode); | 6134 | u64 objectid = btrfs_ino(inode); |
| @@ -6127,7 +6142,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 6127 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 6142 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 6128 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 6143 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 6129 | struct btrfs_trans_handle *trans = NULL; | 6144 | struct btrfs_trans_handle *trans = NULL; |
| 6130 | int compress_type; | 6145 | const bool new_inline = !page || create; |
| 6131 | 6146 | ||
| 6132 | again: | 6147 | again: |
| 6133 | read_lock(&em_tree->lock); | 6148 | read_lock(&em_tree->lock); |
| @@ -6201,7 +6216,6 @@ again: | |||
| 6201 | 6216 | ||
| 6202 | found_type = btrfs_file_extent_type(leaf, item); | 6217 | found_type = btrfs_file_extent_type(leaf, item); |
| 6203 | extent_start = found_key.offset; | 6218 | extent_start = found_key.offset; |
| 6204 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
| 6205 | if (found_type == BTRFS_FILE_EXTENT_REG || | 6219 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| 6206 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 6220 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
| 6207 | extent_end = extent_start + | 6221 | extent_end = extent_start + |
| @@ -6236,32 +6250,10 @@ next: | |||
| 6236 | goto not_found_em; | 6250 | goto not_found_em; |
| 6237 | } | 6251 | } |
| 6238 | 6252 | ||
| 6239 | em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item); | 6253 | btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); |
| 6254 | |||
| 6240 | if (found_type == BTRFS_FILE_EXTENT_REG || | 6255 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| 6241 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 6256 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
| 6242 | em->start = extent_start; | ||
| 6243 | em->len = extent_end - extent_start; | ||
| 6244 | em->orig_start = extent_start - | ||
| 6245 | btrfs_file_extent_offset(leaf, item); | ||
| 6246 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, | ||
| 6247 | item); | ||
| 6248 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); | ||
| 6249 | if (bytenr == 0) { | ||
| 6250 | em->block_start = EXTENT_MAP_HOLE; | ||
| 6251 | goto insert; | ||
| 6252 | } | ||
| 6253 | if (compress_type != BTRFS_COMPRESS_NONE) { | ||
| 6254 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 6255 | em->compress_type = compress_type; | ||
| 6256 | em->block_start = bytenr; | ||
| 6257 | em->block_len = em->orig_block_len; | ||
| 6258 | } else { | ||
| 6259 | bytenr += btrfs_file_extent_offset(leaf, item); | ||
| 6260 | em->block_start = bytenr; | ||
| 6261 | em->block_len = em->len; | ||
| 6262 | if (found_type == BTRFS_FILE_EXTENT_PREALLOC) | ||
| 6263 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 6264 | } | ||
| 6265 | goto insert; | 6257 | goto insert; |
| 6266 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 6258 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
| 6267 | unsigned long ptr; | 6259 | unsigned long ptr; |
| @@ -6270,12 +6262,8 @@ next: | |||
| 6270 | size_t extent_offset; | 6262 | size_t extent_offset; |
| 6271 | size_t copy_size; | 6263 | size_t copy_size; |
| 6272 | 6264 | ||
| 6273 | em->block_start = EXTENT_MAP_INLINE; | 6265 | if (new_inline) |
| 6274 | if (!page || create) { | ||
| 6275 | em->start = extent_start; | ||
| 6276 | em->len = extent_end - extent_start; | ||
| 6277 | goto out; | 6266 | goto out; |
| 6278 | } | ||
| 6279 | 6267 | ||
| 6280 | size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); | 6268 | size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); |
| 6281 | extent_offset = page_offset(page) + pg_offset - extent_start; | 6269 | extent_offset = page_offset(page) + pg_offset - extent_start; |
| @@ -6285,10 +6273,6 @@ next: | |||
| 6285 | em->len = ALIGN(copy_size, root->sectorsize); | 6273 | em->len = ALIGN(copy_size, root->sectorsize); |
| 6286 | em->orig_block_len = em->len; | 6274 | em->orig_block_len = em->len; |
| 6287 | em->orig_start = em->start; | 6275 | em->orig_start = em->start; |
| 6288 | if (compress_type) { | ||
| 6289 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | ||
| 6290 | em->compress_type = compress_type; | ||
| 6291 | } | ||
| 6292 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 6276 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
| 6293 | if (create == 0 && !PageUptodate(page)) { | 6277 | if (create == 0 && !PageUptodate(page)) { |
| 6294 | if (btrfs_file_extent_compression(leaf, item) != | 6278 | if (btrfs_file_extent_compression(leaf, item) != |
| @@ -6296,7 +6280,10 @@ next: | |||
| 6296 | ret = uncompress_inline(path, inode, page, | 6280 | ret = uncompress_inline(path, inode, page, |
| 6297 | pg_offset, | 6281 | pg_offset, |
| 6298 | extent_offset, item); | 6282 | extent_offset, item); |
| 6299 | BUG_ON(ret); /* -ENOMEM */ | 6283 | if (ret) { |
| 6284 | err = ret; | ||
| 6285 | goto out; | ||
| 6286 | } | ||
| 6300 | } else { | 6287 | } else { |
| 6301 | map = kmap(page); | 6288 | map = kmap(page); |
| 6302 | read_extent_buffer(leaf, map + pg_offset, ptr, | 6289 | read_extent_buffer(leaf, map + pg_offset, ptr, |
| @@ -6332,8 +6319,6 @@ next: | |||
| 6332 | set_extent_uptodate(io_tree, em->start, | 6319 | set_extent_uptodate(io_tree, em->start, |
| 6333 | extent_map_end(em) - 1, NULL, GFP_NOFS); | 6320 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
| 6334 | goto insert; | 6321 | goto insert; |
| 6335 | } else { | ||
| 6336 | WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type); | ||
| 6337 | } | 6322 | } |
| 6338 | not_found: | 6323 | not_found: |
| 6339 | em->start = start; | 6324 | em->start = start; |
| @@ -6717,6 +6702,76 @@ out: | |||
| 6717 | return ret; | 6702 | return ret; |
| 6718 | } | 6703 | } |
| 6719 | 6704 | ||
| 6705 | bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) | ||
| 6706 | { | ||
| 6707 | struct radix_tree_root *root = &inode->i_mapping->page_tree; | ||
| 6708 | int found = false; | ||
| 6709 | void **pagep = NULL; | ||
| 6710 | struct page *page = NULL; | ||
| 6711 | int start_idx; | ||
| 6712 | int end_idx; | ||
| 6713 | |||
| 6714 | start_idx = start >> PAGE_CACHE_SHIFT; | ||
| 6715 | |||
| 6716 | /* | ||
| 6717 | * end is the last byte in the last page. end == start is legal | ||
| 6718 | */ | ||
| 6719 | end_idx = end >> PAGE_CACHE_SHIFT; | ||
| 6720 | |||
| 6721 | rcu_read_lock(); | ||
| 6722 | |||
| 6723 | /* Most of the code in this while loop is lifted from | ||
| 6724 | * find_get_page. It's been modified to begin searching from a | ||
| 6725 | * page and return just the first page found in that range. If the | ||
| 6726 | * found idx is less than or equal to the end idx then we know that | ||
| 6727 | * a page exists. If no pages are found or if those pages are | ||
| 6728 | * outside of the range then we're fine (yay!) */ | ||
| 6729 | while (page == NULL && | ||
| 6730 | radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) { | ||
| 6731 | page = radix_tree_deref_slot(pagep); | ||
| 6732 | if (unlikely(!page)) | ||
| 6733 | break; | ||
| 6734 | |||
| 6735 | if (radix_tree_exception(page)) { | ||
| 6736 | if (radix_tree_deref_retry(page)) { | ||
| 6737 | page = NULL; | ||
| 6738 | continue; | ||
| 6739 | } | ||
| 6740 | /* | ||
| 6741 | * Otherwise, shmem/tmpfs must be storing a swap entry | ||
| 6742 | * here as an exceptional entry: so return it without | ||
| 6743 | * attempting to raise page count. | ||
| 6744 | */ | ||
| 6745 | page = NULL; | ||
| 6746 | break; /* TODO: Is this relevant for this use case? */ | ||
| 6747 | } | ||
| 6748 | |||
| 6749 | if (!page_cache_get_speculative(page)) { | ||
| 6750 | page = NULL; | ||
| 6751 | continue; | ||
| 6752 | } | ||
| 6753 | |||
| 6754 | /* | ||
| 6755 | * Has the page moved? | ||
| 6756 | * This is part of the lockless pagecache protocol. See | ||
| 6757 | * include/linux/pagemap.h for details. | ||
| 6758 | */ | ||
| 6759 | if (unlikely(page != *pagep)) { | ||
| 6760 | page_cache_release(page); | ||
| 6761 | page = NULL; | ||
| 6762 | } | ||
| 6763 | } | ||
| 6764 | |||
| 6765 | if (page) { | ||
| 6766 | if (page->index <= end_idx) | ||
| 6767 | found = true; | ||
| 6768 | page_cache_release(page); | ||
| 6769 | } | ||
| 6770 | |||
| 6771 | rcu_read_unlock(); | ||
| 6772 | return found; | ||
| 6773 | } | ||
| 6774 | |||
| 6720 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | 6775 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, |
| 6721 | struct extent_state **cached_state, int writing) | 6776 | struct extent_state **cached_state, int writing) |
| 6722 | { | 6777 | { |
| @@ -6741,10 +6796,9 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
| 6741 | * invalidate needs to happen so that reads after a write do not | 6796 | * invalidate needs to happen so that reads after a write do not |
| 6742 | * get stale data. | 6797 | * get stale data. |
| 6743 | */ | 6798 | */ |
| 6744 | if (!ordered && (!writing || | 6799 | if (!ordered && |
| 6745 | !test_range_bit(&BTRFS_I(inode)->io_tree, | 6800 | (!writing || |
| 6746 | lockstart, lockend, EXTENT_UPTODATE, 0, | 6801 | !btrfs_page_exists_in_range(inode, lockstart, lockend))) |
| 6747 | *cached_state))) | ||
| 6748 | break; | 6802 | break; |
| 6749 | 6803 | ||
| 6750 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 6804 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
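The last hunk replaces the EXTENT_UPTODATE bit test in lock_extent_direct() with the new btrfs_page_exists_in_range() helper, which probes the page cache radix tree directly. Its index arithmetic as a standalone sketch, assuming 4K pages (the kernel shifts by PAGE_CACHE_SHIFT): both arguments are byte offsets, end names the last byte of the range, and the lookup returns the first page at or after start_idx, so a page exists in the range exactly when that index is still at or below end_idx.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4K pages */

/* Mirrors the final test in btrfs_page_exists_in_range(): the radix
 * tree gang lookup starts at start_idx, so any page it returns is
 * already >= start_idx and only the upper bound needs checking. */
static bool page_in_range(unsigned long found_idx, long long start, long long end)
{
	unsigned long start_idx = start >> PAGE_SHIFT;
	unsigned long end_idx = end >> PAGE_SHIFT;	/* end == start is legal */

	return found_idx >= start_idx && found_idx <= end_idx;
}

int main(void)
{
	printf("%d\n", page_in_range(1, 4096, 8191));	/* 1: page 1 covers the range */
	printf("%d\n", page_in_range(2, 0, 4095));	/* 0: found page lies past end */
	return 0;
}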
| @@ -7126,7 +7180,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) | |||
| 7126 | * before atomic variable goto zero, we must make sure | 7180 | * before atomic variable goto zero, we must make sure |
| 7127 | * dip->errors is perceived to be set. | 7181 | * dip->errors is perceived to be set. |
| 7128 | */ | 7182 | */ |
| 7129 | smp_mb__before_atomic_dec(); | 7183 | smp_mb__before_atomic(); |
| 7130 | } | 7184 | } |
| 7131 | 7185 | ||
| 7132 | /* if there are more bios still pending for this dio, just exit */ | 7186 | /* if there are more bios still pending for this dio, just exit */ |
| @@ -7306,7 +7360,7 @@ out_err: | |||
| 7306 | * before atomic variable goto zero, we must | 7360 | * before atomic variable goto zero, we must |
| 7307 | * make sure dip->errors is perceived to be set. | 7361 | * make sure dip->errors is perceived to be set. |
| 7308 | */ | 7362 | */ |
| 7309 | smp_mb__before_atomic_dec(); | 7363 | smp_mb__before_atomic(); |
| 7310 | if (atomic_dec_and_test(&dip->pending_bios)) | 7364 | if (atomic_dec_and_test(&dip->pending_bios)) |
| 7311 | bio_io_error(dip->orig_bio); | 7365 | bio_io_error(dip->orig_bio); |
| 7312 | 7366 | ||
| @@ -7391,39 +7445,30 @@ free_ordered: | |||
| 7391 | } | 7445 | } |
| 7392 | 7446 | ||
| 7393 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | 7447 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, |
| 7394 | const struct iovec *iov, loff_t offset, | 7448 | const struct iov_iter *iter, loff_t offset) |
| 7395 | unsigned long nr_segs) | ||
| 7396 | { | 7449 | { |
| 7397 | int seg; | 7450 | int seg; |
| 7398 | int i; | 7451 | int i; |
| 7399 | size_t size; | ||
| 7400 | unsigned long addr; | ||
| 7401 | unsigned blocksize_mask = root->sectorsize - 1; | 7452 | unsigned blocksize_mask = root->sectorsize - 1; |
| 7402 | ssize_t retval = -EINVAL; | 7453 | ssize_t retval = -EINVAL; |
| 7403 | loff_t end = offset; | ||
| 7404 | 7454 | ||
| 7405 | if (offset & blocksize_mask) | 7455 | if (offset & blocksize_mask) |
| 7406 | goto out; | 7456 | goto out; |
| 7407 | 7457 | ||
| 7408 | /* Check the memory alignment. Blocks cannot straddle pages */ | 7458 | if (iov_iter_alignment(iter) & blocksize_mask) |
| 7409 | for (seg = 0; seg < nr_segs; seg++) { | 7459 | goto out; |
| 7410 | addr = (unsigned long)iov[seg].iov_base; | ||
| 7411 | size = iov[seg].iov_len; | ||
| 7412 | end += size; | ||
| 7413 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
| 7414 | goto out; | ||
| 7415 | |||
| 7416 | /* If this is a write we don't need to check anymore */ | ||
| 7417 | if (rw & WRITE) | ||
| 7418 | continue; | ||
| 7419 | 7460 | ||
| 7420 | /* | 7461 | /* If this is a write we don't need to check anymore */ |
| 7421 | * Check to make sure we don't have duplicate iov_base's in this | 7462 | if (rw & WRITE) |
| 7422 | * iovec, if so return EINVAL, otherwise we'll get csum errors | 7463 | return 0; |
| 7423 | * when reading back. | 7464 | /* |
| 7424 | */ | 7465 | * Check to make sure we don't have duplicate iov_base's in this |
| 7425 | for (i = seg + 1; i < nr_segs; i++) { | 7466 | * iovec, if so return EINVAL, otherwise we'll get csum errors |
| 7426 | if (iov[seg].iov_base == iov[i].iov_base) | 7467 | * when reading back. |
| 7468 | */ | ||
| 7469 | for (seg = 0; seg < iter->nr_segs; seg++) { | ||
| 7470 | for (i = seg + 1; i < iter->nr_segs; i++) { | ||
| 7471 | if (iter->iov[seg].iov_base == iter->iov[i].iov_base) | ||
| 7427 | goto out; | 7472 | goto out; |
| 7428 | } | 7473 | } |
| 7429 | } | 7474 | } |
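check_direct_IO() now folds the per-segment loop into a single iov_iter_alignment() call: the helper ORs together every iov_base and iov_len, so one misaligned segment taints the mask, the check fails, and btrfs_direct_IO() returns 0, punting to buffered I/O. From userspace the rule is unchanged: offset, buffer address and length must all be sector-aligned. A conforming O_DIRECT read, sketched with a 4096-byte alignment assumption (the real bound is root->sectorsize):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	void *buf;
	int fd;

	fd = open(argc > 1 ? argv[1] : "testfile", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* iov_base must be aligned, hence posix_memalign rather than malloc. */
	if (posix_memalign(&buf, 4096, 4096)) {
		perror("posix_memalign");
		close(fd);
		return 1;
	}
	/* Length and file offset are sector multiples as well. */
	if (pread(fd, buf, 4096, 0) < 0)
		perror("pread");
	free(buf);
	close(fd);
	return 0;
}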
| @@ -7433,8 +7478,7 @@ out: | |||
| 7433 | } | 7478 | } |
| 7434 | 7479 | ||
| 7435 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 7480 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
| 7436 | const struct iovec *iov, loff_t offset, | 7481 | struct iov_iter *iter, loff_t offset) |
| 7437 | unsigned long nr_segs) | ||
| 7438 | { | 7482 | { |
| 7439 | struct file *file = iocb->ki_filp; | 7483 | struct file *file = iocb->ki_filp; |
| 7440 | struct inode *inode = file->f_mapping->host; | 7484 | struct inode *inode = file->f_mapping->host; |
| @@ -7444,12 +7488,11 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7444 | bool relock = false; | 7488 | bool relock = false; |
| 7445 | ssize_t ret; | 7489 | ssize_t ret; |
| 7446 | 7490 | ||
| 7447 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | 7491 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) |
| 7448 | offset, nr_segs)) | ||
| 7449 | return 0; | 7492 | return 0; |
| 7450 | 7493 | ||
| 7451 | atomic_inc(&inode->i_dio_count); | 7494 | atomic_inc(&inode->i_dio_count); |
| 7452 | smp_mb__after_atomic_inc(); | 7495 | smp_mb__after_atomic(); |
| 7453 | 7496 | ||
| 7454 | /* | 7497 | /* |
| 7455 | * The generic stuff only does filemap_write_and_wait_range, which | 7498 | * The generic stuff only does filemap_write_and_wait_range, which |
| @@ -7457,7 +7500,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7457 | * we need to flush the dirty pages again to make absolutely sure | 7500 | * we need to flush the dirty pages again to make absolutely sure |
| 7458 | * that any outstanding dirty pages are on disk. | 7501 | * that any outstanding dirty pages are on disk. |
| 7459 | */ | 7502 | */ |
| 7460 | count = iov_length(iov, nr_segs); | 7503 | count = iov_iter_count(iter); |
| 7461 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | 7504 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
| 7462 | &BTRFS_I(inode)->runtime_flags)) | 7505 | &BTRFS_I(inode)->runtime_flags)) |
| 7463 | filemap_fdatawrite_range(inode->i_mapping, offset, count); | 7506 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
| @@ -7484,7 +7527,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7484 | 7527 | ||
| 7485 | ret = __blockdev_direct_IO(rw, iocb, inode, | 7528 | ret = __blockdev_direct_IO(rw, iocb, inode, |
| 7486 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | 7529 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, |
| 7487 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | 7530 | iter, offset, btrfs_get_blocks_direct, NULL, |
| 7488 | btrfs_submit_direct, flags); | 7531 | btrfs_submit_direct, flags); |
| 7489 | if (rw & WRITE) { | 7532 | if (rw & WRITE) { |
| 7490 | if (ret < 0 && ret != -EIOCBQUEUED) | 7533 | if (ret < 0 && ret != -EIOCBQUEUED) |
| @@ -7992,7 +8035,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
| 7992 | err = btrfs_subvol_inherit_props(trans, new_root, parent_root); | 8035 | err = btrfs_subvol_inherit_props(trans, new_root, parent_root); |
| 7993 | if (err) | 8036 | if (err) |
| 7994 | btrfs_err(new_root->fs_info, | 8037 | btrfs_err(new_root->fs_info, |
| 7995 | "error inheriting subvolume %llu properties: %d\n", | 8038 | "error inheriting subvolume %llu properties: %d", |
| 7996 | new_root->root_key.objectid, err); | 8039 | new_root->root_key.objectid, err); |
| 7997 | 8040 | ||
| 7998 | err = btrfs_update_inode(trans, new_root, inode); | 8041 | err = btrfs_update_inode(trans, new_root, inode); |
| @@ -8311,7 +8354,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 8311 | BTRFS_I(old_inode)->dir_index = 0ULL; | 8354 | BTRFS_I(old_inode)->dir_index = 0ULL; |
| 8312 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { | 8355 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
| 8313 | /* force full log commit if subvolume involved. */ | 8356 | /* force full log commit if subvolume involved. */ |
| 8314 | root->fs_info->last_trans_log_full_commit = trans->transid; | 8357 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 8315 | } else { | 8358 | } else { |
| 8316 | ret = btrfs_insert_inode_ref(trans, dest, | 8359 | ret = btrfs_insert_inode_ref(trans, dest, |
| 8317 | new_dentry->d_name.name, | 8360 | new_dentry->d_name.name, |
| @@ -8889,6 +8932,66 @@ static int btrfs_permission(struct inode *inode, int mask) | |||
| 8889 | return generic_permission(inode, mask); | 8932 | return generic_permission(inode, mask); |
| 8890 | } | 8933 | } |
| 8891 | 8934 | ||
| 8935 | static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
| 8936 | { | ||
| 8937 | struct btrfs_trans_handle *trans; | ||
| 8938 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 8939 | struct inode *inode = NULL; | ||
| 8940 | u64 objectid; | ||
| 8941 | u64 index; | ||
| 8942 | int ret = 0; | ||
| 8943 | |||
| 8944 | /* | ||
| 8945 | * 5 units required for adding orphan entry | ||
| 8946 | */ | ||
| 8947 | trans = btrfs_start_transaction(root, 5); | ||
| 8948 | if (IS_ERR(trans)) | ||
| 8949 | return PTR_ERR(trans); | ||
| 8950 | |||
| 8951 | ret = btrfs_find_free_ino(root, &objectid); | ||
| 8952 | if (ret) | ||
| 8953 | goto out; | ||
| 8954 | |||
| 8955 | inode = btrfs_new_inode(trans, root, dir, NULL, 0, | ||
| 8956 | btrfs_ino(dir), objectid, mode, &index); | ||
| 8957 | if (IS_ERR(inode)) { | ||
| 8958 | ret = PTR_ERR(inode); | ||
| 8959 | inode = NULL; | ||
| 8960 | goto out; | ||
| 8961 | } | ||
| 8962 | |||
| 8963 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); | ||
| 8964 | if (ret) | ||
| 8965 | goto out; | ||
| 8966 | |||
| 8967 | ret = btrfs_update_inode(trans, root, inode); | ||
| 8968 | if (ret) | ||
| 8969 | goto out; | ||
| 8970 | |||
| 8971 | inode->i_fop = &btrfs_file_operations; | ||
| 8972 | inode->i_op = &btrfs_file_inode_operations; | ||
| 8973 | |||
| 8974 | inode->i_mapping->a_ops = &btrfs_aops; | ||
| 8975 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
| 8976 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
| 8977 | |||
| 8978 | ret = btrfs_orphan_add(trans, inode); | ||
| 8979 | if (ret) | ||
| 8980 | goto out; | ||
| 8981 | |||
| 8982 | d_tmpfile(dentry, inode); | ||
| 8983 | mark_inode_dirty(inode); | ||
| 8984 | |||
| 8985 | out: | ||
| 8986 | btrfs_end_transaction(trans, root); | ||
| 8987 | if (ret) | ||
| 8988 | iput(inode); | ||
| 8989 | btrfs_balance_delayed_items(root); | ||
| 8990 | btrfs_btree_balance_dirty(root); | ||
| 8991 | |||
| 8992 | return ret; | ||
| 8993 | } | ||
| 8994 | |||
| 8892 | static const struct inode_operations btrfs_dir_inode_operations = { | 8995 | static const struct inode_operations btrfs_dir_inode_operations = { |
| 8893 | .getattr = btrfs_getattr, | 8996 | .getattr = btrfs_getattr, |
| 8894 | .lookup = btrfs_lookup, | 8997 | .lookup = btrfs_lookup, |
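btrfs_tmpfile() implements the O_TMPFILE open flag (registered as .tmpfile in the hunk that follows): it creates an inode with no name and no directory entry, note the NULL name passed to btrfs_new_inode(), and immediately adds an orphan item so a crash cannot leak the unnamed inode; the btrfs_link() hunk earlier in this file deletes that orphan item once the first hard link lands. A userspace sketch of the full life cycle, with /mnt standing in for any btrfs mount point:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[64];
	int fd;

	/* btrfs_tmpfile(): nameless inode, parked on the orphan list. */
	fd = open("/mnt", O_TMPFILE | O_RDWR, 0600);
	if (fd < 0) {
		perror("open(O_TMPFILE)");
		return 1;
	}
	if (write(fd, "data", 4) != 4)
		perror("write");
	/* btrfs_link(): i_nlink goes 0 -> 1 and the new hunk drops the
	 * orphan item, making the file permanent under a real name. */
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, path, AT_FDCWD, "/mnt/named-file",
		   AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");
	close(fd);
	return 0;
}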
| @@ -8909,6 +9012,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { | |||
| 8909 | .get_acl = btrfs_get_acl, | 9012 | .get_acl = btrfs_get_acl, |
| 8910 | .set_acl = btrfs_set_acl, | 9013 | .set_acl = btrfs_set_acl, |
| 8911 | .update_time = btrfs_update_time, | 9014 | .update_time = btrfs_update_time, |
| 9015 | .tmpfile = btrfs_tmpfile, | ||
| 8912 | }; | 9016 | }; |
| 8913 | static const struct inode_operations btrfs_dir_ro_inode_operations = { | 9017 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
| 8914 | .lookup = btrfs_lookup, | 9018 | .lookup = btrfs_lookup, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e79ff6b90cb7..0d321c23069a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -58,6 +58,7 @@ | |||
| 58 | #include "dev-replace.h" | 58 | #include "dev-replace.h" |
| 59 | #include "props.h" | 59 | #include "props.h" |
| 60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
| 61 | #include "qgroup.h" | ||
| 61 | 62 | ||
| 62 | #ifdef CONFIG_64BIT | 63 | #ifdef CONFIG_64BIT |
| 63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | 64 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI |
| @@ -638,11 +639,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 638 | struct btrfs_trans_handle *trans; | 639 | struct btrfs_trans_handle *trans; |
| 639 | int ret; | 640 | int ret; |
| 640 | 641 | ||
| 641 | if (!root->ref_cows) | 642 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 642 | return -EINVAL; | 643 | return -EINVAL; |
| 643 | 644 | ||
| 644 | atomic_inc(&root->will_be_snapshoted); | 645 | atomic_inc(&root->will_be_snapshoted); |
| 645 | smp_mb__after_atomic_inc(); | 646 | smp_mb__after_atomic(); |
| 646 | btrfs_wait_nocow_write(root); | 647 | btrfs_wait_nocow_write(root); |
| 647 | 648 | ||
| 648 | ret = btrfs_start_delalloc_inodes(root, 0); | 649 | ret = btrfs_start_delalloc_inodes(root, 0); |
| @@ -711,6 +712,35 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 711 | if (ret) | 712 | if (ret) |
| 712 | goto fail; | 713 | goto fail; |
| 713 | 714 | ||
| 715 | /* | ||
| 716 | * If orphan cleanup did remove any orphans, it means the tree was | ||
| 717 | * modified and therefore the commit root is not the same as the | ||
| 718 | * current root anymore. This is a problem, because send uses the | ||
| 719 | * commit root and therefore can see inode items that don't exist | ||
| 720 | * in the current root anymore, and for example make calls to | ||
| 721 | * btrfs_iget, which will do tree lookups based on the current root | ||
| 722 | * and not on the commit root. Those lookups will fail, returning a | ||
| 723 | * -ESTALE error, and making send fail with that error. So make sure | ||
| 724 | * a send does not see any orphans we have just removed, and that it | ||
| 725 | * will see the same inodes regardless of whether a transaction | ||
| 726 | * commit happened before it started (meaning that the commit root | ||
| 727 | * will be the same as the current root) or not. | ||
| 728 | */ | ||
| 729 | if (readonly && pending_snapshot->snap->node != | ||
| 730 | pending_snapshot->snap->commit_root) { | ||
| 731 | trans = btrfs_join_transaction(pending_snapshot->snap); | ||
| 732 | if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { | ||
| 733 | ret = PTR_ERR(trans); | ||
| 734 | goto fail; | ||
| 735 | } | ||
| 736 | if (!IS_ERR(trans)) { | ||
| 737 | ret = btrfs_commit_transaction(trans, | ||
| 738 | pending_snapshot->snap); | ||
| 739 | if (ret) | ||
| 740 | goto fail; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | |||
| 714 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 744 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
| 715 | if (IS_ERR(inode)) { | 745 | if (IS_ERR(inode)) { |
| 716 | ret = PTR_ERR(inode); | 746 | ret = PTR_ERR(inode); |
| @@ -1502,11 +1532,12 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
| 1502 | sizestr = vol_args->name; | 1532 | sizestr = vol_args->name; |
| 1503 | devstr = strchr(sizestr, ':'); | 1533 | devstr = strchr(sizestr, ':'); |
| 1504 | if (devstr) { | 1534 | if (devstr) { |
| 1505 | char *end; | ||
| 1506 | sizestr = devstr + 1; | 1535 | sizestr = devstr + 1; |
| 1507 | *devstr = '\0'; | 1536 | *devstr = '\0'; |
| 1508 | devstr = vol_args->name; | 1537 | devstr = vol_args->name; |
| 1509 | devid = simple_strtoull(devstr, &end, 10); | 1538 | ret = kstrtoull(devstr, 10, &devid); |
| 1539 | if (ret) | ||
| 1540 | goto out_free; | ||
| 1510 | if (!devid) { | 1541 | if (!devid) { |
| 1511 | ret = -EINVAL; | 1542 | ret = -EINVAL; |
| 1512 | goto out_free; | 1543 | goto out_free; |
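Swapping simple_strtoull() for kstrtoull() makes the devid parse strict: kstrtoull() returns -EINVAL on trailing garbage and -ERANGE on overflow, so a resize argument like "123abc:20g" now fails cleanly instead of being read as device 123. A userspace analogue of the stricter behaviour, since kstrtoull() itself is kernel-only:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* strtoull() plus the end-pointer check kstrtoull() performs for us. */
static int parse_devid(const char *s, unsigned long long *out)
{
	char *end;

	errno = 0;
	*out = strtoull(s, &end, 10);
	if (errno || end == s || *end != '\0')
		return -1;	/* kstrtoull reports -EINVAL or -ERANGE */
	return 0;
}

int main(void)
{
	unsigned long long devid;

	printf("%d\n", parse_devid("123", &devid));	/* 0: clean parse */
	printf("%d\n", parse_devid("123abc", &devid));	/* -1: now rejected */
	return 0;
}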
| @@ -1562,7 +1593,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
| 1562 | new_size = old_size - new_size; | 1593 | new_size = old_size - new_size; |
| 1563 | } else if (mod > 0) { | 1594 | } else if (mod > 0) { |
| 1564 | if (new_size > ULLONG_MAX - old_size) { | 1595 | if (new_size > ULLONG_MAX - old_size) { |
| 1565 | ret = -EINVAL; | 1596 | ret = -ERANGE; |
| 1566 | goto out_free; | 1597 | goto out_free; |
| 1567 | } | 1598 | } |
| 1568 | new_size = old_size + new_size; | 1599 | new_size = old_size + new_size; |
| @@ -1926,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
| 1926 | struct btrfs_path *path, | 1957 | struct btrfs_path *path, |
| 1927 | struct btrfs_key *key, | 1958 | struct btrfs_key *key, |
| 1928 | struct btrfs_ioctl_search_key *sk, | 1959 | struct btrfs_ioctl_search_key *sk, |
| 1929 | char *buf, | 1960 | size_t *buf_size, |
| 1961 | char __user *ubuf, | ||
| 1930 | unsigned long *sk_offset, | 1962 | unsigned long *sk_offset, |
| 1931 | int *num_found) | 1963 | int *num_found) |
| 1932 | { | 1964 | { |
| @@ -1958,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
| 1958 | if (!key_in_sk(key, sk)) | 1990 | if (!key_in_sk(key, sk)) |
| 1959 | continue; | 1991 | continue; |
| 1960 | 1992 | ||
| 1961 | if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) | 1993 | if (sizeof(sh) + item_len > *buf_size) { |
| 1994 | if (*num_found) { | ||
| 1995 | ret = 1; | ||
| 1996 | goto out; | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | /* | ||
| 2000 | * return one empty item back for v1, which does not | ||
| 2001 | * handle -EOVERFLOW | ||
| 2002 | */ | ||
| 2003 | |||
| 2004 | *buf_size = sizeof(sh) + item_len; | ||
| 1962 | item_len = 0; | 2005 | item_len = 0; |
| 2006 | ret = -EOVERFLOW; | ||
| 2007 | } | ||
| 1963 | 2008 | ||
| 1964 | if (sizeof(sh) + item_len + *sk_offset > | 2009 | if (sizeof(sh) + item_len + *sk_offset > *buf_size) { |
| 1965 | BTRFS_SEARCH_ARGS_BUFSIZE) { | ||
| 1966 | ret = 1; | 2010 | ret = 1; |
| 1967 | goto overflow; | 2011 | goto out; |
| 1968 | } | 2012 | } |
| 1969 | 2013 | ||
| 1970 | sh.objectid = key->objectid; | 2014 | sh.objectid = key->objectid; |
| @@ -1974,20 +2018,33 @@ static noinline int copy_to_sk(struct btrfs_root *root, | |||
| 1974 | sh.transid = found_transid; | 2018 | sh.transid = found_transid; |
| 1975 | 2019 | ||
| 1976 | /* copy search result header */ | 2020 | /* copy search result header */ |
| 1977 | memcpy(buf + *sk_offset, &sh, sizeof(sh)); | 2021 | if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { |
| 2022 | ret = -EFAULT; | ||
| 2023 | goto out; | ||
| 2024 | } | ||
| 2025 | |||
| 1978 | *sk_offset += sizeof(sh); | 2026 | *sk_offset += sizeof(sh); |
| 1979 | 2027 | ||
| 1980 | if (item_len) { | 2028 | if (item_len) { |
| 1981 | char *p = buf + *sk_offset; | 2029 | char __user *up = ubuf + *sk_offset; |
| 1982 | /* copy the item */ | 2030 | /* copy the item */ |
| 1983 | read_extent_buffer(leaf, p, | 2031 | if (read_extent_buffer_to_user(leaf, up, |
| 1984 | item_off, item_len); | 2032 | item_off, item_len)) { |
| 2033 | ret = -EFAULT; | ||
| 2034 | goto out; | ||
| 2035 | } | ||
| 2036 | |||
| 1985 | *sk_offset += item_len; | 2037 | *sk_offset += item_len; |
| 1986 | } | 2038 | } |
| 1987 | (*num_found)++; | 2039 | (*num_found)++; |
| 1988 | 2040 | ||
| 1989 | if (*num_found >= sk->nr_items) | 2041 | if (ret) /* -EOVERFLOW from above */ |
| 1990 | break; | 2042 | goto out; |
| 2043 | |||
| 2044 | if (*num_found >= sk->nr_items) { | ||
| 2045 | ret = 1; | ||
| 2046 | goto out; | ||
| 2047 | } | ||
| 1991 | } | 2048 | } |
| 1992 | advance_key: | 2049 | advance_key: |
| 1993 | ret = 0; | 2050 | ret = 0; |
| @@ -2002,22 +2059,37 @@ advance_key: | |||
| 2002 | key->objectid++; | 2059 | key->objectid++; |
| 2003 | } else | 2060 | } else |
| 2004 | ret = 1; | 2061 | ret = 1; |
| 2005 | overflow: | 2062 | out: |
| 2063 | /* | ||
| 2064 | * 0: all items from this leaf copied, continue with next | ||
| 2065 | * 1: * more items can be copied, but the unused buffer is too small | ||
| 2066 | * * all items were found | ||
| 2067 | * Either way, it stops the loop which iterates to the next | ||
| 2068 | * leaf | ||
| 2069 | * -EOVERFLOW: item was too large for the buffer | ||
| 2070 | * -EFAULT: could not copy extent buffer back to userspace | ||
| 2071 | */ | ||
| 2006 | return ret; | 2072 | return ret; |
| 2007 | } | 2073 | } |
| 2008 | 2074 | ||
| 2009 | static noinline int search_ioctl(struct inode *inode, | 2075 | static noinline int search_ioctl(struct inode *inode, |
| 2010 | struct btrfs_ioctl_search_args *args) | 2076 | struct btrfs_ioctl_search_key *sk, |
| 2077 | size_t *buf_size, | ||
| 2078 | char __user *ubuf) | ||
| 2011 | { | 2079 | { |
| 2012 | struct btrfs_root *root; | 2080 | struct btrfs_root *root; |
| 2013 | struct btrfs_key key; | 2081 | struct btrfs_key key; |
| 2014 | struct btrfs_path *path; | 2082 | struct btrfs_path *path; |
| 2015 | struct btrfs_ioctl_search_key *sk = &args->key; | ||
| 2016 | struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; | 2083 | struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; |
| 2017 | int ret; | 2084 | int ret; |
| 2018 | int num_found = 0; | 2085 | int num_found = 0; |
| 2019 | unsigned long sk_offset = 0; | 2086 | unsigned long sk_offset = 0; |
| 2020 | 2087 | ||
| 2088 | if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { | ||
| 2089 | *buf_size = sizeof(struct btrfs_ioctl_search_header); | ||
| 2090 | return -EOVERFLOW; | ||
| 2091 | } | ||
| 2092 | |||
| 2021 | path = btrfs_alloc_path(); | 2093 | path = btrfs_alloc_path(); |
| 2022 | if (!path) | 2094 | if (!path) |
| 2023 | return -ENOMEM; | 2095 | return -ENOMEM; |
| @@ -2051,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode, | |||
| 2051 | ret = 0; | 2123 | ret = 0; |
| 2052 | goto err; | 2124 | goto err; |
| 2053 | } | 2125 | } |
| 2054 | ret = copy_to_sk(root, path, &key, sk, args->buf, | 2126 | ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, |
| 2055 | &sk_offset, &num_found); | 2127 | &sk_offset, &num_found); |
| 2056 | btrfs_release_path(path); | 2128 | btrfs_release_path(path); |
| 2057 | if (ret || num_found >= sk->nr_items) | 2129 | if (ret) |
| 2058 | break; | 2130 | break; |
| 2059 | 2131 | ||
| 2060 | } | 2132 | } |
| 2061 | ret = 0; | 2133 | if (ret > 0) |
| 2134 | ret = 0; | ||
| 2062 | err: | 2135 | err: |
| 2063 | sk->nr_items = num_found; | 2136 | sk->nr_items = num_found; |
| 2064 | btrfs_free_path(path); | 2137 | btrfs_free_path(path); |
| @@ -2068,22 +2141,73 @@ err: | |||
| 2068 | static noinline int btrfs_ioctl_tree_search(struct file *file, | 2141 | static noinline int btrfs_ioctl_tree_search(struct file *file, |
| 2069 | void __user *argp) | 2142 | void __user *argp) |
| 2070 | { | 2143 | { |
| 2071 | struct btrfs_ioctl_search_args *args; | 2144 | struct btrfs_ioctl_search_args __user *uargs; |
| 2072 | struct inode *inode; | 2145 | struct btrfs_ioctl_search_key sk; |
| 2073 | int ret; | 2146 | struct inode *inode; |
| 2147 | int ret; | ||
| 2148 | size_t buf_size; | ||
| 2074 | 2149 | ||
| 2075 | if (!capable(CAP_SYS_ADMIN)) | 2150 | if (!capable(CAP_SYS_ADMIN)) |
| 2076 | return -EPERM; | 2151 | return -EPERM; |
| 2077 | 2152 | ||
| 2078 | args = memdup_user(argp, sizeof(*args)); | 2153 | uargs = (struct btrfs_ioctl_search_args __user *)argp; |
| 2079 | if (IS_ERR(args)) | 2154 | |
| 2080 | return PTR_ERR(args); | 2155 | if (copy_from_user(&sk, &uargs->key, sizeof(sk))) |
| 2156 | return -EFAULT; | ||
| 2157 | |||
| 2158 | buf_size = sizeof(uargs->buf); | ||
| 2081 | 2159 | ||
| 2082 | inode = file_inode(file); | 2160 | inode = file_inode(file); |
| 2083 | ret = search_ioctl(inode, args); | 2161 | ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); |
| 2084 | if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) | 2162 | |
| 2163 | /* | ||
| 2164 | * In the original implementation an overflow is handled by returning a | ||
| 2165 | * search header with a len of zero, so reset ret. | ||
| 2166 | */ | ||
| 2167 | if (ret == -EOVERFLOW) | ||
| 2168 | ret = 0; | ||
| 2169 | |||
| 2170 | if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) | ||
| 2085 | ret = -EFAULT; | 2171 | ret = -EFAULT; |
| 2086 | kfree(args); | 2172 | return ret; |
| 2173 | } | ||
| 2174 | |||
| 2175 | static noinline int btrfs_ioctl_tree_search_v2(struct file *file, | ||
| 2176 | void __user *argp) | ||
| 2177 | { | ||
| 2178 | struct btrfs_ioctl_search_args_v2 __user *uarg; | ||
| 2179 | struct btrfs_ioctl_search_args_v2 args; | ||
| 2180 | struct inode *inode; | ||
| 2181 | int ret; | ||
| 2182 | size_t buf_size; | ||
| 2183 | const size_t buf_limit = 16 * 1024 * 1024; | ||
| 2184 | |||
| 2185 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2186 | return -EPERM; | ||
| 2187 | |||
| 2188 | /* copy search header and buffer size */ | ||
| 2189 | uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; | ||
| 2190 | if (copy_from_user(&args, uarg, sizeof(args))) | ||
| 2191 | return -EFAULT; | ||
| 2192 | |||
| 2193 | buf_size = args.buf_size; | ||
| 2194 | |||
| 2195 | if (buf_size < sizeof(struct btrfs_ioctl_search_header)) | ||
| 2196 | return -EOVERFLOW; | ||
| 2197 | |||
| 2198 | /* limit result size to 16MB */ | ||
| 2199 | if (buf_size > buf_limit) | ||
| 2200 | buf_size = buf_limit; | ||
| 2201 | |||
| 2202 | inode = file_inode(file); | ||
| 2203 | ret = search_ioctl(inode, &args.key, &buf_size, | ||
| 2204 | (char *)(&uarg->buf[0])); | ||
| 2205 | if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) | ||
| 2206 | ret = -EFAULT; | ||
| 2207 | else if (ret == -EOVERFLOW && | ||
| 2208 | copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) | ||
| 2209 | ret = -EFAULT; | ||
| 2210 | |||
| 2087 | return ret; | 2211 | return ret; |
| 2088 | } | 2212 | } |
| 2089 | 2213 | ||
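Compared with v1, the v2 handler above lets the caller choose the result buffer size (capped at 16MB) and turns a too-small buffer into -EOVERFLOW with buf_size rewritten to the size the offending item needs, so userspace can retry. A hedged usage sketch, assuming a <linux/btrfs.h> that already carries BTRFS_IOC_TREE_SEARCH_V2 and struct btrfs_ioctl_search_args_v2, a CAP_SYS_ADMIN caller, and fd open anywhere on the filesystem (tree_id 1 is the root tree):

#include <errno.h>
#include <linux/btrfs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int dump_root_tree(int fd)
{
	struct btrfs_ioctl_search_args_v2 *args;
	__u64 bufsz = 65536;

	for (;;) {
		args = calloc(1, sizeof(*args) + bufsz);
		if (!args)
			return -1;
		args->key.tree_id = 1;			/* root tree */
		args->key.max_objectid = (__u64)-1;	/* widest key range */
		args->key.max_offset = (__u64)-1;
		args->key.max_transid = (__u64)-1;
		args->key.max_type = (__u32)-1;
		args->key.nr_items = (__u32)-1;		/* as many as fit */
		args->buf_size = bufsz;
		if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) == 0)
			break;
		if (errno != EOVERFLOW) {
			perror("BTRFS_IOC_TREE_SEARCH_V2");
			free(args);
			return -1;
		}
		bufsz = args->buf_size;	/* kernel wrote back the needed size */
		free(args);
	}
	printf("%u item(s) returned\n", args->key.nr_items);
	free(args);
	return 0;
}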
| @@ -2219,6 +2343,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 2219 | struct btrfs_ioctl_vol_args *vol_args; | 2343 | struct btrfs_ioctl_vol_args *vol_args; |
| 2220 | struct btrfs_trans_handle *trans; | 2344 | struct btrfs_trans_handle *trans; |
| 2221 | struct btrfs_block_rsv block_rsv; | 2345 | struct btrfs_block_rsv block_rsv; |
| 2346 | u64 root_flags; | ||
| 2222 | u64 qgroup_reserved; | 2347 | u64 qgroup_reserved; |
| 2223 | int namelen; | 2348 | int namelen; |
| 2224 | int ret; | 2349 | int ret; |
| @@ -2240,6 +2365,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 2240 | if (err) | 2365 | if (err) |
| 2241 | goto out; | 2366 | goto out; |
| 2242 | 2367 | ||
| 2368 | |||
| 2243 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); | 2369 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); |
| 2244 | if (err == -EINTR) | 2370 | if (err == -EINTR) |
| 2245 | goto out_drop_write; | 2371 | goto out_drop_write; |
| @@ -2301,6 +2427,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 2301 | } | 2427 | } |
| 2302 | 2428 | ||
| 2303 | mutex_lock(&inode->i_mutex); | 2429 | mutex_lock(&inode->i_mutex); |
| 2430 | |||
| 2431 | /* | ||
| 2432 | * Don't allow to delete a subvolume with send in progress. This is | ||
| 2433 | * inside the i_mutex so the error handling that has to drop the bit | ||
| 2434 | * again is not run concurrently. | ||
| 2435 | */ | ||
| 2436 | spin_lock(&dest->root_item_lock); | ||
| 2437 | root_flags = btrfs_root_flags(&dest->root_item); | ||
| 2438 | if (dest->send_in_progress == 0) { | ||
| 2439 | btrfs_set_root_flags(&dest->root_item, | ||
| 2440 | root_flags | BTRFS_ROOT_SUBVOL_DEAD); | ||
| 2441 | spin_unlock(&dest->root_item_lock); | ||
| 2442 | } else { | ||
| 2443 | spin_unlock(&dest->root_item_lock); | ||
| 2444 | btrfs_warn(root->fs_info, | ||
| 2445 | "Attempt to delete subvolume %llu during send", | ||
| 2446 | dest->root_key.objectid); | ||
| 2447 | err = -EPERM; | ||
| 2448 | goto out_dput; | ||
| 2449 | } | ||
| 2450 | |||
| 2304 | err = d_invalidate(dentry); | 2451 | err = d_invalidate(dentry); |
| 2305 | if (err) | 2452 | if (err) |
| 2306 | goto out_unlock; | 2453 | goto out_unlock; |
| @@ -2346,7 +2493,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 2346 | dest->root_item.drop_level = 0; | 2493 | dest->root_item.drop_level = 0; |
| 2347 | btrfs_set_root_refs(&dest->root_item, 0); | 2494 | btrfs_set_root_refs(&dest->root_item, 0); |
| 2348 | 2495 | ||
| 2349 | if (!xchg(&dest->orphan_item_inserted, 1)) { | 2496 | if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) { |
| 2350 | ret = btrfs_insert_orphan_item(trans, | 2497 | ret = btrfs_insert_orphan_item(trans, |
| 2351 | root->fs_info->tree_root, | 2498 | root->fs_info->tree_root, |
| 2352 | dest->root_key.objectid); | 2499 | dest->root_key.objectid); |
| @@ -2389,11 +2536,19 @@ out_release: | |||
| 2389 | out_up_write: | 2536 | out_up_write: |
| 2390 | up_write(&root->fs_info->subvol_sem); | 2537 | up_write(&root->fs_info->subvol_sem); |
| 2391 | out_unlock: | 2538 | out_unlock: |
| 2539 | if (err) { | ||
| 2540 | spin_lock(&dest->root_item_lock); | ||
| 2541 | root_flags = btrfs_root_flags(&dest->root_item); | ||
| 2542 | btrfs_set_root_flags(&dest->root_item, | ||
| 2543 | root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); | ||
| 2544 | spin_unlock(&dest->root_item_lock); | ||
| 2545 | } | ||
| 2392 | mutex_unlock(&inode->i_mutex); | 2546 | mutex_unlock(&inode->i_mutex); |
| 2393 | if (!err) { | 2547 | if (!err) { |
| 2394 | shrink_dcache_sb(root->fs_info->sb); | 2548 | shrink_dcache_sb(root->fs_info->sb); |
| 2395 | btrfs_invalidate_inodes(dest); | 2549 | btrfs_invalidate_inodes(dest); |
| 2396 | d_delete(dentry); | 2550 | d_delete(dentry); |
| 2551 | ASSERT(dest->send_in_progress == 0); | ||
| 2397 | 2552 | ||
| 2398 | /* the last ref */ | 2553 | /* the last ref */ |
| 2399 | if (dest->cache_inode) { | 2554 | if (dest->cache_inode) { |
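The hunks above form a handshake: deletion takes root_item_lock, bails out with -EPERM if send_in_progress is nonzero, and otherwise stamps BTRFS_ROOT_SUBVOL_DEAD on the root item before any destructive step, while the error path under out_unlock clears the flag again. The send side is expected to perform the mirror check under the same lock (that half lives in send.c, outside this excerpt), so exactly one of the two operations can win. A reduced sketch of the pattern, with a pthread mutex standing in for the spinlock and illustrative field names:

#include <pthread.h>
#include <stdbool.h>

struct subvol {
	pthread_mutex_t lock;	/* plays the role of root_item_lock */
	int send_in_progress;
	bool dead;		/* plays the role of BTRFS_ROOT_SUBVOL_DEAD */
};

/* Deletion side: mark the subvolume dead only if no send is active. */
static bool try_mark_dead(struct subvol *sv)
{
	bool ok;

	pthread_mutex_lock(&sv->lock);
	ok = (sv->send_in_progress == 0);
	if (ok)
		sv->dead = true;
	pthread_mutex_unlock(&sv->lock);
	return ok;	/* false: caller fails with -EPERM */
}

/* Send side: refuse to start against a subvolume being deleted. */
static bool try_start_send(struct subvol *sv)
{
	bool ok;

	pthread_mutex_lock(&sv->lock);
	ok = !sv->dead;
	if (ok)
		sv->send_in_progress++;
	pthread_mutex_unlock(&sv->lock);
	return ok;
}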
| @@ -2557,9 +2712,6 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | |||
| 2557 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 2712 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
| 2558 | int ret = 0; | 2713 | int ret = 0; |
| 2559 | 2714 | ||
| 2560 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2561 | return -EPERM; | ||
| 2562 | |||
| 2563 | fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); | 2715 | fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); |
| 2564 | if (!fi_args) | 2716 | if (!fi_args) |
| 2565 | return -ENOMEM; | 2717 | return -ENOMEM; |
| @@ -2574,6 +2726,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | |||
| 2574 | } | 2726 | } |
| 2575 | mutex_unlock(&fs_devices->device_list_mutex); | 2727 | mutex_unlock(&fs_devices->device_list_mutex); |
| 2576 | 2728 | ||
| 2729 | fi_args->nodesize = root->fs_info->super_copy->nodesize; | ||
| 2730 | fi_args->sectorsize = root->fs_info->super_copy->sectorsize; | ||
| 2731 | fi_args->clone_alignment = root->fs_info->super_copy->sectorsize; | ||
| 2732 | |||
| 2577 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) | 2733 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) |
| 2578 | ret = -EFAULT; | 2734 | ret = -EFAULT; |
| 2579 | 2735 | ||
| @@ -2589,9 +2745,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
| 2589 | int ret = 0; | 2745 | int ret = 0; |
| 2590 | char *s_uuid = NULL; | 2746 | char *s_uuid = NULL; |
| 2591 | 2747 | ||
| 2592 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2593 | return -EPERM; | ||
| 2594 | |||
| 2595 | di_args = memdup_user(arg, sizeof(*di_args)); | 2748 | di_args = memdup_user(arg, sizeof(*di_args)); |
| 2596 | if (IS_ERR(di_args)) | 2749 | if (IS_ERR(di_args)) |
| 2597 | return PTR_ERR(di_args); | 2750 | return PTR_ERR(di_args); |
| @@ -2669,10 +2822,15 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
| 2669 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2822 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
| 2670 | ordered = btrfs_lookup_first_ordered_extent(inode, | 2823 | ordered = btrfs_lookup_first_ordered_extent(inode, |
| 2671 | off + len - 1); | 2824 | off + len - 1); |
| 2672 | if (!ordered && | 2825 | if ((!ordered || |
| 2826 | ordered->file_offset + ordered->len <= off || | ||
| 2827 | ordered->file_offset >= off + len) && | ||
| 2673 | !test_range_bit(&BTRFS_I(inode)->io_tree, off, | 2828 | !test_range_bit(&BTRFS_I(inode)->io_tree, off, |
| 2674 | off + len - 1, EXTENT_DELALLOC, 0, NULL)) | 2829 | off + len - 1, EXTENT_DELALLOC, 0, NULL)) { |
| 2830 | if (ordered) | ||
| 2831 | btrfs_put_ordered_extent(ordered); | ||
| 2675 | break; | 2832 | break; |
| 2833 | } | ||
| 2676 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2834 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
| 2677 | if (ordered) | 2835 | if (ordered) |
| 2678 | btrfs_put_ordered_extent(ordered); | 2836 | btrfs_put_ordered_extent(ordered); |
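Before this hunk, any ordered extent returned by btrfs_lookup_first_ordered_extent() sent lock_extent_range() back around its wait loop, even though the helper can return an ordered extent that does not actually overlap the range being locked. The new condition lets the lock proceed when the ordered extent lies entirely outside [off, off + len). The predicate in isolation, as a standalone sketch:

#include <stdbool.h>

/* An ordered extent [file_offset, file_offset + olen) can be ignored
 * exactly when it ends at or before the locked range begins, or
 * starts at or past where the locked range ends. */
static bool ordered_outside_range(unsigned long long file_offset,
				  unsigned long long olen,
				  unsigned long long off,
				  unsigned long long len)
{
	return file_offset + olen <= off || file_offset >= off + len;
}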
| @@ -2912,6 +3070,126 @@ out: | |||
| 2912 | return ret; | 3070 | return ret; |
| 2913 | } | 3071 | } |
| 2914 | 3072 | ||
| 3073 | /* Helper to check and see if this root currently has a ref on the given disk | ||
| 3074 | * bytenr. If it does then we need to update the quota for this root. This | ||
| 3075 | * doesn't do anything if quotas aren't enabled. | ||
| 3076 | */ | ||
| 3077 | static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 3078 | u64 disko) | ||
| 3079 | { | ||
| 3080 | struct seq_list tree_mod_seq_elem = {}; | ||
| 3081 | struct ulist *roots; | ||
| 3082 | struct ulist_iterator uiter; | ||
| 3083 | struct ulist_node *root_node = NULL; | ||
| 3084 | int ret; | ||
| 3085 | |||
| 3086 | if (!root->fs_info->quota_enabled) | ||
| 3087 | return 1; | ||
| 3088 | |||
| 3089 | btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); | ||
| 3090 | ret = btrfs_find_all_roots(trans, root->fs_info, disko, | ||
| 3091 | tree_mod_seq_elem.seq, &roots); | ||
| 3092 | if (ret < 0) | ||
| 3093 | goto out; | ||
| 3094 | ret = 0; | ||
| 3095 | ULIST_ITER_INIT(&uiter); | ||
| 3096 | while ((root_node = ulist_next(roots, &uiter))) { | ||
| 3097 | if (root_node->val == root->objectid) { | ||
| 3098 | ret = 1; | ||
| 3099 | break; | ||
| 3100 | } | ||
| 3101 | } | ||
| 3102 | ulist_free(roots); | ||
| 3103 | out: | ||
| 3104 | btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); | ||
| 3105 | return ret; | ||
| 3106 | } | ||
| 3107 | |||
| 3108 | static int clone_finish_inode_update(struct btrfs_trans_handle *trans, | ||
| 3109 | struct inode *inode, | ||
| 3110 | u64 endoff, | ||
| 3111 | const u64 destoff, | ||
| 3112 | const u64 olen) | ||
| 3113 | { | ||
| 3114 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3115 | int ret; | ||
| 3116 | |||
| 3117 | inode_inc_iversion(inode); | ||
| 3118 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 3119 | /* | ||
| 3120 | * We round up to the block size at eof when determining which | ||
| 3121 | * extents to clone above, but shouldn't round up the file size. | ||
| 3122 | */ | ||
| 3123 | if (endoff > destoff + olen) | ||
| 3124 | endoff = destoff + olen; | ||
| 3125 | if (endoff > inode->i_size) | ||
| 3126 | btrfs_i_size_write(inode, endoff); | ||
| 3127 | |||
| 3128 | ret = btrfs_update_inode(trans, root, inode); | ||
| 3129 | if (ret) { | ||
| 3130 | btrfs_abort_transaction(trans, root, ret); | ||
| 3131 | btrfs_end_transaction(trans, root); | ||
| 3132 | goto out; | ||
| 3133 | } | ||
| 3134 | ret = btrfs_end_transaction(trans, root); | ||
| 3135 | out: | ||
| 3136 | return ret; | ||
| 3137 | } | ||
| 3138 | |||
| 3139 | static void clone_update_extent_map(struct inode *inode, | ||
| 3140 | const struct btrfs_trans_handle *trans, | ||
| 3141 | const struct btrfs_path *path, | ||
| 3142 | struct btrfs_file_extent_item *fi, | ||
| 3143 | const u64 hole_offset, | ||
| 3144 | const u64 hole_len) | ||
| 3145 | { | ||
| 3146 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 3147 | struct extent_map *em; | ||
| 3148 | int ret; | ||
| 3149 | |||
| 3150 | em = alloc_extent_map(); | ||
| 3151 | if (!em) { | ||
| 3152 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3153 | &BTRFS_I(inode)->runtime_flags); | ||
| 3154 | return; | ||
| 3155 | } | ||
| 3156 | |||
| 3157 | if (fi) { | ||
| 3158 | btrfs_extent_item_to_extent_map(inode, path, fi, false, em); | ||
| 3159 | em->generation = -1; | ||
| 3160 | if (btrfs_file_extent_type(path->nodes[0], fi) == | ||
| 3161 | BTRFS_FILE_EXTENT_INLINE) | ||
| 3162 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3163 | &BTRFS_I(inode)->runtime_flags); | ||
| 3164 | } else { | ||
| 3165 | em->start = hole_offset; | ||
| 3166 | em->len = hole_len; | ||
| 3167 | em->ram_bytes = em->len; | ||
| 3168 | em->orig_start = hole_offset; | ||
| 3169 | em->block_start = EXTENT_MAP_HOLE; | ||
| 3170 | em->block_len = 0; | ||
| 3171 | em->orig_block_len = 0; | ||
| 3172 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
| 3173 | em->generation = trans->transid; | ||
| 3174 | } | ||
| 3175 | |||
| 3176 | while (1) { | ||
| 3177 | write_lock(&em_tree->lock); | ||
| 3178 | ret = add_extent_mapping(em_tree, em, 1); | ||
| 3179 | write_unlock(&em_tree->lock); | ||
| 3180 | if (ret != -EEXIST) { | ||
| 3181 | free_extent_map(em); | ||
| 3182 | break; | ||
| 3183 | } | ||
| 3184 | btrfs_drop_extent_cache(inode, em->start, | ||
| 3185 | em->start + em->len - 1, 0); | ||
| 3186 | } | ||
| 3187 | |||
| 3188 | if (unlikely(ret)) | ||
| 3189 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3190 | &BTRFS_I(inode)->runtime_flags); | ||
| 3191 | } | ||
| 3192 | |||
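The add/drop loop in clone_update_extent_map() is the usual pattern for replacing cached extent maps: attempt the insert, and on -EEXIST evict whatever overlaps and retry. A self-contained sketch of that loop, with callbacks standing in for add_extent_mapping() and btrfs_drop_extent_cache():

    #include <errno.h>

    /* try_insert returns 0 on success, -EEXIST if an existing entry
     * overlaps, or another negative errno on hard failure. */
    static int insert_replacing_overlaps(void *tree, void *em,
                                         int (*try_insert)(void *, void *),
                                         void (*drop_overlap)(void *, void *))
    {
        int ret;

        for (;;) {
            ret = try_insert(tree, em);
            if (ret != -EEXIST)
                break;              /* inserted, or a hard error */
            drop_overlap(tree, em); /* evict the overlap and retry */
        }
        return ret;
    }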
| 2915 | /** | 3193 | /** |
| 2916 | * btrfs_clone() - clone a range from inode file to another | 3194 | * btrfs_clone() - clone a range from inode file to another |
| 2917 | * | 3195 | * |
| @@ -2924,7 +3202,8 @@ out: | |||
| 2924 | * @destoff: Offset within @inode to start clone | 3202 | * @destoff: Offset within @inode to start clone |
| 2925 | */ | 3203 | */ |
| 2926 | static int btrfs_clone(struct inode *src, struct inode *inode, | 3204 | static int btrfs_clone(struct inode *src, struct inode *inode, |
| 2927 | u64 off, u64 olen, u64 olen_aligned, u64 destoff) | 3205 | const u64 off, const u64 olen, const u64 olen_aligned, |
| 3206 | const u64 destoff) | ||
| 2928 | { | 3207 | { |
| 2929 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3208 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2930 | struct btrfs_path *path = NULL; | 3209 | struct btrfs_path *path = NULL; |
| @@ -2935,7 +3214,10 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
| 2935 | u32 nritems; | 3214 | u32 nritems; |
| 2936 | int slot; | 3215 | int slot; |
| 2937 | int ret; | 3216 | int ret; |
| 2938 | u64 len = olen_aligned; | 3217 | int no_quota; |
| 3218 | const u64 len = olen_aligned; | ||
| 3219 | u64 last_disko = 0; | ||
| 3220 | u64 last_dest_end = destoff; | ||
| 2939 | 3221 | ||
| 2940 | ret = -ENOMEM; | 3222 | ret = -ENOMEM; |
| 2941 | buf = vmalloc(btrfs_level_size(root, 0)); | 3223 | buf = vmalloc(btrfs_level_size(root, 0)); |
| @@ -2952,7 +3234,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
| 2952 | /* clone data */ | 3234 | /* clone data */ |
| 2953 | key.objectid = btrfs_ino(src); | 3235 | key.objectid = btrfs_ino(src); |
| 2954 | key.type = BTRFS_EXTENT_DATA_KEY; | 3236 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 2955 | key.offset = 0; | 3237 | key.offset = off; |
| 2956 | 3238 | ||
| 2957 | while (1) { | 3239 | while (1) { |
| 2958 | /* | 3240 | /* |
| @@ -2964,9 +3246,21 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
| 2964 | 0, 0); | 3246 | 0, 0); |
| 2965 | if (ret < 0) | 3247 | if (ret < 0) |
| 2966 | goto out; | 3248 | goto out; |
| 3249 | /* | ||
| 3250 | * On the first search, if no extent item that starts at offset off | ||
| 3251 | * was found but the previous item is an extent item, it might | ||
| 3252 | * overlap our target range, so process it. | ||
| 3253 | */ | ||
| 3254 | if (key.offset == off && ret > 0 && path->slots[0] > 0) { | ||
| 3255 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
| 3256 | path->slots[0] - 1); | ||
| 3257 | if (key.type == BTRFS_EXTENT_DATA_KEY) | ||
| 3258 | path->slots[0]--; | ||
| 3259 | } | ||
| 2967 | 3260 | ||
| 2968 | nritems = btrfs_header_nritems(path->nodes[0]); | 3261 | nritems = btrfs_header_nritems(path->nodes[0]); |
| 2969 | process_slot: | 3262 | process_slot: |
| 3263 | no_quota = 1; | ||
| 2970 | if (path->slots[0] >= nritems) { | 3264 | if (path->slots[0] >= nritems) { |
| 2971 | ret = btrfs_next_leaf(BTRFS_I(src)->root, path); | 3265 | ret = btrfs_next_leaf(BTRFS_I(src)->root, path); |
| 2972 | if (ret < 0) | 3266 | if (ret < 0) |
| @@ -2991,7 +3285,7 @@ process_slot: | |||
| 2991 | u64 disko = 0, diskl = 0; | 3285 | u64 disko = 0, diskl = 0; |
| 2992 | u64 datao = 0, datal = 0; | 3286 | u64 datao = 0, datal = 0; |
| 2993 | u8 comp; | 3287 | u8 comp; |
| 2994 | u64 endoff; | 3288 | u64 drop_start; |
| 2995 | 3289 | ||
| 2996 | extent = btrfs_item_ptr(leaf, slot, | 3290 | extent = btrfs_item_ptr(leaf, slot, |
| 2997 | struct btrfs_file_extent_item); | 3291 | struct btrfs_file_extent_item); |
| @@ -3012,10 +3306,16 @@ process_slot: | |||
| 3012 | extent); | 3306 | extent); |
| 3013 | } | 3307 | } |
| 3014 | 3308 | ||
| 3015 | if (key.offset + datal <= off || | 3309 | /* |
| 3016 | key.offset >= off + len - 1) { | 3310 | * The first search might have left us at an extent |
| 3311 | * item that ends before our target range's start, which can | ||
| 3312 | * happen if we have holes and the NO_HOLES feature enabled. | ||
| 3313 | */ | ||
| 3314 | if (key.offset + datal <= off) { | ||
| 3017 | path->slots[0]++; | 3315 | path->slots[0]++; |
| 3018 | goto process_slot; | 3316 | goto process_slot; |
| 3317 | } else if (key.offset >= off + len) { | ||
| 3318 | break; | ||
| 3019 | } | 3319 | } |
| 3020 | 3320 | ||
| 3021 | size = btrfs_item_size_nr(leaf, slot); | 3321 | size = btrfs_item_size_nr(leaf, slot); |
| @@ -3034,6 +3334,18 @@ process_slot: | |||
| 3034 | new_key.offset = destoff; | 3334 | new_key.offset = destoff; |
| 3035 | 3335 | ||
| 3036 | /* | 3336 | /* |
| 3337 | * Deal with a hole that doesn't have an extent item | ||
| 3338 | * that represents it (NO_HOLES feature enabled). | ||
| 3339 | * This hole is either in the middle of the cloning | ||
| 3340 | * range or at the beginning (it fully or partially | ||
| 3341 | * overlaps the range). | ||
| 3342 | */ | ||
| 3343 | if (new_key.offset != last_dest_end) | ||
| 3344 | drop_start = last_dest_end; | ||
| 3345 | else | ||
| 3346 | drop_start = new_key.offset; | ||
| 3347 | |||
| 3348 | /* | ||
| 3037 | * 1 - adjusting old extent (we may have to split it) | 3349 | * 1 - adjusting old extent (we may have to split it) |
| 3038 | * 1 - add new extent | 3350 | * 1 - add new extent |
| 3039 | * 1 - inode update | 3351 | * 1 - inode update |
| @@ -3051,22 +3363,22 @@ process_slot: | |||
| 3051 | * | ------------- extent ------------- | | 3363 | * | ------------- extent ------------- | |
| 3052 | */ | 3364 | */ |
| 3053 | 3365 | ||
| 3054 | /* substract range b */ | 3366 | /* subtract range b */ |
| 3055 | if (key.offset + datal > off + len) | 3367 | if (key.offset + datal > off + len) |
| 3056 | datal = off + len - key.offset; | 3368 | datal = off + len - key.offset; |
| 3057 | 3369 | ||
| 3058 | /* substract range a */ | 3370 | /* subtract range a */ |
| 3059 | if (off > key.offset) { | 3371 | if (off > key.offset) { |
| 3060 | datao += off - key.offset; | 3372 | datao += off - key.offset; |
| 3061 | datal -= off - key.offset; | 3373 | datal -= off - key.offset; |
| 3062 | } | 3374 | } |
| 3063 | 3375 | ||
| 3064 | ret = btrfs_drop_extents(trans, root, inode, | 3376 | ret = btrfs_drop_extents(trans, root, inode, |
| 3065 | new_key.offset, | 3377 | drop_start, |
| 3066 | new_key.offset + datal, | 3378 | new_key.offset + datal, |
| 3067 | 1); | 3379 | 1); |
| 3068 | if (ret) { | 3380 | if (ret) { |
| 3069 | if (ret != -EINVAL) | 3381 | if (ret != -EOPNOTSUPP) |
| 3070 | btrfs_abort_transaction(trans, | 3382 | btrfs_abort_transaction(trans, |
| 3071 | root, ret); | 3383 | root, ret); |
| 3072 | btrfs_end_transaction(trans, root); | 3384 | btrfs_end_transaction(trans, root); |
| @@ -3099,6 +3411,28 @@ process_slot: | |||
| 3099 | datao); | 3411 | datao); |
| 3100 | btrfs_set_file_extent_num_bytes(leaf, extent, | 3412 | btrfs_set_file_extent_num_bytes(leaf, extent, |
| 3101 | datal); | 3413 | datal); |
| 3414 | |||
| 3415 | /* | ||
| 3416 | * We need to look up the roots that point at | ||
| 3417 | * this bytenr and see if the new root does. If | ||
| 3418 | * it does not we need to make sure we update | ||
| 3419 | * quotas appropriately. | ||
| 3420 | */ | ||
| 3421 | if (disko && root != BTRFS_I(src)->root && | ||
| 3422 | disko != last_disko) { | ||
| 3423 | no_quota = check_ref(trans, root, | ||
| 3424 | disko); | ||
| 3425 | if (no_quota < 0) { | ||
| 3426 | btrfs_abort_transaction(trans, | ||
| 3427 | root, | ||
| 3428 | ret); | ||
| 3429 | btrfs_end_transaction(trans, | ||
| 3430 | root); | ||
| 3431 | ret = no_quota; | ||
| 3432 | goto out; | ||
| 3433 | } | ||
| 3434 | } | ||
| 3435 | |||
| 3102 | if (disko) { | 3436 | if (disko) { |
| 3103 | inode_add_bytes(inode, datal); | 3437 | inode_add_bytes(inode, datal); |
| 3104 | ret = btrfs_inc_extent_ref(trans, root, | 3438 | ret = btrfs_inc_extent_ref(trans, root, |
| @@ -3106,7 +3440,7 @@ process_slot: | |||
| 3106 | root->root_key.objectid, | 3440 | root->root_key.objectid, |
| 3107 | btrfs_ino(inode), | 3441 | btrfs_ino(inode), |
| 3108 | new_key.offset - datao, | 3442 | new_key.offset - datao, |
| 3109 | 0); | 3443 | no_quota); |
| 3110 | if (ret) { | 3444 | if (ret) { |
| 3111 | btrfs_abort_transaction(trans, | 3445 | btrfs_abort_transaction(trans, |
| 3112 | root, | 3446 | root, |
| @@ -3120,6 +3454,8 @@ process_slot: | |||
| 3120 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | 3454 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { |
| 3121 | u64 skip = 0; | 3455 | u64 skip = 0; |
| 3122 | u64 trim = 0; | 3456 | u64 trim = 0; |
| 3457 | u64 aligned_end = 0; | ||
| 3458 | |||
| 3123 | if (off > key.offset) { | 3459 | if (off > key.offset) { |
| 3124 | skip = off - key.offset; | 3460 | skip = off - key.offset; |
| 3125 | new_key.offset += skip; | 3461 | new_key.offset += skip; |
| @@ -3136,12 +3472,14 @@ process_slot: | |||
| 3136 | size -= skip + trim; | 3472 | size -= skip + trim; |
| 3137 | datal -= skip + trim; | 3473 | datal -= skip + trim; |
| 3138 | 3474 | ||
| 3475 | aligned_end = ALIGN(new_key.offset + datal, | ||
| 3476 | root->sectorsize); | ||
| 3139 | ret = btrfs_drop_extents(trans, root, inode, | 3477 | ret = btrfs_drop_extents(trans, root, inode, |
| 3140 | new_key.offset, | 3478 | drop_start, |
| 3141 | new_key.offset + datal, | 3479 | aligned_end, |
| 3142 | 1); | 3480 | 1); |
| 3143 | if (ret) { | 3481 | if (ret) { |
| 3144 | if (ret != -EINVAL) | 3482 | if (ret != -EOPNOTSUPP) |
| 3145 | btrfs_abort_transaction(trans, | 3483 | btrfs_abort_transaction(trans, |
| 3146 | root, ret); | 3484 | root, ret); |
| 3147 | btrfs_end_transaction(trans, root); | 3485 | btrfs_end_transaction(trans, root); |
| @@ -3170,40 +3508,69 @@ process_slot: | |||
| 3170 | btrfs_item_ptr_offset(leaf, slot), | 3508 | btrfs_item_ptr_offset(leaf, slot), |
| 3171 | size); | 3509 | size); |
| 3172 | inode_add_bytes(inode, datal); | 3510 | inode_add_bytes(inode, datal); |
| 3511 | extent = btrfs_item_ptr(leaf, slot, | ||
| 3512 | struct btrfs_file_extent_item); | ||
| 3173 | } | 3513 | } |
| 3174 | 3514 | ||
| 3515 | /* If we have an implicit hole (NO_HOLES feature). */ | ||
| 3516 | if (drop_start < new_key.offset) | ||
| 3517 | clone_update_extent_map(inode, trans, | ||
| 3518 | path, NULL, drop_start, | ||
| 3519 | new_key.offset - drop_start); | ||
| 3520 | |||
| 3521 | clone_update_extent_map(inode, trans, path, | ||
| 3522 | extent, 0, 0); | ||
| 3523 | |||
| 3175 | btrfs_mark_buffer_dirty(leaf); | 3524 | btrfs_mark_buffer_dirty(leaf); |
| 3176 | btrfs_release_path(path); | 3525 | btrfs_release_path(path); |
| 3177 | 3526 | ||
| 3178 | inode_inc_iversion(inode); | 3527 | last_dest_end = new_key.offset + datal; |
| 3179 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 3528 | ret = clone_finish_inode_update(trans, inode, |
| 3180 | 3529 | last_dest_end, | |
| 3181 | /* | 3530 | destoff, olen); |
| 3182 | * we round up to the block size at eof when | 3531 | if (ret) |
| 3183 | * determining which extents to clone above, | ||
| 3184 | * but shouldn't round up the file size | ||
| 3185 | */ | ||
| 3186 | endoff = new_key.offset + datal; | ||
| 3187 | if (endoff > destoff+olen) | ||
| 3188 | endoff = destoff+olen; | ||
| 3189 | if (endoff > inode->i_size) | ||
| 3190 | btrfs_i_size_write(inode, endoff); | ||
| 3191 | |||
| 3192 | ret = btrfs_update_inode(trans, root, inode); | ||
| 3193 | if (ret) { | ||
| 3194 | btrfs_abort_transaction(trans, root, ret); | ||
| 3195 | btrfs_end_transaction(trans, root); | ||
| 3196 | goto out; | 3532 | goto out; |
| 3197 | } | 3533 | if (new_key.offset + datal >= destoff + len) |
| 3198 | ret = btrfs_end_transaction(trans, root); | 3534 | break; |
| 3199 | } | 3535 | } |
| 3200 | btrfs_release_path(path); | 3536 | btrfs_release_path(path); |
| 3201 | key.offset++; | 3537 | key.offset++; |
| 3202 | } | 3538 | } |
| 3203 | ret = 0; | 3539 | ret = 0; |
| 3204 | 3540 | ||
| 3541 | if (last_dest_end < destoff + len) { | ||
| 3542 | /* | ||
| 3543 | * We have an implicit hole (NO_HOLES feature is enabled) that | ||
| 3544 | * fully or partially overlaps our cloning range at its end. | ||
| 3545 | */ | ||
| 3546 | btrfs_release_path(path); | ||
| 3547 | |||
| 3548 | /* | ||
| 3549 | * 1 - remove extent(s) | ||
| 3550 | * 1 - inode update | ||
| 3551 | */ | ||
| 3552 | trans = btrfs_start_transaction(root, 2); | ||
| 3553 | if (IS_ERR(trans)) { | ||
| 3554 | ret = PTR_ERR(trans); | ||
| 3555 | goto out; | ||
| 3556 | } | ||
| 3557 | ret = btrfs_drop_extents(trans, root, inode, | ||
| 3558 | last_dest_end, destoff + len, 1); | ||
| 3559 | if (ret) { | ||
| 3560 | if (ret != -EOPNOTSUPP) | ||
| 3561 | btrfs_abort_transaction(trans, root, ret); | ||
| 3562 | btrfs_end_transaction(trans, root); | ||
| 3563 | goto out; | ||
| 3564 | } | ||
| 3565 | ret = clone_finish_inode_update(trans, inode, destoff + len, | ||
| 3566 | destoff, olen); | ||
| 3567 | if (ret) | ||
| 3568 | goto out; | ||
| 3569 | clone_update_extent_map(inode, trans, path, NULL, last_dest_end, | ||
| 3570 | destoff + len - last_dest_end); | ||
| 3571 | } | ||
| 3572 | |||
| 3205 | out: | 3573 | out: |
| 3206 | btrfs_release_path(path); | ||
| 3207 | btrfs_free_path(path); | 3574 | btrfs_free_path(path); |
| 3208 | vfree(buf); | 3575 | vfree(buf); |
| 3209 | return ret; | 3576 | return ret; |
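The "subtract range a / subtract range b" steps in btrfs_clone() clip each source extent item to the requested range before copying it. The same arithmetic as an isolated sketch (the helper is illustrative; u64 is spelled out):

    /* Clip an extent item [item_off, item_off + datal) to the source
     * range [off, off + len); datao is the offset into the extent. */
    static void clip_extent_to_range(unsigned long long off,
                                     unsigned long long len,
                                     unsigned long long item_off,
                                     unsigned long long *datao,
                                     unsigned long long *datal)
    {
        /* range b: the extent runs past the end of the clone range */
        if (item_off + *datal > off + len)
            *datal = off + len - item_off;

        /* range a: the extent starts before the clone range */
        if (off > item_off) {
            *datao += off - item_off;
            *datal -= off - item_off;
        }
    }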
| @@ -3315,15 +3682,41 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 3315 | goto out_unlock; | 3682 | goto out_unlock; |
| 3316 | } | 3683 | } |
| 3317 | 3684 | ||
| 3318 | /* truncate page cache pages from target inode range */ | 3685 | /* |
| 3319 | truncate_inode_pages_range(&inode->i_data, destoff, | 3686 | * Lock the target range too. Right after we replace the file extent |
| 3320 | PAGE_CACHE_ALIGN(destoff + len) - 1); | 3687 | * items in the fs tree (which now point to the cloned data), we might |
| 3688 | * have a worker replace them with extent items relative to a write | ||
| 3689 | * operation that was issued before this clone operation (i.e. it can | ||
| 3690 | * race with inode.c:btrfs_finish_ordered_io). | ||
| 3691 | */ | ||
| 3692 | if (same_inode) { | ||
| 3693 | u64 lock_start = min_t(u64, off, destoff); | ||
| 3694 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; | ||
| 3321 | 3695 | ||
| 3322 | lock_extent_range(src, off, len); | 3696 | lock_extent_range(src, lock_start, lock_len); |
| 3697 | } else { | ||
| 3698 | lock_extent_range(src, off, len); | ||
| 3699 | lock_extent_range(inode, destoff, len); | ||
| 3700 | } | ||
| 3323 | 3701 | ||
| 3324 | ret = btrfs_clone(src, inode, off, olen, len, destoff); | 3702 | ret = btrfs_clone(src, inode, off, olen, len, destoff); |
| 3325 | 3703 | ||
| 3326 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); | 3704 | if (same_inode) { |
| 3705 | u64 lock_start = min_t(u64, off, destoff); | ||
| 3706 | u64 lock_end = max_t(u64, off, destoff) + len - 1; | ||
| 3707 | |||
| 3708 | unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end); | ||
| 3709 | } else { | ||
| 3710 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); | ||
| 3711 | unlock_extent(&BTRFS_I(inode)->io_tree, destoff, | ||
| 3712 | destoff + len - 1); | ||
| 3713 | } | ||
| 3714 | /* | ||
| 3715 | * Truncate page cache pages so that future reads will see the cloned | ||
| 3716 | * data immediately and not the previous data. | ||
| 3717 | */ | ||
| 3718 | truncate_inode_pages_range(&inode->i_data, destoff, | ||
| 3719 | PAGE_CACHE_ALIGN(destoff + len) - 1); | ||
| 3327 | out_unlock: | 3720 | out_unlock: |
| 3328 | if (!same_inode) { | 3721 | if (!same_inode) { |
| 3329 | if (inode < src) { | 3722 | if (inode < src) { |
| @@ -4898,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4898 | return btrfs_ioctl_trans_end(file); | 5291 | return btrfs_ioctl_trans_end(file); |
| 4899 | case BTRFS_IOC_TREE_SEARCH: | 5292 | case BTRFS_IOC_TREE_SEARCH: |
| 4900 | return btrfs_ioctl_tree_search(file, argp); | 5293 | return btrfs_ioctl_tree_search(file, argp); |
| 5294 | case BTRFS_IOC_TREE_SEARCH_V2: | ||
| 5295 | return btrfs_ioctl_tree_search_v2(file, argp); | ||
| 4901 | case BTRFS_IOC_INO_LOOKUP: | 5296 | case BTRFS_IOC_INO_LOOKUP: |
| 4902 | return btrfs_ioctl_ino_lookup(file, argp); | 5297 | return btrfs_ioctl_ino_lookup(file, argp); |
| 4903 | case BTRFS_IOC_INO_PATHS: | 5298 | case BTRFS_IOC_INO_PATHS: |
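For a clone within one inode the source and destination ranges are covered by a single extent lock rather than two separate ones; the span runs from the lower offset to the end of the higher range. As a sketch:

    /* One lock covering both the source and destination ranges of a
     * same-inode clone. */
    static void same_inode_lock_span(unsigned long long off,
                                     unsigned long long destoff,
                                     unsigned long long len,
                                     unsigned long long *start,
                                     unsigned long long *span)
    {
        unsigned long long lo = off < destoff ? off : destoff;
        unsigned long long hi = off > destoff ? off : destoff;

        *start = lo;
        *span = hi + len - lo;  /* end of the later range minus start */
    }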
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index b47f669aca75..dfad8514f0da 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
| @@ -143,7 +143,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
| 143 | if (ret != LZO_E_OK) { | 143 | if (ret != LZO_E_OK) { |
| 144 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", | 144 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", |
| 145 | ret); | 145 | ret); |
| 146 | ret = -1; | 146 | ret = -EIO; |
| 147 | goto out; | 147 | goto out; |
| 148 | } | 148 | } |
| 149 | 149 | ||
| @@ -189,7 +189,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
| 189 | kunmap(out_page); | 189 | kunmap(out_page); |
| 190 | if (nr_pages == nr_dest_pages) { | 190 | if (nr_pages == nr_dest_pages) { |
| 191 | out_page = NULL; | 191 | out_page = NULL; |
| 192 | ret = -1; | 192 | ret = -E2BIG; |
| 193 | goto out; | 193 | goto out; |
| 194 | } | 194 | } |
| 195 | 195 | ||
| @@ -208,7 +208,7 @@ static int lzo_compress_pages(struct list_head *ws, | |||
| 208 | 208 | ||
| 209 | /* we're making it bigger, give up */ | 209 | /* we're making it bigger, give up */ |
| 210 | if (tot_in > 8192 && tot_in < tot_out) { | 210 | if (tot_in > 8192 && tot_in < tot_out) { |
| 211 | ret = -1; | 211 | ret = -E2BIG; |
| 212 | goto out; | 212 | goto out; |
| 213 | } | 213 | } |
| 214 | 214 | ||
| @@ -335,7 +335,7 @@ cont: | |||
| 335 | break; | 335 | break; |
| 336 | 336 | ||
| 337 | if (page_in_index + 1 >= total_pages_in) { | 337 | if (page_in_index + 1 >= total_pages_in) { |
| 338 | ret = -1; | 338 | ret = -EIO; |
| 339 | goto done; | 339 | goto done; |
| 340 | } | 340 | } |
| 341 | 341 | ||
| @@ -358,7 +358,7 @@ cont: | |||
| 358 | kunmap(pages_in[page_in_index - 1]); | 358 | kunmap(pages_in[page_in_index - 1]); |
| 359 | if (ret != LZO_E_OK) { | 359 | if (ret != LZO_E_OK) { |
| 360 | printk(KERN_WARNING "BTRFS: decompress failed\n"); | 360 | printk(KERN_WARNING "BTRFS: decompress failed\n"); |
| 361 | ret = -1; | 361 | ret = -EIO; |
| 362 | break; | 362 | break; |
| 363 | } | 363 | } |
| 364 | 364 | ||
| @@ -402,12 +402,12 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 402 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | 402 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); |
| 403 | if (ret != LZO_E_OK) { | 403 | if (ret != LZO_E_OK) { |
| 404 | printk(KERN_WARNING "BTRFS: decompress failed!\n"); | 404 | printk(KERN_WARNING "BTRFS: decompress failed!\n"); |
| 405 | ret = -1; | 405 | ret = -EIO; |
| 406 | goto out; | 406 | goto out; |
| 407 | } | 407 | } |
| 408 | 408 | ||
| 409 | if (out_len < start_byte) { | 409 | if (out_len < start_byte) { |
| 410 | ret = -1; | 410 | ret = -EIO; |
| 411 | goto out; | 411 | goto out; |
| 412 | } | 412 | } |
| 413 | 413 | ||
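The lzo.c hunks replace the old catch-all -1 with distinct errnos: input that fails to (de)compress is treated as an I/O error, while output that no longer fits or grows past the input becomes -E2BIG. A hypothetical summary of that mapping (not a function in the file):

    #include <errno.h>

    enum lzo_fail {
        LZO_FAIL_BAD_DATA,   /* lzo1x_* returned something != LZO_E_OK */
        LZO_FAIL_OUT_FULL,   /* all destination pages are used up */
        LZO_FAIL_GREW,       /* compressed output larger than the input */
    };

    static int lzo_fail_to_errno(enum lzo_fail f)
    {
        switch (f) {
        case LZO_FAIL_BAD_DATA:
            return -EIO;
        case LZO_FAIL_OUT_FULL:
        case LZO_FAIL_GREW:
            return -E2BIG;
        }
        return 0;
    }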
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a94b05f72869..e12441c7cf1d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno, | |||
| 67 | { | 67 | { |
| 68 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 68 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
| 69 | btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " | 69 | btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " |
| 70 | "%llu\n", offset); | 70 | "%llu", offset); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | /* | 73 | /* |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2cf905877aaf..98cb6b2630f9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include "ulist.h" | 32 | #include "ulist.h" |
| 33 | #include "backref.h" | 33 | #include "backref.h" |
| 34 | #include "extent_io.h" | 34 | #include "extent_io.h" |
| 35 | #include "qgroup.h" | ||
| 35 | 36 | ||
| 36 | /* TODO XXX FIXME | 37 | /* TODO XXX FIXME |
| 37 | * - subvol delete -> delete when ref goes to 0? delete limits also? | 38 | * - subvol delete -> delete when ref goes to 0? delete limits also? |
| @@ -84,8 +85,8 @@ struct btrfs_qgroup { | |||
| 84 | /* | 85 | /* |
| 85 | * temp variables for accounting operations | 86 | * temp variables for accounting operations |
| 86 | */ | 87 | */ |
| 87 | u64 tag; | 88 | u64 old_refcnt; |
| 88 | u64 refcnt; | 89 | u64 new_refcnt; |
| 89 | }; | 90 | }; |
| 90 | 91 | ||
| 91 | /* | 92 | /* |
| @@ -98,6 +99,9 @@ struct btrfs_qgroup_list { | |||
| 98 | struct btrfs_qgroup *member; | 99 | struct btrfs_qgroup *member; |
| 99 | }; | 100 | }; |
| 100 | 101 | ||
| 102 | #define ptr_to_u64(x) ((u64)(uintptr_t)x) | ||
| 103 | #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) | ||
| 104 | |||
| 101 | static int | 105 | static int |
| 102 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | 106 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, |
| 103 | int init_flags); | 107 | int init_flags); |
| @@ -242,6 +246,21 @@ static int del_relation_rb(struct btrfs_fs_info *fs_info, | |||
| 242 | return -ENOENT; | 246 | return -ENOENT; |
| 243 | } | 247 | } |
| 244 | 248 | ||
| 249 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 250 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
| 251 | u64 rfer, u64 excl) | ||
| 252 | { | ||
| 253 | struct btrfs_qgroup *qgroup; | ||
| 254 | |||
| 255 | qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
| 256 | if (!qgroup) | ||
| 257 | return -EINVAL; | ||
| 258 | if (qgroup->rfer != rfer || qgroup->excl != excl) | ||
| 259 | return -EINVAL; | ||
| 260 | return 0; | ||
| 261 | } | ||
| 262 | #endif | ||
| 263 | |||
| 245 | /* | 264 | /* |
| 246 | * The full config is read in one go, only called from open_ctree() | 265 | * The full config is read in one go, only called from open_ctree() |
| 247 | * It doesn't use any locking, as at this point we're still single-threaded | 266 | * It doesn't use any locking, as at this point we're still single-threaded |
| @@ -520,6 +539,10 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
| 520 | struct extent_buffer *leaf; | 539 | struct extent_buffer *leaf; |
| 521 | struct btrfs_key key; | 540 | struct btrfs_key key; |
| 522 | 541 | ||
| 542 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 543 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, "a_root->state))) | ||
| 544 | return 0; | ||
| 545 | #endif | ||
| 523 | path = btrfs_alloc_path(); | 546 | path = btrfs_alloc_path(); |
| 524 | if (!path) | 547 | if (!path) |
| 525 | return -ENOMEM; | 548 | return -ENOMEM; |
| @@ -669,6 +692,10 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, | |||
| 669 | int ret; | 692 | int ret; |
| 670 | int slot; | 693 | int slot; |
| 671 | 694 | ||
| 695 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 696 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
| 697 | return 0; | ||
| 698 | #endif | ||
| 672 | key.objectid = 0; | 699 | key.objectid = 0; |
| 673 | key.type = BTRFS_QGROUP_INFO_KEY; | 700 | key.type = BTRFS_QGROUP_INFO_KEY; |
| 674 | key.offset = qgroup->qgroupid; | 701 | key.offset = qgroup->qgroupid; |
| @@ -1174,33 +1201,198 @@ out: | |||
| 1174 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 1201 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
| 1175 | return ret; | 1202 | return ret; |
| 1176 | } | 1203 | } |
| 1204 | static int comp_oper(struct btrfs_qgroup_operation *oper1, | ||
| 1205 | struct btrfs_qgroup_operation *oper2) | ||
| 1206 | { | ||
| 1207 | if (oper1->bytenr < oper2->bytenr) | ||
| 1208 | return -1; | ||
| 1209 | if (oper1->bytenr > oper2->bytenr) | ||
| 1210 | return 1; | ||
| 1211 | if (oper1->seq < oper2->seq) | ||
| 1212 | return -1; | ||
| 1213 | if (oper1->seq > oper2->seq) | ||
| 1214 | return 1; | ||
| 1215 | if (oper1->ref_root < oper2->ref_root) | ||
| 1216 | return -1; | ||
| 1217 | if (oper1->ref_root > oper2->ref_root) | ||
| 1218 | return 1; | ||
| 1219 | if (oper1->type < oper2->type) | ||
| 1220 | return -1; | ||
| 1221 | if (oper1->type > oper2->type) | ||
| 1222 | return 1; | ||
| 1223 | return 0; | ||
| 1224 | } | ||
| 1225 | |||
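With the seq comparison fixed above (the greater-than branch must return 1 like the other fields, otherwise the order is not antisymmetric and the rb-tree insert can misplace or miss duplicate operations), comp_oper() is a plain lexicographic order over (bytenr, seq, ref_root, type). The same order written compactly, as a sketch:

    struct oper_key {
        unsigned long long bytenr, seq, ref_root, type;
    };

    static int cmp_u64(unsigned long long a, unsigned long long b)
    {
        return (a > b) - (a < b);  /* -1, 0 or 1 */
    }

    static int comp_oper_sketch(const struct oper_key *a,
                                const struct oper_key *b)
    {
        int c;

        if ((c = cmp_u64(a->bytenr, b->bytenr)))
            return c;
        if ((c = cmp_u64(a->seq, b->seq)))
            return c;
        if ((c = cmp_u64(a->ref_root, b->ref_root)))
            return c;
        return cmp_u64(a->type, b->type);
    }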
| 1226 | static int insert_qgroup_oper(struct btrfs_fs_info *fs_info, | ||
| 1227 | struct btrfs_qgroup_operation *oper) | ||
| 1228 | { | ||
| 1229 | struct rb_node **p; | ||
| 1230 | struct rb_node *parent = NULL; | ||
| 1231 | struct btrfs_qgroup_operation *cur; | ||
| 1232 | int cmp; | ||
| 1233 | |||
| 1234 | spin_lock(&fs_info->qgroup_op_lock); | ||
| 1235 | p = &fs_info->qgroup_op_tree.rb_node; | ||
| 1236 | while (*p) { | ||
| 1237 | parent = *p; | ||
| 1238 | cur = rb_entry(parent, struct btrfs_qgroup_operation, n); | ||
| 1239 | cmp = comp_oper(cur, oper); | ||
| 1240 | if (cmp < 0) { | ||
| 1241 | p = &(*p)->rb_right; | ||
| 1242 | } else if (cmp) { | ||
| 1243 | p = &(*p)->rb_left; | ||
| 1244 | } else { | ||
| 1245 | spin_unlock(&fs_info->qgroup_op_lock); | ||
| 1246 | return -EEXIST; | ||
| 1247 | } | ||
| 1248 | } | ||
| 1249 | rb_link_node(&oper->n, parent, p); | ||
| 1250 | rb_insert_color(&oper->n, &fs_info->qgroup_op_tree); | ||
| 1251 | spin_unlock(&fs_info->qgroup_op_lock); | ||
| 1252 | return 0; | ||
| 1253 | } | ||
| 1177 | 1254 | ||
| 1178 | /* | 1255 | /* |
| 1179 | * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts | 1256 | * Record a quota operation for processing later on. |
| 1180 | * the modification into a list that's later used by btrfs_end_transaction to | 1257 | * @trans: the transaction we are adding the delayed op to. |
| 1181 | * pass the recorded modifications on to btrfs_qgroup_account_ref. | 1258 | * @fs_info: the fs_info for this fs. |
| 1259 | * @ref_root: the root of the reference we are acting on. | ||
| 1260 | * @bytenr: the bytenr we are acting on. | ||
| 1261 | * @num_bytes: the number of bytes in the reference. | ||
| 1262 | * @type: the type of operation this is. | ||
| 1263 | * @mod_seq: do we need to get a sequence number for looking up roots. | ||
| 1264 | * | ||
| 1265 | * We just add it to our trans qgroup_ref_list and carry on and process these | ||
| 1266 | * operations in order at some later point. If the reference root isn't a fs | ||
| 1267 | * root then we don't bother with doing anything. | ||
| 1268 | * | ||
| 1269 | * MUST BE HOLDING THE REF LOCK. | ||
| 1182 | */ | 1270 | */ |
| 1183 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | 1271 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, |
| 1184 | struct btrfs_delayed_ref_node *node, | 1272 | struct btrfs_fs_info *fs_info, u64 ref_root, |
| 1185 | struct btrfs_delayed_extent_op *extent_op) | 1273 | u64 bytenr, u64 num_bytes, |
| 1274 | enum btrfs_qgroup_operation_type type, int mod_seq) | ||
| 1186 | { | 1275 | { |
| 1187 | struct qgroup_update *u; | 1276 | struct btrfs_qgroup_operation *oper; |
| 1277 | int ret; | ||
| 1278 | |||
| 1279 | if (!is_fstree(ref_root) || !fs_info->quota_enabled) | ||
| 1280 | return 0; | ||
| 1188 | 1281 | ||
| 1189 | BUG_ON(!trans->delayed_ref_elem.seq); | 1282 | oper = kmalloc(sizeof(*oper), GFP_NOFS); |
| 1190 | u = kmalloc(sizeof(*u), GFP_NOFS); | 1283 | if (!oper) |
| 1191 | if (!u) | ||
| 1192 | return -ENOMEM; | 1284 | return -ENOMEM; |
| 1193 | 1285 | ||
| 1194 | u->node = node; | 1286 | oper->ref_root = ref_root; |
| 1195 | u->extent_op = extent_op; | 1287 | oper->bytenr = bytenr; |
| 1196 | list_add_tail(&u->list, &trans->qgroup_ref_list); | 1288 | oper->num_bytes = num_bytes; |
| 1289 | oper->type = type; | ||
| 1290 | oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); | ||
| 1291 | INIT_LIST_HEAD(&oper->elem.list); | ||
| 1292 | oper->elem.seq = 0; | ||
| 1293 | ret = insert_qgroup_oper(fs_info, oper); | ||
| 1294 | if (ret) { | ||
| 1295 | /* Shouldn't happen so have an assert for developers */ | ||
| 1296 | ASSERT(0); | ||
| 1297 | kfree(oper); | ||
| 1298 | return ret; | ||
| 1299 | } | ||
| 1300 | list_add_tail(&oper->list, &trans->qgroup_ref_list); | ||
| 1301 | |||
| 1302 | if (mod_seq) | ||
| 1303 | btrfs_get_tree_mod_seq(fs_info, &oper->elem); | ||
| 1197 | 1304 | ||
| 1198 | return 0; | 1305 | return 0; |
| 1199 | } | 1306 | } |
| 1200 | 1307 | ||
| 1201 | static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, | 1308 | /* |
| 1202 | struct ulist *roots, struct ulist *tmp, | 1309 | * The easy accounting: if we are adding/removing the only ref for an extent |
| 1203 | u64 seq) | 1310 | * then this qgroup and all of the parent qgroups get their reference and |
| 1311 | * exclusive counts adjusted. | ||
| 1312 | */ | ||
| 1313 | static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | ||
| 1314 | struct btrfs_qgroup_operation *oper) | ||
| 1315 | { | ||
| 1316 | struct btrfs_qgroup *qgroup; | ||
| 1317 | struct ulist *tmp; | ||
| 1318 | struct btrfs_qgroup_list *glist; | ||
| 1319 | struct ulist_node *unode; | ||
| 1320 | struct ulist_iterator uiter; | ||
| 1321 | int sign = 0; | ||
| 1322 | int ret = 0; | ||
| 1323 | |||
| 1324 | tmp = ulist_alloc(GFP_NOFS); | ||
| 1325 | if (!tmp) | ||
| 1326 | return -ENOMEM; | ||
| 1327 | |||
| 1328 | spin_lock(&fs_info->qgroup_lock); | ||
| 1329 | if (!fs_info->quota_root) | ||
| 1330 | goto out; | ||
| 1331 | qgroup = find_qgroup_rb(fs_info, oper->ref_root); | ||
| 1332 | if (!qgroup) | ||
| 1333 | goto out; | ||
| 1334 | switch (oper->type) { | ||
| 1335 | case BTRFS_QGROUP_OPER_ADD_EXCL: | ||
| 1336 | sign = 1; | ||
| 1337 | break; | ||
| 1338 | case BTRFS_QGROUP_OPER_SUB_EXCL: | ||
| 1339 | sign = -1; | ||
| 1340 | break; | ||
| 1341 | default: | ||
| 1342 | ASSERT(0); | ||
| 1343 | } | ||
| 1344 | qgroup->rfer += sign * oper->num_bytes; | ||
| 1345 | qgroup->rfer_cmpr += sign * oper->num_bytes; | ||
| 1346 | |||
| 1347 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | ||
| 1348 | qgroup->excl += sign * oper->num_bytes; | ||
| 1349 | qgroup->excl_cmpr += sign * oper->num_bytes; | ||
| 1350 | |||
| 1351 | qgroup_dirty(fs_info, qgroup); | ||
| 1352 | |||
| 1353 | /* Get all of the parent groups that contain this qgroup */ | ||
| 1354 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
| 1355 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
| 1356 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
| 1357 | if (ret < 0) | ||
| 1358 | goto out; | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | /* Iterate all of the parents and adjust their reference counts */ | ||
| 1362 | ULIST_ITER_INIT(&uiter); | ||
| 1363 | while ((unode = ulist_next(tmp, &uiter))) { | ||
| 1364 | qgroup = u64_to_ptr(unode->aux); | ||
| 1365 | qgroup->rfer += sign * oper->num_bytes; | ||
| 1366 | qgroup->rfer_cmpr += sign * oper->num_bytes; | ||
| 1367 | qgroup->excl += sign * oper->num_bytes; | ||
| 1368 | if (sign < 0) | ||
| 1369 | WARN_ON(qgroup->excl < oper->num_bytes); | ||
| 1370 | qgroup->excl_cmpr += sign * oper->num_bytes; | ||
| 1371 | qgroup_dirty(fs_info, qgroup); | ||
| 1372 | |||
| 1373 | /* Add any parents of the parents */ | ||
| 1374 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
| 1375 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
| 1376 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
| 1377 | if (ret < 0) | ||
| 1378 | goto out; | ||
| 1379 | } | ||
| 1380 | } | ||
| 1381 | ret = 0; | ||
| 1382 | out: | ||
| 1383 | spin_unlock(&fs_info->qgroup_lock); | ||
| 1384 | ulist_free(tmp); | ||
| 1385 | return ret; | ||
| 1386 | } | ||
| 1387 | |||
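qgroup_excl_accounting() applies one signed delta everywhere: +num_bytes for ADD_EXCL, -num_bytes for SUB_EXCL, first to the qgroup itself and then, via the ulist walk, to every ancestor group. The per-group update isolated into a sketch (the field names mirror the kernel struct; the helper itself is illustrative):

    struct qg_counts {
        long long rfer, rfer_cmpr, excl, excl_cmpr;
    };

    /* sign is +1 (ADD_EXCL) or -1 (SUB_EXCL). */
    static void apply_excl_delta(struct qg_counts *qg, int sign,
                                 unsigned long long num_bytes)
    {
        long long d = sign * (long long)num_bytes;

        qg->rfer      += d;   /* referenced bytes */
        qg->rfer_cmpr += d;
        qg->excl      += d;   /* exclusive: sole owner by definition */
        qg->excl_cmpr += d;
    }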
| 1388 | /* | ||
| 1389 | * Walk all of the roots that pointed to our bytenr and adjust their | ||
| 1390 | * refcnts properly. | ||
| 1391 | */ | ||
| 1392 | static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, | ||
| 1393 | u64 root_to_skip, struct ulist *tmp, | ||
| 1394 | struct ulist *roots, struct ulist *qgroups, | ||
| 1395 | u64 seq, int *old_roots, int rescan) | ||
| 1204 | { | 1396 | { |
| 1205 | struct ulist_node *unode; | 1397 | struct ulist_node *unode; |
| 1206 | struct ulist_iterator uiter; | 1398 | struct ulist_iterator uiter; |
| @@ -1211,256 +1403,551 @@ static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, | |||
| 1211 | 1403 | ||
| 1212 | ULIST_ITER_INIT(&uiter); | 1404 | ULIST_ITER_INIT(&uiter); |
| 1213 | while ((unode = ulist_next(roots, &uiter))) { | 1405 | while ((unode = ulist_next(roots, &uiter))) { |
| 1406 | /* We don't count our current root here */ | ||
| 1407 | if (unode->val == root_to_skip) | ||
| 1408 | continue; | ||
| 1214 | qg = find_qgroup_rb(fs_info, unode->val); | 1409 | qg = find_qgroup_rb(fs_info, unode->val); |
| 1215 | if (!qg) | 1410 | if (!qg) |
| 1216 | continue; | 1411 | continue; |
| 1412 | /* | ||
| 1413 | * We could have a pending removal of this same ref so we may | ||
| 1414 | * not have actually found our ref root when doing | ||
| 1415 | * btrfs_find_all_roots, so we need to keep track of how many | ||
| 1416 | * old roots we find in case we removed ours and added a | ||
| 1417 | * different one at the same time. I don't think this could | ||
| 1418 | * happen in practice but that sort of thinking leads to pain | ||
| 1419 | * and suffering and to the dark side. | ||
| 1420 | */ | ||
| 1421 | (*old_roots)++; | ||
| 1217 | 1422 | ||
| 1218 | ulist_reinit(tmp); | 1423 | ulist_reinit(tmp); |
| 1219 | /* XXX id not needed */ | 1424 | ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), |
| 1220 | ret = ulist_add(tmp, qg->qgroupid, | 1425 | GFP_ATOMIC); |
| 1221 | (u64)(uintptr_t)qg, GFP_ATOMIC); | 1426 | if (ret < 0) |
| 1427 | return ret; | ||
| 1428 | ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); | ||
| 1222 | if (ret < 0) | 1429 | if (ret < 0) |
| 1223 | return ret; | 1430 | return ret; |
| 1224 | ULIST_ITER_INIT(&tmp_uiter); | 1431 | ULIST_ITER_INIT(&tmp_uiter); |
| 1225 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1432 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
| 1226 | struct btrfs_qgroup_list *glist; | 1433 | struct btrfs_qgroup_list *glist; |
| 1227 | 1434 | ||
| 1228 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; | 1435 | qg = u64_to_ptr(tmp_unode->aux); |
| 1229 | if (qg->refcnt < seq) | 1436 | /* |
| 1230 | qg->refcnt = seq + 1; | 1437 | * We use this sequence number to keep from having to |
| 1438 | * run the whole list and 0 out the refcnt every time. | ||
| 1439 | * We basically use the sequence as the known 0 count and | ||
| 1440 | * then add 1 every time we see a qgroup. This is how we | ||
| 1441 | * get how many of the roots actually point up to the | ||
| 1442 | * upper level qgroups in order to determine exclusive | ||
| 1443 | * counts. | ||
| 1444 | * | ||
| 1445 | * For rescan we want to set old_refcnt to seq so our | ||
| 1446 | * exclusive calculations end up correct. | ||
| 1447 | */ | ||
| 1448 | if (rescan) | ||
| 1449 | qg->old_refcnt = seq; | ||
| 1450 | else if (qg->old_refcnt < seq) | ||
| 1451 | qg->old_refcnt = seq + 1; | ||
| 1231 | else | 1452 | else |
| 1232 | ++qg->refcnt; | 1453 | qg->old_refcnt++; |
| 1233 | 1454 | ||
| 1455 | if (qg->new_refcnt < seq) | ||
| 1456 | qg->new_refcnt = seq + 1; | ||
| 1457 | else | ||
| 1458 | qg->new_refcnt++; | ||
| 1234 | list_for_each_entry(glist, &qg->groups, next_group) { | 1459 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1460 | ret = ulist_add(qgroups, glist->group->qgroupid, | ||
| 1461 | ptr_to_u64(glist->group), | ||
| 1462 | GFP_ATOMIC); | ||
| 1463 | if (ret < 0) | ||
| 1464 | return ret; | ||
| 1235 | ret = ulist_add(tmp, glist->group->qgroupid, | 1465 | ret = ulist_add(tmp, glist->group->qgroupid, |
| 1236 | (u64)(uintptr_t)glist->group, | 1466 | ptr_to_u64(glist->group), |
| 1237 | GFP_ATOMIC); | 1467 | GFP_ATOMIC); |
| 1238 | if (ret < 0) | 1468 | if (ret < 0) |
| 1239 | return ret; | 1469 | return ret; |
| 1240 | } | 1470 | } |
| 1241 | } | 1471 | } |
| 1242 | } | 1472 | } |
| 1473 | return 0; | ||
| 1474 | } | ||
| 1475 | |||
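The refcnt updates above are the "moving zero" idiom the comment describes: seq is the implicit zero for the current accounting round, so a counter at or below seq means "not seen yet" and nothing ever has to be reset between rounds. Both halves of the idiom, as a sketch:

    /* Count one more reference in the round whose zero point is seq. */
    static void bump_refcnt(unsigned long long *refcnt, unsigned long long seq)
    {
        if (*refcnt < seq)
            *refcnt = seq + 1;  /* first hit this round */
        else
            (*refcnt)++;
    }

    /* Effective count for this round: anything at or below seq is zero. */
    static unsigned long long effective(unsigned long long refcnt,
                                        unsigned long long seq)
    {
        return refcnt < seq ? 0 : refcnt - seq;
    }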
| 1476 | /* | ||
| 1477 | * We need to walk forward in our operation tree and account for any roots that | ||
| 1478 | * were deleted after we made this operation. | ||
| 1479 | */ | ||
| 1480 | static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, | ||
| 1481 | struct btrfs_qgroup_operation *oper, | ||
| 1482 | struct ulist *tmp, | ||
| 1483 | struct ulist *qgroups, u64 seq, | ||
| 1484 | int *old_roots) | ||
| 1485 | { | ||
| 1486 | struct ulist_node *unode; | ||
| 1487 | struct ulist_iterator uiter; | ||
| 1488 | struct btrfs_qgroup *qg; | ||
| 1489 | struct btrfs_qgroup_operation *tmp_oper; | ||
| 1490 | struct rb_node *n; | ||
| 1491 | int ret; | ||
| 1492 | |||
| 1493 | ulist_reinit(tmp); | ||
| 1243 | 1494 | ||
| 1495 | /* | ||
| 1496 | * We only walk forward in the tree since we're only interested in | ||
| 1497 | * removals that happened _after_ our operation. | ||
| 1498 | */ | ||
| 1499 | spin_lock(&fs_info->qgroup_op_lock); | ||
| 1500 | n = rb_next(&oper->n); | ||
| 1501 | spin_unlock(&fs_info->qgroup_op_lock); | ||
| 1502 | if (!n) | ||
| 1503 | return 0; | ||
| 1504 | tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); | ||
| 1505 | while (tmp_oper->bytenr == oper->bytenr) { | ||
| 1506 | /* | ||
| 1507 | * If it's not a removal we don't care, additions work out | ||
| 1508 | * properly with our refcnt tracking. | ||
| 1509 | */ | ||
| 1510 | if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && | ||
| 1511 | tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) | ||
| 1512 | goto next; | ||
| 1513 | qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); | ||
| 1514 | if (!qg) | ||
| 1515 | goto next; | ||
| 1516 | ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), | ||
| 1517 | GFP_ATOMIC); | ||
| 1518 | if (ret) { | ||
| 1519 | if (ret < 0) | ||
| 1520 | return ret; | ||
| 1521 | /* | ||
| 1522 | * We only want to increase old_roots if this qgroup is | ||
| 1523 | * not already in the list of qgroups. If it is already | ||
| 1524 | * there then that means it must have been re-added or | ||
| 1525 | * the delete will be discarded because we had an | ||
| 1526 | * existing ref that we haven't looked up yet. In this | ||
| 1527 | * case we don't want to increase old_roots. So if ret | ||
| 1528 | * == 1 then we know that this is the first time we've | ||
| 1529 | * seen this qgroup and we can bump the old_roots. | ||
| 1530 | */ | ||
| 1531 | (*old_roots)++; | ||
| 1532 | ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), | ||
| 1533 | GFP_ATOMIC); | ||
| 1534 | if (ret < 0) | ||
| 1535 | return ret; | ||
| 1536 | } | ||
| 1537 | next: | ||
| 1538 | spin_lock(&fs_info->qgroup_op_lock); | ||
| 1539 | n = rb_next(&tmp_oper->n); | ||
| 1540 | spin_unlock(&fs_info->qgroup_op_lock); | ||
| 1541 | if (!n) | ||
| 1542 | break; | ||
| 1543 | tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); | ||
| 1544 | } | ||
| 1545 | |||
| 1546 | /* Ok now process the qgroups we found */ | ||
| 1547 | ULIST_ITER_INIT(&uiter); | ||
| 1548 | while ((unode = ulist_next(tmp, &uiter))) { | ||
| 1549 | struct btrfs_qgroup_list *glist; | ||
| 1550 | |||
| 1551 | qg = u64_to_ptr(unode->aux); | ||
| 1552 | if (qg->old_refcnt < seq) | ||
| 1553 | qg->old_refcnt = seq + 1; | ||
| 1554 | else | ||
| 1555 | qg->old_refcnt++; | ||
| 1556 | if (qg->new_refcnt < seq) | ||
| 1557 | qg->new_refcnt = seq + 1; | ||
| 1558 | else | ||
| 1559 | qg->new_refcnt++; | ||
| 1560 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
| 1561 | ret = ulist_add(qgroups, glist->group->qgroupid, | ||
| 1562 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
| 1563 | if (ret < 0) | ||
| 1564 | return ret; | ||
| 1565 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
| 1566 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
| 1567 | if (ret < 0) | ||
| 1568 | return ret; | ||
| 1569 | } | ||
| 1570 | } | ||
| 1244 | return 0; | 1571 | return 0; |
| 1245 | } | 1572 | } |
| 1246 | 1573 | ||
| 1247 | static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, | 1574 | /* Add refcnt for the newly added reference. */ |
| 1248 | struct ulist *roots, struct ulist *tmp, | 1575 | static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info, |
| 1249 | u64 seq, int sgn, u64 num_bytes, | 1576 | struct btrfs_qgroup_operation *oper, |
| 1250 | struct btrfs_qgroup *qgroup) | 1577 | struct btrfs_qgroup *qgroup, |
| 1578 | struct ulist *tmp, struct ulist *qgroups, | ||
| 1579 | u64 seq) | ||
| 1251 | { | 1580 | { |
| 1252 | struct ulist_node *unode; | 1581 | struct ulist_node *unode; |
| 1253 | struct ulist_iterator uiter; | 1582 | struct ulist_iterator uiter; |
| 1254 | struct btrfs_qgroup *qg; | 1583 | struct btrfs_qgroup *qg; |
| 1255 | struct btrfs_qgroup_list *glist; | ||
| 1256 | int ret; | 1584 | int ret; |
| 1257 | 1585 | ||
| 1258 | ulist_reinit(tmp); | 1586 | ulist_reinit(tmp); |
| 1259 | ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | 1587 | ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup), |
| 1588 | GFP_ATOMIC); | ||
| 1589 | if (ret < 0) | ||
| 1590 | return ret; | ||
| 1591 | ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup), | ||
| 1592 | GFP_ATOMIC); | ||
| 1260 | if (ret < 0) | 1593 | if (ret < 0) |
| 1261 | return ret; | 1594 | return ret; |
| 1262 | |||
| 1263 | ULIST_ITER_INIT(&uiter); | 1595 | ULIST_ITER_INIT(&uiter); |
| 1264 | while ((unode = ulist_next(tmp, &uiter))) { | 1596 | while ((unode = ulist_next(tmp, &uiter))) { |
| 1265 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 1597 | struct btrfs_qgroup_list *glist; |
| 1266 | if (qg->refcnt < seq) { | ||
| 1267 | /* not visited by step 1 */ | ||
| 1268 | qg->rfer += sgn * num_bytes; | ||
| 1269 | qg->rfer_cmpr += sgn * num_bytes; | ||
| 1270 | if (roots->nnodes == 0) { | ||
| 1271 | qg->excl += sgn * num_bytes; | ||
| 1272 | qg->excl_cmpr += sgn * num_bytes; | ||
| 1273 | } | ||
| 1274 | qgroup_dirty(fs_info, qg); | ||
| 1275 | } | ||
| 1276 | WARN_ON(qg->tag >= seq); | ||
| 1277 | qg->tag = seq; | ||
| 1278 | 1598 | ||
| 1599 | qg = u64_to_ptr(unode->aux); | ||
| 1600 | if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { | ||
| 1601 | if (qg->new_refcnt < seq) | ||
| 1602 | qg->new_refcnt = seq + 1; | ||
| 1603 | else | ||
| 1604 | qg->new_refcnt++; | ||
| 1605 | } else { | ||
| 1606 | if (qg->old_refcnt < seq) | ||
| 1607 | qg->old_refcnt = seq + 1; | ||
| 1608 | else | ||
| 1609 | qg->old_refcnt++; | ||
| 1610 | } | ||
| 1279 | list_for_each_entry(glist, &qg->groups, next_group) { | 1611 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1280 | ret = ulist_add(tmp, glist->group->qgroupid, | 1612 | ret = ulist_add(tmp, glist->group->qgroupid, |
| 1281 | (uintptr_t)glist->group, GFP_ATOMIC); | 1613 | ptr_to_u64(glist->group), GFP_ATOMIC); |
| 1614 | if (ret < 0) | ||
| 1615 | return ret; | ||
| 1616 | ret = ulist_add(qgroups, glist->group->qgroupid, | ||
| 1617 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
| 1282 | if (ret < 0) | 1618 | if (ret < 0) |
| 1283 | return ret; | 1619 | return ret; |
| 1284 | } | 1620 | } |
| 1285 | } | 1621 | } |
| 1286 | |||
| 1287 | return 0; | 1622 | return 0; |
| 1288 | } | 1623 | } |
| 1289 | 1624 | ||
| 1290 | static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, | 1625 | /* |
| 1291 | struct ulist *roots, struct ulist *tmp, | 1626 | * This adjusts the counters for all referenced qgroups if need be. |
| 1292 | u64 seq, int sgn, u64 num_bytes) | 1627 | */ |
| 1628 | static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, | ||
| 1629 | u64 root_to_skip, u64 num_bytes, | ||
| 1630 | struct ulist *qgroups, u64 seq, | ||
| 1631 | int old_roots, int new_roots, int rescan) | ||
| 1293 | { | 1632 | { |
| 1294 | struct ulist_node *unode; | 1633 | struct ulist_node *unode; |
| 1295 | struct ulist_iterator uiter; | 1634 | struct ulist_iterator uiter; |
| 1296 | struct btrfs_qgroup *qg; | 1635 | struct btrfs_qgroup *qg; |
| 1297 | struct ulist_node *tmp_unode; | 1636 | u64 cur_new_count, cur_old_count; |
| 1298 | struct ulist_iterator tmp_uiter; | ||
| 1299 | int ret; | ||
| 1300 | 1637 | ||
| 1301 | ULIST_ITER_INIT(&uiter); | 1638 | ULIST_ITER_INIT(&uiter); |
| 1302 | while ((unode = ulist_next(roots, &uiter))) { | 1639 | while ((unode = ulist_next(qgroups, &uiter))) { |
| 1303 | qg = find_qgroup_rb(fs_info, unode->val); | 1640 | bool dirty = false; |
| 1304 | if (!qg) | ||
| 1305 | continue; | ||
| 1306 | 1641 | ||
| 1307 | ulist_reinit(tmp); | 1642 | qg = u64_to_ptr(unode->aux); |
| 1308 | ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); | 1643 | /* |
| 1309 | if (ret < 0) | 1644 | * Wasn't referenced before but is now, add to the reference |
| 1310 | return ret; | 1645 | * counters. |
| 1646 | */ | ||
| 1647 | if (qg->old_refcnt <= seq && qg->new_refcnt > seq) { | ||
| 1648 | qg->rfer += num_bytes; | ||
| 1649 | qg->rfer_cmpr += num_bytes; | ||
| 1650 | dirty = true; | ||
| 1651 | } | ||
| 1311 | 1652 | ||
| 1312 | ULIST_ITER_INIT(&tmp_uiter); | 1653 | /* |
| 1313 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1654 | * Was referenced before but isn't now, subtract from the |
| 1314 | struct btrfs_qgroup_list *glist; | 1655 | * reference counters. |
| 1656 | */ | ||
| 1657 | if (qg->old_refcnt > seq && qg->new_refcnt <= seq) { | ||
| 1658 | qg->rfer -= num_bytes; | ||
| 1659 | qg->rfer_cmpr -= num_bytes; | ||
| 1660 | dirty = true; | ||
| 1661 | } | ||
| 1315 | 1662 | ||
| 1316 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; | 1663 | if (qg->old_refcnt < seq) |
| 1317 | if (qg->tag == seq) | 1664 | cur_old_count = 0; |
| 1318 | continue; | 1665 | else |
| 1666 | cur_old_count = qg->old_refcnt - seq; | ||
| 1667 | if (qg->new_refcnt < seq) | ||
| 1668 | cur_new_count = 0; | ||
| 1669 | else | ||
| 1670 | cur_new_count = qg->new_refcnt - seq; | ||
| 1319 | 1671 | ||
| 1320 | if (qg->refcnt - seq == roots->nnodes) { | 1672 | /* |
| 1321 | qg->excl -= sgn * num_bytes; | 1673 | * If our refcount was the same as the roots previously but our |
| 1322 | qg->excl_cmpr -= sgn * num_bytes; | 1674 | * new count isn't the same as the number of roots now then we |
| 1323 | qgroup_dirty(fs_info, qg); | 1675 | * went from having an exclusive reference on this range to not. |
| 1324 | } | 1676 | */ |
| 1677 | if (old_roots && cur_old_count == old_roots && | ||
| 1678 | (cur_new_count != new_roots || new_roots == 0)) { | ||
| 1679 | WARN_ON(cur_new_count != new_roots && new_roots == 0); | ||
| 1680 | qg->excl -= num_bytes; | ||
| 1681 | qg->excl_cmpr -= num_bytes; | ||
| 1682 | dirty = true; | ||
| 1683 | } | ||
| 1325 | 1684 | ||
| 1326 | list_for_each_entry(glist, &qg->groups, next_group) { | 1685 | /* |
| 1327 | ret = ulist_add(tmp, glist->group->qgroupid, | 1686 | * If we didn't reference all the roots before but now we do we |
| 1328 | (uintptr_t)glist->group, | 1687 | * have an exclusive reference to this range. |
| 1329 | GFP_ATOMIC); | 1688 | */ |
| 1330 | if (ret < 0) | 1689 | if ((!old_roots || (old_roots && cur_old_count != old_roots)) |
| 1331 | return ret; | 1690 | && cur_new_count == new_roots) { |
| 1332 | } | 1691 | qg->excl += num_bytes; |
| 1692 | qg->excl_cmpr += num_bytes; | ||
| 1693 | dirty = true; | ||
| 1333 | } | 1694 | } |
| 1334 | } | ||
| 1335 | 1695 | ||
| 1696 | if (dirty) | ||
| 1697 | qgroup_dirty(fs_info, qg); | ||
| 1698 | } | ||
| 1336 | return 0; | 1699 | return 0; |
| 1337 | } | 1700 | } |
| 1338 | 1701 | ||
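The four tests in qgroup_adjust_counters() reduce to comparing the seq-relative old/new counts against the old and new numbers of roots. Pulled out into a pure function for readability (types simplified; the new_roots == 0 special case is kept exactly as in the code above):

    struct qg_delta { long long rfer, excl; };

    static struct qg_delta counter_delta(unsigned long long cur_old,
                                         unsigned long long cur_new,
                                         unsigned long long old_roots,
                                         unsigned long long new_roots,
                                         long long num_bytes)
    {
        struct qg_delta d = { 0, 0 };

        if (cur_old == 0 && cur_new > 0)
            d.rfer += num_bytes;            /* newly referenced */
        if (cur_old > 0 && cur_new == 0)
            d.rfer -= num_bytes;            /* reference went away */
        if (old_roots && cur_old == old_roots &&
            (cur_new != new_roots || new_roots == 0))
            d.excl -= num_bytes;            /* was exclusive, no longer */
        if (cur_new == new_roots &&
            (!old_roots || cur_old != old_roots))
            d.excl += num_bytes;            /* became exclusive */
        return d;
    }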
| 1339 | /* | 1702 | /* |
| 1340 | * btrfs_qgroup_account_ref is called for every ref that is added to or deleted | 1703 | * If we removed a data extent and there were other references for that bytenr |
| 1341 | * from the fs. First, all roots referencing the extent are searched, and | 1704 | * then we need to look up all referenced roots to make sure we still don't |
| 1342 | * then the space is accounted accordingly to the different roots. The | 1705 | * reference this bytenr. If we do then we can just discard this operation. |
| 1343 | * accounting algorithm works in 3 steps documented inline. | ||
| 1344 | */ | 1706 | */ |
| 1345 | int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | 1707 | static int check_existing_refs(struct btrfs_trans_handle *trans, |
| 1346 | struct btrfs_fs_info *fs_info, | 1708 | struct btrfs_fs_info *fs_info, |
| 1347 | struct btrfs_delayed_ref_node *node, | 1709 | struct btrfs_qgroup_operation *oper) |
| 1348 | struct btrfs_delayed_extent_op *extent_op) | ||
| 1349 | { | 1710 | { |
| 1350 | struct btrfs_root *quota_root; | ||
| 1351 | u64 ref_root; | ||
| 1352 | struct btrfs_qgroup *qgroup; | ||
| 1353 | struct ulist *roots = NULL; | 1711 | struct ulist *roots = NULL; |
| 1354 | u64 seq; | 1712 | struct ulist_node *unode; |
| 1713 | struct ulist_iterator uiter; | ||
| 1355 | int ret = 0; | 1714 | int ret = 0; |
| 1356 | int sgn; | ||
| 1357 | 1715 | ||
| 1358 | if (!fs_info->quota_enabled) | 1716 | ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, |
| 1359 | return 0; | 1717 | oper->elem.seq, &roots); |
| 1360 | 1718 | if (ret < 0) | |
| 1361 | BUG_ON(!fs_info->quota_root); | 1719 | return ret; |
| 1720 | ret = 0; | ||
| 1362 | 1721 | ||
| 1363 | if (node->type == BTRFS_TREE_BLOCK_REF_KEY || | 1722 | ULIST_ITER_INIT(&uiter); |
| 1364 | node->type == BTRFS_SHARED_BLOCK_REF_KEY) { | 1723 | while ((unode = ulist_next(roots, &uiter))) { |
| 1365 | struct btrfs_delayed_tree_ref *ref; | 1724 | if (unode->val == oper->ref_root) { |
| 1366 | ref = btrfs_delayed_node_to_tree_ref(node); | 1725 | ret = 1; |
| 1367 | ref_root = ref->root; | 1726 | break; |
| 1368 | } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || | 1727 | } |
| 1369 | node->type == BTRFS_SHARED_DATA_REF_KEY) { | ||
| 1370 | struct btrfs_delayed_data_ref *ref; | ||
| 1371 | ref = btrfs_delayed_node_to_data_ref(node); | ||
| 1372 | ref_root = ref->root; | ||
| 1373 | } else { | ||
| 1374 | BUG(); | ||
| 1375 | } | 1728 | } |
| 1729 | ulist_free(roots); | ||
| 1730 | btrfs_put_tree_mod_seq(fs_info, &oper->elem); | ||
| 1376 | 1731 | ||
| 1377 | if (!is_fstree(ref_root)) { | 1732 | return ret; |
| 1378 | /* | 1733 | } |
| 1379 | * non-fs-trees are not being accounted | ||
| 1380 | */ | ||
| 1381 | return 0; | ||
| 1382 | } | ||
| 1383 | 1734 | ||
| 1384 | switch (node->action) { | 1735 | /* |
| 1385 | case BTRFS_ADD_DELAYED_REF: | 1736 | * If we share a reference across multiple roots then we may need to adjust |
| 1386 | case BTRFS_ADD_DELAYED_EXTENT: | 1737 | * various qgroups referenced and exclusive counters. The basic premise is this |
| 1387 | sgn = 1; | 1738 | * |
| 1388 | seq = btrfs_tree_mod_seq_prev(node->seq); | 1739 | * 1) We have seq to represent a 0 count. Instead of looping through all of the |
| 1389 | break; | 1740 | * qgroups and resetting their refcount to 0 we just constantly bump this |
| 1390 | case BTRFS_DROP_DELAYED_REF: | 1741 | * sequence number to act as the base reference count. This means that if |
| 1391 | sgn = -1; | 1742 | * anybody is equal to or below this sequence they were never referenced. We |
| 1392 | seq = node->seq; | 1743 | * jack this sequence up by the number of roots we found each time in order to |
| 1393 | break; | 1744 | * make sure we don't have any overlap. |
| 1394 | case BTRFS_UPDATE_DELAYED_HEAD: | 1745 | * |
| 1395 | return 0; | 1746 | * 2) We first search all the roots that reference the area _except_ the root |
| 1396 | default: | 1747 | * we're acting on currently. This makes up the old_refcnt of all the qgroups |
| 1397 | BUG(); | 1748 | * before. |
| 1398 | } | 1749 | * |
| 1750 | * 3) We walk all of the qgroups referenced by the root we are currently acting | ||
| 1751 | * on, and will either adjust old_refcnt in the case of a removal or the | ||
| 1752 | * new_refcnt in the case of an addition. | ||
| 1753 | * | ||
| 1754 | * 4) Finally we walk all the qgroups that are referenced by this range | ||
| 1755 | * including the root we are acting on currently. We will adjust the counters | ||
| 1756 | * based on the number of roots we had and will have after this operation. | ||
| 1757 | * | ||
| 1758 | * Take this example as an illustration | ||
| 1759 | * | ||
| 1760 | * [qgroup 1/0] | ||
| 1761 | * / | \ | ||
| 1762 | * [qg 0/0] [qg 0/1] [qg 0/2] | ||
| 1763 | * \ | / | ||
| 1764 | * [ extent ] | ||
| 1765 | * | ||
| 1766 | * Say we are adding a reference that is covered by qg 0/0. The first step | ||
| 1767 | * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with | ||
| 1768 | * old_roots being 2. Because it is adding new_roots will be 1. We then go | ||
| 1769 | * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's | ||
| 1770 | * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we | ||
| 1771 | * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a | ||
| 1772 | * reference and thus must add the size to the referenced bytes. Everything | ||
| 1773 | * else is the same so nothing else changes. | ||
| 1774 | */ | ||
| 1775 | static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, | ||
| 1776 | struct btrfs_fs_info *fs_info, | ||
| 1777 | struct btrfs_qgroup_operation *oper) | ||
| 1778 | { | ||
| 1779 | struct ulist *roots = NULL; | ||
| 1780 | struct ulist *qgroups, *tmp; | ||
| 1781 | struct btrfs_qgroup *qgroup; | ||
| 1782 | struct seq_list elem = {}; | ||
| 1783 | u64 seq; | ||
| 1784 | int old_roots = 0; | ||
| 1785 | int new_roots = 0; | ||
| 1786 | int ret = 0; | ||
| 1399 | 1787 | ||
| 1400 | mutex_lock(&fs_info->qgroup_rescan_lock); | 1788 | if (oper->elem.seq) { |
| 1401 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | 1789 | ret = check_existing_refs(trans, fs_info, oper); |
| 1402 | if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { | 1790 | if (ret < 0) |
| 1403 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 1791 | return ret; |
| 1792 | if (ret) | ||
| 1404 | return 0; | 1793 | return 0; |
| 1405 | } | ||
| 1406 | } | 1794 | } |
| 1407 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 1408 | 1795 | ||
| 1409 | /* | 1796 | qgroups = ulist_alloc(GFP_NOFS); |
| 1410 | * the delayed ref sequence number we pass depends on the direction of | 1797 | if (!qgroups) |
| 1411 | * the operation. for add operations, we pass | 1798 | return -ENOMEM; |
| 1412 | * tree_mod_log_prev_seq(node->seq) to skip | ||
| 1413 | * the delayed ref's current sequence number, because we need the state | ||
| 1414 | * of the tree before the add operation. for delete operations, we pass | ||
| 1415 | * (node->seq) to include the delayed ref's current sequence number, | ||
| 1416 | * because we need the state of the tree after the delete operation. | ||
| 1417 | */ | ||
| 1418 | ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots); | ||
| 1419 | if (ret < 0) | ||
| 1420 | return ret; | ||
| 1421 | |||
| 1422 | spin_lock(&fs_info->qgroup_lock); | ||
| 1423 | 1799 | ||
| 1424 | quota_root = fs_info->quota_root; | 1800 | tmp = ulist_alloc(GFP_NOFS); |
| 1425 | if (!quota_root) | 1801 | if (!tmp) { |
| 1426 | goto unlock; | 1802 | ulist_free(qgroups); |
| 1803 | return -ENOMEM; | ||
| 1804 | } | ||
| 1427 | 1805 | ||
| 1428 | qgroup = find_qgroup_rb(fs_info, ref_root); | 1806 | btrfs_get_tree_mod_seq(fs_info, &elem); |
| 1807 | ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, | ||
| 1808 | &roots); | ||
| 1809 | btrfs_put_tree_mod_seq(fs_info, &elem); | ||
| 1810 | if (ret < 0) { | ||
| 1811 | ulist_free(qgroups); | ||
| 1812 | ulist_free(tmp); | ||
| 1813 | return ret; | ||
| 1814 | } | ||
| 1815 | spin_lock(&fs_info->qgroup_lock); | ||
| 1816 | qgroup = find_qgroup_rb(fs_info, oper->ref_root); | ||
| 1429 | if (!qgroup) | 1817 | if (!qgroup) |
| 1430 | goto unlock; | 1818 | goto out; |
| 1819 | seq = fs_info->qgroup_seq; | ||
| 1431 | 1820 | ||
| 1432 | /* | 1821 | /* |
| 1433 | * step 1: for each old ref, visit all nodes once and inc refcnt | 1822 | * So roots is the list of all the roots currently pointing at the |
| 1823 | * bytenr, including the ref we are adding if this is an addition, but | ||
| 1824 | * not if we are removing a ref. So we pass in the ref_root to skip that | ||
| 1825 | * root in our calculations. We set old_refcnt and new_refcnt because who | ||
| 1826 | * the hell knows what everything looked like before, and it doesn't | ||
| 1827 | * matter except... | ||
| 1434 | */ | 1828 | */ |
| 1435 | ulist_reinit(fs_info->qgroup_ulist); | 1829 | ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups, |
| 1436 | seq = fs_info->qgroup_seq; | 1830 | seq, &old_roots, 0); |
| 1437 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | 1831 | if (ret < 0) |
| 1832 | goto out; | ||
| 1438 | 1833 | ||
| 1439 | ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, | 1834 | /* |
| 1440 | seq); | 1835 | * Now adjust the refcounts of the qgroups that care about this |
| 1441 | if (ret) | 1836 | * reference, either the old_count in the case of removal or new_count |
| 1442 | goto unlock; | 1837 | * in the case of an addition. |
| 1838 | */ | ||
| 1839 | ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups, | ||
| 1840 | seq); | ||
| 1841 | if (ret < 0) | ||
| 1842 | goto out; | ||
| 1443 | 1843 | ||
| 1444 | /* | 1844 | /* |
| 1445 | * step 2: walk from the new root | 1845 | * ...in the case of removals. If we had a removal before we got around |
| 1846 | * to processing this operation then we need to find that guy and count | ||
| 1847 | * his references as if they really existed so we don't end up screwing | ||
| 1848 | * up the exclusive counts. Then whenever we go to process the delete | ||
| 1849 | * everything will be grand and we can account for whatever exclusive | ||
| 1850 | * changes need to be made there. We also have to pass in old_roots so | ||
| 1851 | * we have an accurate count of the roots as it pertains to this | ||
| 1852 | * operation's view of the world. | ||
| 1446 | */ | 1853 | */ |
| 1447 | ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, | 1854 | ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, |
| 1448 | seq, sgn, node->num_bytes, qgroup); | 1855 | &old_roots); |
| 1449 | if (ret) | 1856 | if (ret < 0) |
| 1450 | goto unlock; | 1857 | goto out; |
| 1451 | 1858 | ||
| 1452 | /* | 1859 | /* |
| 1453 | * step 3: walk again from old refs | 1860 | * If we are adding our root we need to adjust the number of roots up;
| 1861 | * otherwise old_roots is already the number of roots we want. | ||
| 1454 | */ | 1862 | */ |
| 1455 | ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, | 1863 | if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { |
| 1456 | seq, sgn, node->num_bytes); | 1864 | new_roots = old_roots + 1; |
| 1457 | if (ret) | 1865 | } else { |
| 1458 | goto unlock; | 1866 | new_roots = old_roots; |
| 1867 | old_roots++; | ||
| 1868 | } | ||
| 1869 | fs_info->qgroup_seq += old_roots + 1; | ||
| 1459 | 1870 | ||
| 1460 | unlock: | 1871 | |
| 1872 | /* | ||
| 1873 | * And now the magic happens, bless Arne for having a pretty elegant | ||
| 1874 | * solution for this. | ||
| 1875 | */ | ||
| 1876 | qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, | ||
| 1877 | qgroups, seq, old_roots, new_roots, 0); | ||
| 1878 | out: | ||
| 1461 | spin_unlock(&fs_info->qgroup_lock); | 1879 | spin_unlock(&fs_info->qgroup_lock); |
| 1880 | ulist_free(qgroups); | ||
| 1462 | ulist_free(roots); | 1881 | ulist_free(roots); |
| 1882 | ulist_free(tmp); | ||
| 1883 | return ret; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | /* | ||
| 1887 | * btrfs_qgroup_account is called for every ref that is added to or deleted | ||
| 1888 | * from the fs. First, all roots referencing the extent are searched, and | ||
| 1889 | * then the space is accounted to the different roots accordingly, using | ||
| 1890 | * the exclusive or shared accounting helpers above. | ||
| 1891 | */ | ||
| 1892 | static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, | ||
| 1893 | struct btrfs_fs_info *fs_info, | ||
| 1894 | struct btrfs_qgroup_operation *oper) | ||
| 1895 | { | ||
| 1896 | int ret = 0; | ||
| 1897 | |||
| 1898 | if (!fs_info->quota_enabled) | ||
| 1899 | return 0; | ||
| 1900 | |||
| 1901 | BUG_ON(!fs_info->quota_root); | ||
| 1902 | |||
| 1903 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
| 1904 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
| 1905 | if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { | ||
| 1906 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 1907 | return 0; | ||
| 1908 | } | ||
| 1909 | } | ||
| 1910 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
| 1911 | |||
| 1912 | ASSERT(is_fstree(oper->ref_root)); | ||
| 1913 | |||
| 1914 | switch (oper->type) { | ||
| 1915 | case BTRFS_QGROUP_OPER_ADD_EXCL: | ||
| 1916 | case BTRFS_QGROUP_OPER_SUB_EXCL: | ||
| 1917 | ret = qgroup_excl_accounting(fs_info, oper); | ||
| 1918 | break; | ||
| 1919 | case BTRFS_QGROUP_OPER_ADD_SHARED: | ||
| 1920 | case BTRFS_QGROUP_OPER_SUB_SHARED: | ||
| 1921 | ret = qgroup_shared_accounting(trans, fs_info, oper); | ||
| 1922 | break; | ||
| 1923 | default: | ||
| 1924 | ASSERT(0); | ||
| 1925 | } | ||
| 1926 | return ret; | ||
| 1927 | } | ||
| 1928 | |||
| 1929 | /* | ||
| 1930 | * Needs to be called every time we run delayed refs, even if there is an | ||
| 1931 | * error, in order to clean up outstanding operations. | ||
| 1932 | */ | ||
| 1933 | int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
| 1934 | struct btrfs_fs_info *fs_info) | ||
| 1935 | { | ||
| 1936 | struct btrfs_qgroup_operation *oper; | ||
| 1937 | int ret = 0; | ||
| 1463 | 1938 | ||
| 1939 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
| 1940 | oper = list_first_entry(&trans->qgroup_ref_list, | ||
| 1941 | struct btrfs_qgroup_operation, list); | ||
| 1942 | list_del_init(&oper->list); | ||
| 1943 | if (!ret || !trans->aborted) | ||
| 1944 | ret = btrfs_qgroup_account(trans, fs_info, oper); | ||
| 1945 | spin_lock(&fs_info->qgroup_op_lock); | ||
| 1946 | rb_erase(&oper->n, &fs_info->qgroup_op_tree); | ||
| 1947 | spin_unlock(&fs_info->qgroup_op_lock); | ||
| 1948 | btrfs_put_tree_mod_seq(fs_info, &oper->elem); | ||
| 1949 | kfree(oper); | ||
| 1950 | } | ||
| 1464 | return ret; | 1951 | return ret; |
| 1465 | } | 1952 | } |
| 1466 | 1953 | ||
| @@ -1629,8 +2116,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
| 1629 | srcgroup = find_qgroup_rb(fs_info, srcid); | 2116 | srcgroup = find_qgroup_rb(fs_info, srcid); |
| 1630 | if (!srcgroup) | 2117 | if (!srcgroup) |
| 1631 | goto unlock; | 2118 | goto unlock; |
| 1632 | dstgroup->rfer = srcgroup->rfer - level_size; | 2119 | |
| 1633 | dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; | 2120 | /* |
| 2121 | * We call inherit after we clone the root in order to make sure | ||
| 2122 | * our counts don't go crazy, so at this point the only | ||
| 2123 | * difference between the two roots should be the root node. | ||
| 2124 | */ | ||
| 2125 | dstgroup->rfer = srcgroup->rfer; | ||
| 2126 | dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; | ||
| 2127 | dstgroup->excl = level_size; | ||
| 2128 | dstgroup->excl_cmpr = level_size; | ||
| 1634 | srcgroup->excl = level_size; | 2129 | srcgroup->excl = level_size; |
| 1635 | srcgroup->excl_cmpr = level_size; | 2130 | srcgroup->excl_cmpr = level_size; |
| 1636 | qgroup_dirty(fs_info, dstgroup); | 2131 | qgroup_dirty(fs_info, dstgroup); |
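For a concrete sense of the count handoff above, a hedged numeric sketch (names and values invented, not from the patch): suppose the source subvolume references 1 GiB and tree root nodes are 16 KiB.

    #include <stdint.h>

    struct toy_counts { uint64_t rfer, excl; }; /* invented, minimal */

    /* After a snapshot the two trees share everything except their root
     * nodes, so each side's exclusive count collapses to one root node. */
    static void toy_snapshot_inherit(struct toy_counts *src,
                                     struct toy_counts *dst,
                                     uint64_t level_size)
    {
        dst->rfer = src->rfer;  /* snapshot references the same bytes */
        dst->excl = level_size; /* but exclusively owns only its new root node */
        src->excl = level_size; /* source likewise keeps only its root */
    }

    /* src = { .rfer = 1ULL << 30, .excl = 1ULL << 30 }, level_size = 16384:
     * afterwards both sides report rfer = 1 GiB and excl = 16 KiB. */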
| @@ -1734,7 +2229,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1734 | struct btrfs_qgroup *qg; | 2229 | struct btrfs_qgroup *qg; |
| 1735 | struct btrfs_qgroup_list *glist; | 2230 | struct btrfs_qgroup_list *glist; |
| 1736 | 2231 | ||
| 1737 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 2232 | qg = u64_to_ptr(unode->aux); |
| 1738 | 2233 | ||
| 1739 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | 2234 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
| 1740 | qg->reserved + (s64)qg->rfer + num_bytes > | 2235 | qg->reserved + (s64)qg->rfer + num_bytes > |
| @@ -1766,7 +2261,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1766 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { | 2261 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
| 1767 | struct btrfs_qgroup *qg; | 2262 | struct btrfs_qgroup *qg; |
| 1768 | 2263 | ||
| 1769 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 2264 | qg = u64_to_ptr(unode->aux); |
| 1770 | 2265 | ||
| 1771 | qg->reserved += num_bytes; | 2266 | qg->reserved += num_bytes; |
| 1772 | } | 2267 | } |
| @@ -1812,7 +2307,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1812 | struct btrfs_qgroup *qg; | 2307 | struct btrfs_qgroup *qg; |
| 1813 | struct btrfs_qgroup_list *glist; | 2308 | struct btrfs_qgroup_list *glist; |
| 1814 | 2309 | ||
| 1815 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 2310 | qg = u64_to_ptr(unode->aux); |
| 1816 | 2311 | ||
| 1817 | qg->reserved -= num_bytes; | 2312 | qg->reserved -= num_bytes; |
| 1818 | 2313 | ||
| @@ -1848,15 +2343,15 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | |||
| 1848 | */ | 2343 | */ |
| 1849 | static int | 2344 | static int |
| 1850 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | 2345 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, |
| 1851 | struct btrfs_trans_handle *trans, struct ulist *tmp, | 2346 | struct btrfs_trans_handle *trans, struct ulist *qgroups, |
| 1852 | struct extent_buffer *scratch_leaf) | 2347 | struct ulist *tmp, struct extent_buffer *scratch_leaf) |
| 1853 | { | 2348 | { |
| 1854 | struct btrfs_key found; | 2349 | struct btrfs_key found; |
| 1855 | struct ulist *roots = NULL; | 2350 | struct ulist *roots = NULL; |
| 1856 | struct ulist_node *unode; | ||
| 1857 | struct ulist_iterator uiter; | ||
| 1858 | struct seq_list tree_mod_seq_elem = {}; | 2351 | struct seq_list tree_mod_seq_elem = {}; |
| 2352 | u64 num_bytes; | ||
| 1859 | u64 seq; | 2353 | u64 seq; |
| 2354 | int new_roots; | ||
| 1860 | int slot; | 2355 | int slot; |
| 1861 | int ret; | 2356 | int ret; |
| 1862 | 2357 | ||
| @@ -1897,8 +2392,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
| 1897 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2392 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
| 1898 | 2393 | ||
| 1899 | for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { | 2394 | for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { |
| 1900 | u64 num_bytes; | ||
| 1901 | |||
| 1902 | btrfs_item_key_to_cpu(scratch_leaf, &found, slot); | 2395 | btrfs_item_key_to_cpu(scratch_leaf, &found, slot); |
| 1903 | if (found.type != BTRFS_EXTENT_ITEM_KEY && | 2396 | if (found.type != BTRFS_EXTENT_ITEM_KEY && |
| 1904 | found.type != BTRFS_METADATA_ITEM_KEY) | 2397 | found.type != BTRFS_METADATA_ITEM_KEY) |
| @@ -1908,76 +2401,34 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
| 1908 | else | 2401 | else |
| 1909 | num_bytes = found.offset; | 2402 | num_bytes = found.offset; |
| 1910 | 2403 | ||
| 1911 | ret = btrfs_find_all_roots(trans, fs_info, found.objectid, | 2404 | ulist_reinit(qgroups); |
| 1912 | tree_mod_seq_elem.seq, &roots); | 2405 | ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, |
| 2406 | &roots); | ||
| 1913 | if (ret < 0) | 2407 | if (ret < 0) |
| 1914 | goto out; | 2408 | goto out; |
| 1915 | spin_lock(&fs_info->qgroup_lock); | 2409 | spin_lock(&fs_info->qgroup_lock); |
| 1916 | seq = fs_info->qgroup_seq; | 2410 | seq = fs_info->qgroup_seq; |
| 1917 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | 2411 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ |
| 1918 | 2412 | ||
| 1919 | ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); | 2413 | new_roots = 0; |
| 1920 | if (ret) { | 2414 | ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, |
| 2415 | seq, &new_roots, 1); | ||
| 2416 | if (ret < 0) { | ||
| 1921 | spin_unlock(&fs_info->qgroup_lock); | 2417 | spin_unlock(&fs_info->qgroup_lock); |
| 1922 | ulist_free(roots); | 2418 | ulist_free(roots); |
| 1923 | goto out; | 2419 | goto out; |
| 1924 | } | 2420 | } |
| 1925 | 2421 | ||
| 1926 | /* | 2422 | ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, |
| 1927 | * step2 of btrfs_qgroup_account_ref works from a single root, | 2423 | seq, 0, new_roots, 1); |
| 1928 | * we're doing all at once here. | 2424 | if (ret < 0) { |
| 1929 | */ | 2425 | spin_unlock(&fs_info->qgroup_lock); |
| 1930 | ulist_reinit(tmp); | 2426 | ulist_free(roots); |
| 1931 | ULIST_ITER_INIT(&uiter); | 2427 | goto out; |
| 1932 | while ((unode = ulist_next(roots, &uiter))) { | ||
| 1933 | struct btrfs_qgroup *qg; | ||
| 1934 | |||
| 1935 | qg = find_qgroup_rb(fs_info, unode->val); | ||
| 1936 | if (!qg) | ||
| 1937 | continue; | ||
| 1938 | |||
| 1939 | ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, | ||
| 1940 | GFP_ATOMIC); | ||
| 1941 | if (ret < 0) { | ||
| 1942 | spin_unlock(&fs_info->qgroup_lock); | ||
| 1943 | ulist_free(roots); | ||
| 1944 | goto out; | ||
| 1945 | } | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | /* this loop is similar to step 2 of btrfs_qgroup_account_ref */ | ||
| 1949 | ULIST_ITER_INIT(&uiter); | ||
| 1950 | while ((unode = ulist_next(tmp, &uiter))) { | ||
| 1951 | struct btrfs_qgroup *qg; | ||
| 1952 | struct btrfs_qgroup_list *glist; | ||
| 1953 | |||
| 1954 | qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; | ||
| 1955 | qg->rfer += num_bytes; | ||
| 1956 | qg->rfer_cmpr += num_bytes; | ||
| 1957 | WARN_ON(qg->tag >= seq); | ||
| 1958 | if (qg->refcnt - seq == roots->nnodes) { | ||
| 1959 | qg->excl += num_bytes; | ||
| 1960 | qg->excl_cmpr += num_bytes; | ||
| 1961 | } | ||
| 1962 | qgroup_dirty(fs_info, qg); | ||
| 1963 | |||
| 1964 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
| 1965 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
| 1966 | (uintptr_t)glist->group, | ||
| 1967 | GFP_ATOMIC); | ||
| 1968 | if (ret < 0) { | ||
| 1969 | spin_unlock(&fs_info->qgroup_lock); | ||
| 1970 | ulist_free(roots); | ||
| 1971 | goto out; | ||
| 1972 | } | ||
| 1973 | } | ||
| 1974 | } | 2428 | } |
| 1975 | |||
| 1976 | spin_unlock(&fs_info->qgroup_lock); | 2429 | spin_unlock(&fs_info->qgroup_lock); |
| 1977 | ulist_free(roots); | 2430 | ulist_free(roots); |
| 1978 | ret = 0; | ||
| 1979 | } | 2431 | } |
| 1980 | |||
| 1981 | out: | 2432 | out: |
| 1982 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | 2433 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); |
| 1983 | 2434 | ||
| @@ -1990,13 +2441,16 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
| 1990 | qgroup_rescan_work); | 2441 | qgroup_rescan_work); |
| 1991 | struct btrfs_path *path; | 2442 | struct btrfs_path *path; |
| 1992 | struct btrfs_trans_handle *trans = NULL; | 2443 | struct btrfs_trans_handle *trans = NULL; |
| 1993 | struct ulist *tmp = NULL; | 2444 | struct ulist *tmp = NULL, *qgroups = NULL; |
| 1994 | struct extent_buffer *scratch_leaf = NULL; | 2445 | struct extent_buffer *scratch_leaf = NULL; |
| 1995 | int err = -ENOMEM; | 2446 | int err = -ENOMEM; |
| 1996 | 2447 | ||
| 1997 | path = btrfs_alloc_path(); | 2448 | path = btrfs_alloc_path(); |
| 1998 | if (!path) | 2449 | if (!path) |
| 1999 | goto out; | 2450 | goto out; |
| 2451 | qgroups = ulist_alloc(GFP_NOFS); | ||
| 2452 | if (!qgroups) | ||
| 2453 | goto out; | ||
| 2000 | tmp = ulist_alloc(GFP_NOFS); | 2454 | tmp = ulist_alloc(GFP_NOFS); |
| 2001 | if (!tmp) | 2455 | if (!tmp) |
| 2002 | goto out; | 2456 | goto out; |
| @@ -2015,7 +2469,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
| 2015 | err = -EINTR; | 2469 | err = -EINTR; |
| 2016 | } else { | 2470 | } else { |
| 2017 | err = qgroup_rescan_leaf(fs_info, path, trans, | 2471 | err = qgroup_rescan_leaf(fs_info, path, trans, |
| 2018 | tmp, scratch_leaf); | 2472 | qgroups, tmp, scratch_leaf); |
| 2019 | } | 2473 | } |
| 2020 | if (err > 0) | 2474 | if (err > 0) |
| 2021 | btrfs_commit_transaction(trans, fs_info->fs_root); | 2475 | btrfs_commit_transaction(trans, fs_info->fs_root); |
| @@ -2025,6 +2479,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
| 2025 | 2479 | ||
| 2026 | out: | 2480 | out: |
| 2027 | kfree(scratch_leaf); | 2481 | kfree(scratch_leaf); |
| 2482 | ulist_free(qgroups); | ||
| 2028 | ulist_free(tmp); | 2483 | ulist_free(tmp); |
| 2029 | btrfs_free_path(path); | 2484 | btrfs_free_path(path); |
| 2030 | 2485 | ||
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h new file mode 100644 index 000000000000..5952ff1fbd7a --- /dev/null +++ b/fs/btrfs/qgroup.h | |||
| @@ -0,0 +1,107 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2014 Facebook. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_QGROUP__ | ||
| 20 | #define __BTRFS_QGROUP__ | ||
| 21 | |||
| 22 | /* | ||
| 23 | * A description of the operations. All of these operations happen only when | ||
| 24 | * we are adding the 1st reference for that subvolume in the case of adding | ||
| 25 | * space, or on the last reference delete in the case of subtraction. The only | ||
| 26 | * exception is the last one, which is added for confusion. | ||
| 27 | * | ||
| 28 | * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only | ||
| 29 | * one pointing at the bytes we are adding. This is called on the first | ||
| 30 | * allocation. | ||
| 31 | * | ||
| 32 | * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be | ||
| 33 | * shared between subvols. This is called on the creation of a ref that already | ||
| 34 | * has refs from a different subvolume, so basically reflink. | ||
| 35 | * | ||
| 36 | * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only | ||
| 37 | * one referencing the range. | ||
| 38 | * | ||
| 39 | * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares | ||
| 40 | * refs with other subvolumes. | ||
| 41 | */ | ||
| 42 | enum btrfs_qgroup_operation_type { | ||
| 43 | BTRFS_QGROUP_OPER_ADD_EXCL, | ||
| 44 | BTRFS_QGROUP_OPER_ADD_SHARED, | ||
| 45 | BTRFS_QGROUP_OPER_SUB_EXCL, | ||
| 46 | BTRFS_QGROUP_OPER_SUB_SHARED, | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct btrfs_qgroup_operation { | ||
| 50 | u64 ref_root; | ||
| 51 | u64 bytenr; | ||
| 52 | u64 num_bytes; | ||
| 53 | u64 seq; | ||
| 54 | enum btrfs_qgroup_operation_type type; | ||
| 55 | struct seq_list elem; | ||
| 56 | struct rb_node n; | ||
| 57 | struct list_head list; | ||
| 58 | }; | ||
| 59 | |||
| 60 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, | ||
| 61 | struct btrfs_fs_info *fs_info); | ||
| 62 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, | ||
| 63 | struct btrfs_fs_info *fs_info); | ||
| 64 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | ||
| 65 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | ||
| 66 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | ||
| 67 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
| 68 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
| 69 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
| 70 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
| 71 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
| 72 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
| 73 | char *name); | ||
| 74 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
| 75 | struct btrfs_fs_info *fs_info, u64 qgroupid); | ||
| 76 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
| 77 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
| 78 | struct btrfs_qgroup_limit *limit); | ||
| 79 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); | ||
| 80 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | ||
| 81 | struct btrfs_delayed_extent_op; | ||
| 82 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
| 83 | struct btrfs_fs_info *fs_info, u64 ref_root, | ||
| 84 | u64 bytenr, u64 num_bytes, | ||
| 85 | enum btrfs_qgroup_operation_type type, | ||
| 86 | int mod_seq); | ||
| 87 | int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
| 88 | struct btrfs_fs_info *fs_info); | ||
| 89 | void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans, | ||
| 90 | struct btrfs_fs_info *fs_info, | ||
| 91 | struct btrfs_qgroup_operation *oper); | ||
| 92 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | ||
| 93 | struct btrfs_fs_info *fs_info); | ||
| 94 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
| 95 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
| 96 | struct btrfs_qgroup_inherit *inherit); | ||
| 97 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); | ||
| 98 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); | ||
| 99 | |||
| 100 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); | ||
| 101 | |||
| 102 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
| 103 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
| 104 | u64 rfer, u64 excl); | ||
| 105 | #endif | ||
| 106 | |||
| 107 | #endif /* __BTRFS_QGROUP__ */ | ||
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 30947f923620..09230cf3a244 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 428 | continue; | 428 | continue; |
| 429 | } | 429 | } |
| 430 | if (!dev->bdev) { | 430 | if (!dev->bdev) { |
| 431 | /* cannot read ahead on missing device */ | 431 | /* |
| 432 | continue; | 432 | * cannot read ahead on missing device, but for RAID5/6, |
| 433 | * REQ_GET_READ_MIRRORS returns 1. So don't skip the | ||
| 434 | * missing device in that case. | ||
| 435 | */ | ||
| 436 | if (nzones > 1) | ||
| 437 | continue; | ||
| 433 | } | 438 | } |
| 434 | if (dev_replace_is_ongoing && | 439 | if (dev_replace_is_ongoing && |
| 435 | dev == fs_info->dev_replace.tgtdev) { | 440 | dev == fs_info->dev_replace.tgtdev) { |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 7f92ab1daa87..65245a07275b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -337,7 +337,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) | |||
| 337 | if (bnode->root) | 337 | if (bnode->root) |
| 338 | fs_info = bnode->root->fs_info; | 338 | fs_info = bnode->root->fs_info; |
| 339 | btrfs_panic(fs_info, errno, "Inconsistency in backref cache " | 339 | btrfs_panic(fs_info, errno, "Inconsistency in backref cache " |
| 340 | "found at offset %llu\n", bytenr); | 340 | "found at offset %llu", bytenr); |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | /* | 343 | /* |
| @@ -528,7 +528,7 @@ static int should_ignore_root(struct btrfs_root *root) | |||
| 528 | { | 528 | { |
| 529 | struct btrfs_root *reloc_root; | 529 | struct btrfs_root *reloc_root; |
| 530 | 530 | ||
| 531 | if (!root->ref_cows) | 531 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 532 | return 0; | 532 | return 0; |
| 533 | 533 | ||
| 534 | reloc_root = root->reloc_root; | 534 | reloc_root = root->reloc_root; |
| @@ -610,7 +610,7 @@ struct btrfs_root *find_tree_root(struct reloc_control *rc, | |||
| 610 | root = read_fs_root(rc->extent_root->fs_info, root_objectid); | 610 | root = read_fs_root(rc->extent_root->fs_info, root_objectid); |
| 611 | BUG_ON(IS_ERR(root)); | 611 | BUG_ON(IS_ERR(root)); |
| 612 | 612 | ||
| 613 | if (root->ref_cows && | 613 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
| 614 | generation != btrfs_root_generation(&root->root_item)) | 614 | generation != btrfs_root_generation(&root->root_item)) |
| 615 | return NULL; | 615 | return NULL; |
| 616 | 616 | ||
| @@ -887,7 +887,7 @@ again: | |||
| 887 | goto out; | 887 | goto out; |
| 888 | } | 888 | } |
| 889 | 889 | ||
| 890 | if (!root->ref_cows) | 890 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 891 | cur->cowonly = 1; | 891 | cur->cowonly = 1; |
| 892 | 892 | ||
| 893 | if (btrfs_root_level(&root->root_item) == cur->level) { | 893 | if (btrfs_root_level(&root->root_item) == cur->level) { |
| @@ -954,7 +954,8 @@ again: | |||
| 954 | upper->bytenr = eb->start; | 954 | upper->bytenr = eb->start; |
| 955 | upper->owner = btrfs_header_owner(eb); | 955 | upper->owner = btrfs_header_owner(eb); |
| 956 | upper->level = lower->level + 1; | 956 | upper->level = lower->level + 1; |
| 957 | if (!root->ref_cows) | 957 | if (!test_bit(BTRFS_ROOT_REF_COWS, |
| 958 | &root->state)) | ||
| 958 | upper->cowonly = 1; | 959 | upper->cowonly = 1; |
| 959 | 960 | ||
| 960 | /* | 961 | /* |
| @@ -1258,7 +1259,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) | |||
| 1258 | if (rb_node) { | 1259 | if (rb_node) { |
| 1259 | btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " | 1260 | btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " |
| 1260 | "for start=%llu while inserting into relocation " | 1261 | "for start=%llu while inserting into relocation " |
| 1261 | "tree\n", node->bytenr); | 1262 | "tree", node->bytenr); |
| 1262 | kfree(node); | 1263 | kfree(node); |
| 1263 | return -EEXIST; | 1264 | return -EEXIST; |
| 1264 | } | 1265 | } |
| @@ -2441,7 +2442,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | |||
| 2441 | next = walk_up_backref(next, edges, &index); | 2442 | next = walk_up_backref(next, edges, &index); |
| 2442 | root = next->root; | 2443 | root = next->root; |
| 2443 | BUG_ON(!root); | 2444 | BUG_ON(!root); |
| 2444 | BUG_ON(!root->ref_cows); | 2445 | BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state)); |
| 2445 | 2446 | ||
| 2446 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2447 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
| 2447 | record_reloc_root_in_trans(trans, root); | 2448 | record_reloc_root_in_trans(trans, root); |
| @@ -2506,7 +2507,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | |||
| 2506 | BUG_ON(!root); | 2507 | BUG_ON(!root); |
| 2507 | 2508 | ||
| 2508 | /* no other choice for non-references counted tree */ | 2509 | /* no other choice for non-references counted tree */ |
| 2509 | if (!root->ref_cows) | 2510 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 2510 | return root; | 2511 | return root; |
| 2511 | 2512 | ||
| 2512 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | 2513 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) |
| @@ -2893,14 +2894,14 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
| 2893 | goto out; | 2894 | goto out; |
| 2894 | } | 2895 | } |
| 2895 | 2896 | ||
| 2896 | if (!root || root->ref_cows) { | 2897 | if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { |
| 2897 | ret = reserve_metadata_space(trans, rc, node); | 2898 | ret = reserve_metadata_space(trans, rc, node); |
| 2898 | if (ret) | 2899 | if (ret) |
| 2899 | goto out; | 2900 | goto out; |
| 2900 | } | 2901 | } |
| 2901 | 2902 | ||
| 2902 | if (root) { | 2903 | if (root) { |
| 2903 | if (root->ref_cows) { | 2904 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { |
| 2904 | BUG_ON(node->new_bytenr); | 2905 | BUG_ON(node->new_bytenr); |
| 2905 | BUG_ON(!list_empty(&node->list)); | 2906 | BUG_ON(!list_empty(&node->list)); |
| 2906 | btrfs_record_root_in_trans(trans, root); | 2907 | btrfs_record_root_in_trans(trans, root); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 38bb47e7d6b1..360a728a639f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -306,7 +306,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 306 | break; | 306 | break; |
| 307 | } | 307 | } |
| 308 | 308 | ||
| 309 | root->orphan_item_inserted = 1; | 309 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
| 310 | 310 | ||
| 311 | err = btrfs_insert_fs_root(root->fs_info, root); | 311 | err = btrfs_insert_fs_root(root->fs_info, root); |
| 312 | if (err) { | 312 | if (err) { |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0be77993378e..ac80188eec88 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -588,8 +588,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 588 | 588 | ||
| 589 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 589 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 590 | do { | 590 | do { |
| 591 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, | 591 | ret = tree_backref_for_extent(&ptr, eb, &found_key, ei, |
| 592 | &ref_root, &ref_level); | 592 | item_size, &ref_root, |
| 593 | &ref_level); | ||
| 593 | printk_in_rcu(KERN_WARNING | 594 | printk_in_rcu(KERN_WARNING |
| 594 | "BTRFS: %s at logical %llu on dev %s, " | 595 | "BTRFS: %s at logical %llu on dev %s, " |
| 595 | "sector %llu: metadata %s (level %d) in tree " | 596 | "sector %llu: metadata %s (level %d) in tree " |
| @@ -717,8 +718,8 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | |||
| 717 | out: | 718 | out: |
| 718 | if (page) | 719 | if (page) |
| 719 | put_page(page); | 720 | put_page(page); |
| 720 | if (inode) | 721 | |
| 721 | iput(inode); | 722 | iput(inode); |
| 722 | 723 | ||
| 723 | if (ret < 0) | 724 | if (ret < 0) |
| 724 | return ret; | 725 | return ret; |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1ac3ca98c429..6528aa662181 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -349,16 +349,24 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 349 | if (p->buf_len >= len) | 349 | if (p->buf_len >= len) |
| 350 | return 0; | 350 | return 0; |
| 351 | 351 | ||
| 352 | if (len > PATH_MAX) { | ||
| 353 | WARN_ON(1); | ||
| 354 | return -ENOMEM; | ||
| 355 | } | ||
| 356 | |||
| 352 | path_len = p->end - p->start; | 357 | path_len = p->end - p->start; |
| 353 | old_buf_len = p->buf_len; | 358 | old_buf_len = p->buf_len; |
| 354 | 359 | ||
| 355 | /* | 360 | /* |
| 356 | * First time the inline_buf does not suffice | 361 | * First time the inline_buf does not suffice |
| 357 | */ | 362 | */ |
| 358 | if (p->buf == p->inline_buf) | 363 | if (p->buf == p->inline_buf) { |
| 359 | tmp_buf = kmalloc(len, GFP_NOFS); | 364 | tmp_buf = kmalloc(len, GFP_NOFS); |
| 360 | else | 365 | if (tmp_buf) |
| 366 | memcpy(tmp_buf, p->buf, old_buf_len); | ||
| 367 | } else { | ||
| 361 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | 368 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
| 369 | } | ||
| 362 | if (!tmp_buf) | 370 | if (!tmp_buf) |
| 363 | return -ENOMEM; | 371 | return -ENOMEM; |
| 364 | p->buf = tmp_buf; | 372 | p->buf = tmp_buf; |
| @@ -967,7 +975,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 967 | struct btrfs_dir_item *di; | 975 | struct btrfs_dir_item *di; |
| 968 | struct btrfs_key di_key; | 976 | struct btrfs_key di_key; |
| 969 | char *buf = NULL; | 977 | char *buf = NULL; |
| 970 | const int buf_len = PATH_MAX; | 978 | int buf_len; |
| 971 | u32 name_len; | 979 | u32 name_len; |
| 972 | u32 data_len; | 980 | u32 data_len; |
| 973 | u32 cur; | 981 | u32 cur; |
| @@ -977,6 +985,11 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 977 | int num; | 985 | int num; |
| 978 | u8 type; | 986 | u8 type; |
| 979 | 987 | ||
| 988 | if (found_key->type == BTRFS_XATTR_ITEM_KEY) | ||
| 989 | buf_len = BTRFS_MAX_XATTR_SIZE(root); | ||
| 990 | else | ||
| 991 | buf_len = PATH_MAX; | ||
| 992 | |||
| 980 | buf = kmalloc(buf_len, GFP_NOFS); | 993 | buf = kmalloc(buf_len, GFP_NOFS); |
| 981 | if (!buf) { | 994 | if (!buf) { |
| 982 | ret = -ENOMEM; | 995 | ret = -ENOMEM; |
| @@ -998,12 +1011,23 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 998 | type = btrfs_dir_type(eb, di); | 1011 | type = btrfs_dir_type(eb, di); |
| 999 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 1012 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
| 1000 | 1013 | ||
| 1001 | /* | 1014 | if (type == BTRFS_FT_XATTR) { |
| 1002 | * Path too long | 1015 | if (name_len > XATTR_NAME_MAX) { |
| 1003 | */ | 1016 | ret = -ENAMETOOLONG; |
| 1004 | if (name_len + data_len > buf_len) { | 1017 | goto out; |
| 1005 | ret = -ENAMETOOLONG; | 1018 | } |
| 1006 | goto out; | 1019 | if (name_len + data_len > buf_len) { |
| 1020 | ret = -E2BIG; | ||
| 1021 | goto out; | ||
| 1022 | } | ||
| 1023 | } else { | ||
| 1024 | /* | ||
| 1025 | * Path too long | ||
| 1026 | */ | ||
| 1027 | if (name_len + data_len > buf_len) { | ||
| 1028 | ret = -ENAMETOOLONG; | ||
| 1029 | goto out; | ||
| 1030 | } | ||
| 1007 | } | 1031 | } |
| 1008 | 1032 | ||
| 1009 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1033 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
| @@ -1341,7 +1365,7 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1341 | ret = -EIO; | 1365 | ret = -EIO; |
| 1342 | btrfs_err(sctx->send_root->fs_info, "did not find backref in " | 1366 | btrfs_err(sctx->send_root->fs_info, "did not find backref in " |
| 1343 | "send_root. inode=%llu, offset=%llu, " | 1367 | "send_root. inode=%llu, offset=%llu, " |
| 1344 | "disk_byte=%llu found extent=%llu\n", | 1368 | "disk_byte=%llu found extent=%llu", |
| 1345 | ino, data_offset, disk_byte, found_key.objectid); | 1369 | ino, data_offset, disk_byte, found_key.objectid); |
| 1346 | goto out; | 1370 | goto out; |
| 1347 | } | 1371 | } |
| @@ -1620,6 +1644,10 @@ static int lookup_dir_item_inode(struct btrfs_root *root, | |||
| 1620 | goto out; | 1644 | goto out; |
| 1621 | } | 1645 | } |
| 1622 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1646 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
| 1647 | if (key.type == BTRFS_ROOT_ITEM_KEY) { | ||
| 1648 | ret = -ENOENT; | ||
| 1649 | goto out; | ||
| 1650 | } | ||
| 1623 | *found_inode = key.objectid; | 1651 | *found_inode = key.objectid; |
| 1624 | *found_type = btrfs_dir_type(path->nodes[0], di); | 1652 | *found_type = btrfs_dir_type(path->nodes[0], di); |
| 1625 | 1653 | ||
| @@ -1663,7 +1691,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino, | |||
| 1663 | goto out; | 1691 | goto out; |
| 1664 | } | 1692 | } |
| 1665 | 1693 | ||
| 1666 | if (key.type == BTRFS_INODE_REF_KEY) { | 1694 | if (found_key.type == BTRFS_INODE_REF_KEY) { |
| 1667 | struct btrfs_inode_ref *iref; | 1695 | struct btrfs_inode_ref *iref; |
| 1668 | iref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 1696 | iref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 1669 | struct btrfs_inode_ref); | 1697 | struct btrfs_inode_ref); |
| @@ -1685,10 +1713,12 @@ static int get_first_ref(struct btrfs_root *root, u64 ino, | |||
| 1685 | goto out; | 1713 | goto out; |
| 1686 | btrfs_release_path(path); | 1714 | btrfs_release_path(path); |
| 1687 | 1715 | ||
| 1688 | ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL, | 1716 | if (dir_gen) { |
| 1689 | NULL, NULL); | 1717 | ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, |
| 1690 | if (ret < 0) | 1718 | NULL, NULL, NULL); |
| 1691 | goto out; | 1719 | if (ret < 0) |
| 1720 | goto out; | ||
| 1721 | } | ||
| 1692 | 1722 | ||
| 1693 | *dir = parent_dir; | 1723 | *dir = parent_dir; |
| 1694 | 1724 | ||
| @@ -1704,13 +1734,12 @@ static int is_first_ref(struct btrfs_root *root, | |||
| 1704 | int ret; | 1734 | int ret; |
| 1705 | struct fs_path *tmp_name; | 1735 | struct fs_path *tmp_name; |
| 1706 | u64 tmp_dir; | 1736 | u64 tmp_dir; |
| 1707 | u64 tmp_dir_gen; | ||
| 1708 | 1737 | ||
| 1709 | tmp_name = fs_path_alloc(); | 1738 | tmp_name = fs_path_alloc(); |
| 1710 | if (!tmp_name) | 1739 | if (!tmp_name) |
| 1711 | return -ENOMEM; | 1740 | return -ENOMEM; |
| 1712 | 1741 | ||
| 1713 | ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); | 1742 | ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name); |
| 1714 | if (ret < 0) | 1743 | if (ret < 0) |
| 1715 | goto out; | 1744 | goto out; |
| 1716 | 1745 | ||
| @@ -2021,7 +2050,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 2021 | { | 2050 | { |
| 2022 | int ret; | 2051 | int ret; |
| 2023 | int nce_ret; | 2052 | int nce_ret; |
| 2024 | struct btrfs_path *path = NULL; | ||
| 2025 | struct name_cache_entry *nce = NULL; | 2053 | struct name_cache_entry *nce = NULL; |
| 2026 | 2054 | ||
| 2027 | /* | 2055 | /* |
| @@ -2047,10 +2075,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 2047 | } | 2075 | } |
| 2048 | } | 2076 | } |
| 2049 | 2077 | ||
| 2050 | path = alloc_path_for_send(); | ||
| 2051 | if (!path) | ||
| 2052 | return -ENOMEM; | ||
| 2053 | |||
| 2054 | /* | 2078 | /* |
| 2055 | * If the inode is not existent yet, add the orphan name and return 1. | 2079 | * If the inode is not existent yet, add the orphan name and return 1. |
| 2056 | * This should only happen for the parent dir that we determine in | 2080 | * This should only happen for the parent dir that we determine in |
| @@ -2126,7 +2150,6 @@ out_cache: | |||
| 2126 | name_cache_clean_unused(sctx); | 2150 | name_cache_clean_unused(sctx); |
| 2127 | 2151 | ||
| 2128 | out: | 2152 | out: |
| 2129 | btrfs_free_path(path); | ||
| 2130 | return ret; | 2153 | return ret; |
| 2131 | } | 2154 | } |
| 2132 | 2155 | ||
| @@ -2937,7 +2960,9 @@ static void free_waiting_dir_move(struct send_ctx *sctx, | |||
| 2937 | static int add_pending_dir_move(struct send_ctx *sctx, | 2960 | static int add_pending_dir_move(struct send_ctx *sctx, |
| 2938 | u64 ino, | 2961 | u64 ino, |
| 2939 | u64 ino_gen, | 2962 | u64 ino_gen, |
| 2940 | u64 parent_ino) | 2963 | u64 parent_ino, |
| 2964 | struct list_head *new_refs, | ||
| 2965 | struct list_head *deleted_refs) | ||
| 2941 | { | 2966 | { |
| 2942 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2967 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
| 2943 | struct rb_node *parent = NULL; | 2968 | struct rb_node *parent = NULL; |
| @@ -2969,12 +2994,12 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
| 2969 | } | 2994 | } |
| 2970 | } | 2995 | } |
| 2971 | 2996 | ||
| 2972 | list_for_each_entry(cur, &sctx->deleted_refs, list) { | 2997 | list_for_each_entry(cur, deleted_refs, list) { |
| 2973 | ret = dup_ref(cur, &pm->update_refs); | 2998 | ret = dup_ref(cur, &pm->update_refs); |
| 2974 | if (ret < 0) | 2999 | if (ret < 0) |
| 2975 | goto out; | 3000 | goto out; |
| 2976 | } | 3001 | } |
| 2977 | list_for_each_entry(cur, &sctx->new_refs, list) { | 3002 | list_for_each_entry(cur, new_refs, list) { |
| 2978 | ret = dup_ref(cur, &pm->update_refs); | 3003 | ret = dup_ref(cur, &pm->update_refs); |
| 2979 | if (ret < 0) | 3004 | if (ret < 0) |
| 2980 | goto out; | 3005 | goto out; |
| @@ -3017,6 +3042,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, | |||
| 3017 | return NULL; | 3042 | return NULL; |
| 3018 | } | 3043 | } |
| 3019 | 3044 | ||
| 3045 | static int path_loop(struct send_ctx *sctx, struct fs_path *name, | ||
| 3046 | u64 ino, u64 gen, u64 *ancestor_ino) | ||
| 3047 | { | ||
| 3048 | int ret = 0; | ||
| 3049 | u64 parent_inode = 0; | ||
| 3050 | u64 parent_gen = 0; | ||
| 3051 | u64 start_ino = ino; | ||
| 3052 | |||
| 3053 | *ancestor_ino = 0; | ||
| 3054 | while (ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
| 3055 | fs_path_reset(name); | ||
| 3056 | |||
| 3057 | if (is_waiting_for_rm(sctx, ino)) | ||
| 3058 | break; | ||
| 3059 | if (is_waiting_for_move(sctx, ino)) { | ||
| 3060 | if (*ancestor_ino == 0) | ||
| 3061 | *ancestor_ino = ino; | ||
| 3062 | ret = get_first_ref(sctx->parent_root, ino, | ||
| 3063 | &parent_inode, &parent_gen, name); | ||
| 3064 | } else { | ||
| 3065 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
| 3066 | &parent_inode, | ||
| 3067 | &parent_gen, name); | ||
| 3068 | if (ret > 0) { | ||
| 3069 | ret = 0; | ||
| 3070 | break; | ||
| 3071 | } | ||
| 3072 | } | ||
| 3073 | if (ret < 0) | ||
| 3074 | break; | ||
| 3075 | if (parent_inode == start_ino) { | ||
| 3076 | ret = 1; | ||
| 3077 | if (*ancestor_ino == 0) | ||
| 3078 | *ancestor_ino = ino; | ||
| 3079 | break; | ||
| 3080 | } | ||
| 3081 | ino = parent_inode; | ||
| 3082 | gen = parent_gen; | ||
| 3083 | } | ||
| 3084 | return ret; | ||
| 3085 | } | ||
| 3086 | |||
| 3020 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | 3087 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) |
| 3021 | { | 3088 | { |
| 3022 | struct fs_path *from_path = NULL; | 3089 | struct fs_path *from_path = NULL; |
| @@ -3028,6 +3095,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 3028 | struct waiting_dir_move *dm = NULL; | 3095 | struct waiting_dir_move *dm = NULL; |
| 3029 | u64 rmdir_ino = 0; | 3096 | u64 rmdir_ino = 0; |
| 3030 | int ret; | 3097 | int ret; |
| 3098 | u64 ancestor = 0; | ||
| 3031 | 3099 | ||
| 3032 | name = fs_path_alloc(); | 3100 | name = fs_path_alloc(); |
| 3033 | from_path = fs_path_alloc(); | 3101 | from_path = fs_path_alloc(); |
| @@ -3046,34 +3114,33 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 3046 | if (ret < 0) | 3114 | if (ret < 0) |
| 3047 | goto out; | 3115 | goto out; |
| 3048 | 3116 | ||
| 3049 | if (parent_ino == sctx->cur_ino) { | 3117 | ret = get_cur_path(sctx, parent_ino, parent_gen, |
| 3050 | /* child only renamed, not moved */ | 3118 | from_path); |
| 3051 | ASSERT(parent_gen == sctx->cur_inode_gen); | 3119 | if (ret < 0) |
| 3052 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | 3120 | goto out; |
| 3053 | from_path); | 3121 | ret = fs_path_add_path(from_path, name); |
| 3054 | if (ret < 0) | 3122 | if (ret < 0) |
| 3055 | goto out; | 3123 | goto out; |
| 3056 | ret = fs_path_add_path(from_path, name); | 3124 | |
| 3057 | if (ret < 0) | 3125 | sctx->send_progress = sctx->cur_ino + 1; |
| 3058 | goto out; | 3126 | ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); |
| 3059 | } else { | 3127 | if (ret) { |
| 3060 | /* child moved and maybe renamed too */ | 3128 | LIST_HEAD(deleted_refs); |
| 3061 | sctx->send_progress = pm->ino; | 3129 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); |
| 3062 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3130 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, |
| 3131 | &pm->update_refs, &deleted_refs); | ||
| 3063 | if (ret < 0) | 3132 | if (ret < 0) |
| 3064 | goto out; | 3133 | goto out; |
| 3065 | } | 3134 | if (rmdir_ino) { |
| 3066 | 3135 | dm = get_waiting_dir_move(sctx, pm->ino); | |
| 3067 | fs_path_free(name); | 3136 | ASSERT(dm); |
| 3068 | name = NULL; | 3137 | dm->rmdir_ino = rmdir_ino; |
| 3069 | 3138 | } | |
| 3070 | to_path = fs_path_alloc(); | ||
| 3071 | if (!to_path) { | ||
| 3072 | ret = -ENOMEM; | ||
| 3073 | goto out; | 3139 | goto out; |
| 3074 | } | 3140 | } |
| 3075 | 3141 | fs_path_reset(name); | |
| 3076 | sctx->send_progress = sctx->cur_ino + 1; | 3142 | to_path = name; |
| 3143 | name = NULL; | ||
| 3077 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3144 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
| 3078 | if (ret < 0) | 3145 | if (ret < 0) |
| 3079 | goto out; | 3146 | goto out; |
| @@ -3197,127 +3264,74 @@ out: | |||
| 3197 | static int wait_for_parent_move(struct send_ctx *sctx, | 3264 | static int wait_for_parent_move(struct send_ctx *sctx, |
| 3198 | struct recorded_ref *parent_ref) | 3265 | struct recorded_ref *parent_ref) |
| 3199 | { | 3266 | { |
| 3200 | int ret; | 3267 | int ret = 0; |
| 3201 | u64 ino = parent_ref->dir; | 3268 | u64 ino = parent_ref->dir; |
| 3202 | u64 parent_ino_before, parent_ino_after; | 3269 | u64 parent_ino_before, parent_ino_after; |
| 3203 | u64 old_gen; | ||
| 3204 | struct fs_path *path_before = NULL; | 3270 | struct fs_path *path_before = NULL; |
| 3205 | struct fs_path *path_after = NULL; | 3271 | struct fs_path *path_after = NULL; |
| 3206 | int len1, len2; | 3272 | int len1, len2; |
| 3207 | int register_upper_dirs; | ||
| 3208 | u64 gen; | ||
| 3209 | |||
| 3210 | if (is_waiting_for_move(sctx, ino)) | ||
| 3211 | return 1; | ||
| 3212 | |||
| 3213 | if (parent_ref->dir <= sctx->cur_ino) | ||
| 3214 | return 0; | ||
| 3215 | |||
| 3216 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | ||
| 3217 | NULL, NULL, NULL, NULL); | ||
| 3218 | if (ret == -ENOENT) | ||
| 3219 | return 0; | ||
| 3220 | else if (ret < 0) | ||
| 3221 | return ret; | ||
| 3222 | |||
| 3223 | if (parent_ref->dir_gen != old_gen) | ||
| 3224 | return 0; | ||
| 3225 | |||
| 3226 | path_before = fs_path_alloc(); | ||
| 3227 | if (!path_before) | ||
| 3228 | return -ENOMEM; | ||
| 3229 | |||
| 3230 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
| 3231 | NULL, path_before); | ||
| 3232 | if (ret == -ENOENT) { | ||
| 3233 | ret = 0; | ||
| 3234 | goto out; | ||
| 3235 | } else if (ret < 0) { | ||
| 3236 | goto out; | ||
| 3237 | } | ||
| 3238 | 3273 | ||
| 3239 | path_after = fs_path_alloc(); | 3274 | path_after = fs_path_alloc(); |
| 3240 | if (!path_after) { | 3275 | path_before = fs_path_alloc(); |
| 3276 | if (!path_after || !path_before) { | ||
| 3241 | ret = -ENOMEM; | 3277 | ret = -ENOMEM; |
| 3242 | goto out; | 3278 | goto out; |
| 3243 | } | 3279 | } |
| 3244 | 3280 | ||
| 3245 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
| 3246 | &gen, path_after); | ||
| 3247 | if (ret == -ENOENT) { | ||
| 3248 | ret = 0; | ||
| 3249 | goto out; | ||
| 3250 | } else if (ret < 0) { | ||
| 3251 | goto out; | ||
| 3252 | } | ||
| 3253 | |||
| 3254 | len1 = fs_path_len(path_before); | ||
| 3255 | len2 = fs_path_len(path_after); | ||
| 3256 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
| 3257 | memcmp(path_before->start, path_after->start, len1)) { | ||
| 3258 | ret = 1; | ||
| 3259 | goto out; | ||
| 3260 | } | ||
| 3261 | ret = 0; | ||
| 3262 | |||
| 3263 | /* | 3281 | /* |
| 3264 | * Ok, our new most direct ancestor has a higher inode number but | 3282 | * Our current directory inode may not yet be renamed/moved because some |
| 3265 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | 3283 | * ancestor (immediate or not) has to be renamed/moved first. So find out
| 3266 | * the hierarchy have an higher inode number too *and* were renamed | 3284 | * if such an ancestor exists and make sure our own rename/move happens
| 3267 | * or moved - in this case we need to wait for the ancestor's rename | 3285 | * after that ancestor is processed.
| 3268 | * or move operation before we can do the move/rename for the current | ||
| 3269 | * inode. | ||
| 3270 | */ | 3286 | */ |
| 3271 | register_upper_dirs = 0; | 3287 | while (ino > BTRFS_FIRST_FREE_OBJECTID) { |
| 3272 | ino = parent_ino_after; | 3288 | if (is_waiting_for_move(sctx, ino)) { |
| 3273 | again: | 3289 | ret = 1; |
| 3274 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | 3290 | break; |
| 3275 | u64 parent_gen; | 3291 | } |
| 3276 | 3292 | ||
| 3277 | fs_path_reset(path_before); | 3293 | fs_path_reset(path_before); |
| 3278 | fs_path_reset(path_after); | 3294 | fs_path_reset(path_after); |
| 3279 | 3295 | ||
| 3280 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3296 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
| 3281 | &parent_gen, path_after); | 3297 | NULL, path_after); |
| 3282 | if (ret < 0) | 3298 | if (ret < 0) |
| 3283 | goto out; | 3299 | goto out; |
| 3284 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | 3300 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, |
| 3285 | NULL, path_before); | 3301 | NULL, path_before); |
| 3286 | if (ret == -ENOENT) { | 3302 | if (ret < 0 && ret != -ENOENT) { |
| 3287 | ret = 0; | ||
| 3288 | break; | ||
| 3289 | } else if (ret < 0) { | ||
| 3290 | goto out; | 3303 | goto out; |
| 3304 | } else if (ret == -ENOENT) { | ||
| 3305 | ret = 1; | ||
| 3306 | break; | ||
| 3291 | } | 3307 | } |
| 3292 | 3308 | ||
| 3293 | len1 = fs_path_len(path_before); | 3309 | len1 = fs_path_len(path_before); |
| 3294 | len2 = fs_path_len(path_after); | 3310 | len2 = fs_path_len(path_after); |
| 3295 | if (parent_ino_before != parent_ino_after || len1 != len2 || | 3311 | if (ino > sctx->cur_ino && |
| 3296 | memcmp(path_before->start, path_after->start, len1)) { | 3312 | (parent_ino_before != parent_ino_after || len1 != len2 || |
| 3313 | memcmp(path_before->start, path_after->start, len1))) { | ||
| 3297 | ret = 1; | 3314 | ret = 1; |
| 3298 | if (register_upper_dirs) { | 3315 | break; |
| 3299 | break; | ||
| 3300 | } else { | ||
| 3301 | register_upper_dirs = 1; | ||
| 3302 | ino = parent_ref->dir; | ||
| 3303 | gen = parent_ref->dir_gen; | ||
| 3304 | goto again; | ||
| 3305 | } | ||
| 3306 | } else if (register_upper_dirs) { | ||
| 3307 | ret = add_pending_dir_move(sctx, ino, gen, | ||
| 3308 | parent_ino_after); | ||
| 3309 | if (ret < 0 && ret != -EEXIST) | ||
| 3310 | goto out; | ||
| 3311 | } | 3316 | } |
| 3312 | |||
| 3313 | ino = parent_ino_after; | 3317 | ino = parent_ino_after; |
| 3314 | gen = parent_gen; | ||
| 3315 | } | 3318 | } |
| 3316 | 3319 | ||
| 3317 | out: | 3320 | out: |
| 3318 | fs_path_free(path_before); | 3321 | fs_path_free(path_before); |
| 3319 | fs_path_free(path_after); | 3322 | fs_path_free(path_after); |
| 3320 | 3323 | ||
| 3324 | if (ret == 1) { | ||
| 3325 | ret = add_pending_dir_move(sctx, | ||
| 3326 | sctx->cur_ino, | ||
| 3327 | sctx->cur_inode_gen, | ||
| 3328 | ino, | ||
| 3329 | &sctx->new_refs, | ||
| 3330 | &sctx->deleted_refs); | ||
| 3331 | if (!ret) | ||
| 3332 | ret = 1; | ||
| 3333 | } | ||
| 3334 | |||
| 3321 | return ret; | 3335 | return ret; |
| 3322 | } | 3336 | } |
| 3323 | 3337 | ||
| @@ -3478,10 +3492,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3478 | if (ret < 0) | 3492 | if (ret < 0) |
| 3479 | goto out; | 3493 | goto out; |
| 3480 | if (ret) { | 3494 | if (ret) { |
| 3481 | ret = add_pending_dir_move(sctx, | ||
| 3482 | sctx->cur_ino, | ||
| 3483 | sctx->cur_inode_gen, | ||
| 3484 | cur->dir); | ||
| 3485 | *pending_move = 1; | 3495 | *pending_move = 1; |
| 3486 | } else { | 3496 | } else { |
| 3487 | ret = send_rename(sctx, valid_path, | 3497 | ret = send_rename(sctx, valid_path, |
| @@ -5482,7 +5492,7 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) | |||
| 5482 | */ | 5492 | */ |
| 5483 | if (root->send_in_progress < 0) | 5493 | if (root->send_in_progress < 0) |
| 5484 | btrfs_err(root->fs_info, | 5494 | btrfs_err(root->fs_info, |
| 5485 | "send_in_progres unbalanced %d root %llu\n", | 5495 | "send_in_progres unbalanced %d root %llu", |
| 5486 | root->send_in_progress, root->root_key.objectid); | 5496 | root->send_in_progress, root->root_key.objectid); |
| 5487 | spin_unlock(&root->root_item_lock); | 5497 | spin_unlock(&root->root_item_lock); |
| 5488 | } | 5498 | } |
| @@ -5510,7 +5520,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5510 | 5520 | ||
| 5511 | /* | 5521 | /* |
| 5512 | * The subvolume must remain read-only during send, protect against | 5522 | * The subvolume must remain read-only during send, protect against |
| 5513 | * making it RW. | 5523 | * making it RW. This also protects against deletion. |
| 5514 | */ | 5524 | */ |
| 5515 | spin_lock(&send_root->root_item_lock); | 5525 | spin_lock(&send_root->root_item_lock); |
| 5516 | send_root->send_in_progress++; | 5526 | send_root->send_in_progress++; |
| @@ -5570,6 +5580,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5570 | } | 5580 | } |
| 5571 | 5581 | ||
| 5572 | sctx->send_root = send_root; | 5582 | sctx->send_root = send_root; |
| 5583 | /* | ||
| 5584 | * Unlikely but possible: if the subvolume is marked for deletion but | ||
| 5585 | * its directory entry is slow to be removed, send can still be started. | ||
| 5586 | */ | ||
| 5587 | if (btrfs_root_dead(sctx->send_root)) { | ||
| 5588 | ret = -EPERM; | ||
| 5589 | goto out; | ||
| 5590 | } | ||
| 5591 | |||
| 5573 | sctx->clone_roots_cnt = arg->clone_sources_count; | 5592 | sctx->clone_roots_cnt = arg->clone_sources_count; |
| 5574 | 5593 | ||
| 5575 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; | 5594 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; |
| @@ -5659,7 +5678,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5659 | 5678 | ||
| 5660 | spin_lock(&sctx->parent_root->root_item_lock); | 5679 | spin_lock(&sctx->parent_root->root_item_lock); |
| 5661 | sctx->parent_root->send_in_progress++; | 5680 | sctx->parent_root->send_in_progress++; |
| 5662 | if (!btrfs_root_readonly(sctx->parent_root)) { | 5681 | if (!btrfs_root_readonly(sctx->parent_root) || |
| 5682 | btrfs_root_dead(sctx->parent_root)) { | ||
| 5663 | spin_unlock(&sctx->parent_root->root_item_lock); | 5683 | spin_unlock(&sctx->parent_root->root_item_lock); |
| 5664 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 5684 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
| 5665 | ret = -EPERM; | 5685 | ret = -EPERM; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5011aadacab8..4662d92a4b73 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -385,20 +385,6 @@ static match_table_t tokens = { | |||
| 385 | {Opt_err, NULL}, | 385 | {Opt_err, NULL}, |
| 386 | }; | 386 | }; |
| 387 | 387 | ||
| 388 | #define btrfs_set_and_info(root, opt, fmt, args...) \ | ||
| 389 | { \ | ||
| 390 | if (!btrfs_test_opt(root, opt)) \ | ||
| 391 | btrfs_info(root->fs_info, fmt, ##args); \ | ||
| 392 | btrfs_set_opt(root->fs_info->mount_opt, opt); \ | ||
| 393 | } | ||
| 394 | |||
| 395 | #define btrfs_clear_and_info(root, opt, fmt, args...) \ | ||
| 396 | { \ | ||
| 397 | if (btrfs_test_opt(root, opt)) \ | ||
| 398 | btrfs_info(root->fs_info, fmt, ##args); \ | ||
| 399 | btrfs_clear_opt(root->fs_info->mount_opt, opt); \ | ||
| 400 | } | ||
| 401 | |||
| 402 | /* | 388 | /* |
| 403 | * Regular mount options parser. Everything that is needed only when | 389 | * Regular mount options parser. Everything that is needed only when |
| 404 | * reading in a new superblock is parsed here. | 390 | * reading in a new superblock is parsed here. |
| @@ -525,7 +511,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 525 | } else if (compress) { | 511 | } else if (compress) { |
| 526 | if (!btrfs_test_opt(root, COMPRESS)) | 512 | if (!btrfs_test_opt(root, COMPRESS)) |
| 527 | btrfs_info(root->fs_info, | 513 | btrfs_info(root->fs_info, |
| 528 | "btrfs: use %s compression\n", | 514 | "btrfs: use %s compression", |
| 529 | compress_type); | 515 | compress_type); |
| 530 | } | 516 | } |
| 531 | break; | 517 | break; |
| @@ -594,8 +580,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 594 | } | 580 | } |
| 595 | break; | 581 | break; |
| 596 | case Opt_acl: | 582 | case Opt_acl: |
| 583 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | ||
| 597 | root->fs_info->sb->s_flags |= MS_POSIXACL; | 584 | root->fs_info->sb->s_flags |= MS_POSIXACL; |
| 598 | break; | 585 | break; |
| 586 | #else | ||
| 587 | btrfs_err(root->fs_info, | ||
| 588 | "support for ACL not compiled in!"); | ||
| 589 | ret = -EINVAL; | ||
| 590 | goto out; | ||
| 591 | #endif | ||
| 599 | case Opt_noacl: | 592 | case Opt_noacl: |
| 600 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; | 593 | root->fs_info->sb->s_flags &= ~MS_POSIXACL; |
| 601 | break; | 594 | break; |
| @@ -1186,7 +1179,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, | |||
| 1186 | return ERR_PTR(-ENOMEM); | 1179 | return ERR_PTR(-ENOMEM); |
| 1187 | mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, | 1180 | mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, |
| 1188 | newargs); | 1181 | newargs); |
| 1189 | kfree(newargs); | ||
| 1190 | 1182 | ||
| 1191 | if (PTR_RET(mnt) == -EBUSY) { | 1183 | if (PTR_RET(mnt) == -EBUSY) { |
| 1192 | if (flags & MS_RDONLY) { | 1184 | if (flags & MS_RDONLY) { |
| @@ -1196,17 +1188,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, | |||
| 1196 | int r; | 1188 | int r; |
| 1197 | mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, | 1189 | mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, |
| 1198 | newargs); | 1190 | newargs); |
| 1199 | if (IS_ERR(mnt)) | 1191 | if (IS_ERR(mnt)) { |
| 1192 | kfree(newargs); | ||
| 1200 | return ERR_CAST(mnt); | 1193 | return ERR_CAST(mnt); |
| 1194 | } | ||
| 1201 | 1195 | ||
| 1202 | r = btrfs_remount(mnt->mnt_sb, &flags, NULL); | 1196 | r = btrfs_remount(mnt->mnt_sb, &flags, NULL); |
| 1203 | if (r < 0) { | 1197 | if (r < 0) { |
| 1204 | /* FIXME: release vfsmount mnt ??*/ | 1198 | /* FIXME: release vfsmount mnt ??*/ |
| 1199 | kfree(newargs); | ||
| 1205 | return ERR_PTR(r); | 1200 | return ERR_PTR(r); |
| 1206 | } | 1201 | } |
| 1207 | } | 1202 | } |
| 1208 | } | 1203 | } |
| 1209 | 1204 | ||
| 1205 | kfree(newargs); | ||
| 1206 | |||
| 1210 | if (IS_ERR(mnt)) | 1207 | if (IS_ERR(mnt)) |
| 1211 | return ERR_CAST(mnt); | 1208 | return ERR_CAST(mnt); |
| 1212 | 1209 | ||
| @@ -1423,6 +1420,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1423 | * this also happens on 'umount -rf' or on shutdown, when | 1420 | * this also happens on 'umount -rf' or on shutdown, when |
| 1424 | * the filesystem is busy. | 1421 | * the filesystem is busy. |
| 1425 | */ | 1422 | */ |
| 1423 | cancel_work_sync(&fs_info->async_reclaim_work); | ||
| 1426 | 1424 | ||
| 1427 | /* wait for the uuid_scan task to finish */ | 1425 | /* wait for the uuid_scan task to finish */ |
| 1428 | down(&fs_info->uuid_tree_rescan_sem); | 1426 | down(&fs_info->uuid_tree_rescan_sem); |
| @@ -1904,6 +1902,9 @@ static int btrfs_run_sanity_tests(void) | |||
| 1904 | if (ret) | 1902 | if (ret) |
| 1905 | goto out; | 1903 | goto out; |
| 1906 | ret = btrfs_test_inodes(); | 1904 | ret = btrfs_test_inodes(); |
| 1905 | if (ret) | ||
| 1906 | goto out; | ||
| 1907 | ret = btrfs_test_qgroups(); | ||
| 1907 | out: | 1908 | out: |
| 1908 | btrfs_destroy_test_fs(); | 1909 | btrfs_destroy_test_fs(); |
| 1909 | return ret; | 1910 | return ret; |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c5eb2143dc66..df39458f1487 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
| @@ -254,6 +254,7 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj, | |||
| 254 | BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show); | 254 | BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show); |
| 255 | 255 | ||
| 256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) | 256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) |
| 257 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) | ||
| 257 | 258 | ||
| 258 | static ssize_t raid_bytes_show(struct kobject *kobj, | 259 | static ssize_t raid_bytes_show(struct kobject *kobj, |
| 259 | struct kobj_attribute *attr, char *buf); | 260 | struct kobj_attribute *attr, char *buf); |
| @@ -266,7 +267,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj, | |||
| 266 | { | 267 | { |
| 267 | struct btrfs_space_info *sinfo = to_space_info(kobj->parent); | 268 | struct btrfs_space_info *sinfo = to_space_info(kobj->parent); |
| 268 | struct btrfs_block_group_cache *block_group; | 269 | struct btrfs_block_group_cache *block_group; |
| 269 | int index = kobj - sinfo->block_group_kobjs; | 270 | int index = to_raid_kobj(kobj)->raid_type; |
| 270 | u64 val = 0; | 271 | u64 val = 0; |
| 271 | 272 | ||
| 272 | down_read(&sinfo->groups_sem); | 273 | down_read(&sinfo->groups_sem); |
| @@ -288,7 +289,7 @@ static struct attribute *raid_attributes[] = { | |||
| 288 | 289 | ||
| 289 | static void release_raid_kobj(struct kobject *kobj) | 290 | static void release_raid_kobj(struct kobject *kobj) |
| 290 | { | 291 | { |
| 291 | kobject_put(kobj->parent); | 292 | kfree(to_raid_kobj(kobj)); |
| 292 | } | 293 | } |
| 293 | 294 | ||
| 294 | struct kobj_type btrfs_raid_ktype = { | 295 | struct kobj_type btrfs_raid_ktype = { |
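Both changes fall out of embedding the kobject in a small wrapper: to_raid_kobj() recovers the wrapper via container_of(), so raid_bytes_show() reads the raid type from it instead of deriving an index by pointer arithmetic against an array that no longer exists, and release frees the wrapper itself rather than dropping a reference on the parent. The idiom in a self-contained form (the wrapper layout is inferred from the accessors, not quoted from the kernel definition):

#include <linux/kobject.h>
#include <linux/slab.h>

/* Assumed wrapper shape: payload plus embedded kobject. */
struct raid_kobject {
	int raid_type;
	struct kobject kobj;
};

#define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)

static void release_raid_kobj(struct kobject *kobj)
{
	/* the kobject is embedded, so freeing the wrapper frees both */
	kfree(to_raid_kobj(kobj));
}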
| @@ -374,11 +375,8 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
| 374 | struct btrfs_root *root = fs_info->fs_root; | 375 | struct btrfs_root *root = fs_info->fs_root; |
| 375 | int ret; | 376 | int ret; |
| 376 | 377 | ||
| 377 | if (len >= BTRFS_LABEL_SIZE) { | 378 | if (len >= BTRFS_LABEL_SIZE) |
| 378 | pr_err("BTRFS: unable to set label with more than %d bytes\n", | ||
| 379 | BTRFS_LABEL_SIZE - 1); | ||
| 380 | return -EINVAL; | 379 | return -EINVAL; |
| 381 | } | ||
| 382 | 380 | ||
| 383 | trans = btrfs_start_transaction(root, 0); | 381 | trans = btrfs_start_transaction(root, 0); |
| 384 | if (IS_ERR(trans)) | 382 | if (IS_ERR(trans)) |
| @@ -396,8 +394,48 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
| 396 | } | 394 | } |
| 397 | BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store); | 395 | BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store); |
| 398 | 396 | ||
| 397 | static ssize_t btrfs_no_store(struct kobject *kobj, | ||
| 398 | struct kobj_attribute *a, | ||
| 399 | const char *buf, size_t len) | ||
| 400 | { | ||
| 401 | return -EPERM; | ||
| 402 | } | ||
| 403 | |||
| 404 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, | ||
| 405 | struct kobj_attribute *a, char *buf) | ||
| 406 | { | ||
| 407 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
| 408 | |||
| 409 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); | ||
| 410 | } | ||
| 411 | |||
| 412 | BTRFS_ATTR_RW(nodesize, 0444, btrfs_nodesize_show, btrfs_no_store); | ||
| 413 | |||
| 414 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | ||
| 415 | struct kobj_attribute *a, char *buf) | ||
| 416 | { | ||
| 417 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
| 418 | |||
| 419 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | ||
| 420 | } | ||
| 421 | |||
| 422 | BTRFS_ATTR_RW(sectorsize, 0444, btrfs_sectorsize_show, btrfs_no_store); | ||
| 423 | |||
| 424 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | ||
| 425 | struct kobj_attribute *a, char *buf) | ||
| 426 | { | ||
| 427 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | ||
| 428 | |||
| 429 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | ||
| 430 | } | ||
| 431 | |||
| 432 | BTRFS_ATTR_RW(clone_alignment, 0444, btrfs_clone_alignment_show, btrfs_no_store); | ||
| 433 | |||
| 399 | static struct attribute *btrfs_attrs[] = { | 434 | static struct attribute *btrfs_attrs[] = { |
| 400 | BTRFS_ATTR_PTR(label), | 435 | BTRFS_ATTR_PTR(label), |
| 436 | BTRFS_ATTR_PTR(nodesize), | ||
| 437 | BTRFS_ATTR_PTR(sectorsize), | ||
| 438 | BTRFS_ATTR_PTR(clone_alignment), | ||
| 401 | NULL, | 439 | NULL, |
| 402 | }; | 440 | }; |
| 403 | 441 | ||
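The three new attributes share one refusal path: btrfs_no_store() answers every write with -EPERM, so nodesize, sectorsize, and clone_alignment act as read-only files even though they are declared through the RW macro (their 0444 mode should keep the store hook unreachable in practice anyway). Note that clone_alignment reports super_copy->sectorsize as well, presumably because that is the granularity the clone ioctl aligns to. Reading one from userspace is then a plain file read; a small sketch, with the sysfs path illustrative (substitute the filesystem UUID):

/* Userspace sketch: read a btrfs sysfs attribute; the path is illustrative. */
#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/fs/btrfs/<UUID>/nodesize", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("nodesize: %s", buf);	/* value is newline-terminated */
	fclose(f);
	return 0;
}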
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 757ef00a75a4..9626252ee6b4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c | |||
| @@ -21,6 +21,9 @@ | |||
| 21 | #include <linux/magic.h> | 21 | #include <linux/magic.h> |
| 22 | #include "btrfs-tests.h" | 22 | #include "btrfs-tests.h" |
| 23 | #include "../ctree.h" | 23 | #include "../ctree.h" |
| 24 | #include "../volumes.h" | ||
| 25 | #include "../disk-io.h" | ||
| 26 | #include "../qgroup.h" | ||
| 24 | 27 | ||
| 25 | static struct vfsmount *test_mnt = NULL; | 28 | static struct vfsmount *test_mnt = NULL; |
| 26 | 29 | ||
| @@ -72,3 +75,97 @@ void btrfs_destroy_test_fs(void) | |||
| 72 | kern_unmount(test_mnt); | 75 | kern_unmount(test_mnt); |
| 73 | unregister_filesystem(&test_type); | 76 | unregister_filesystem(&test_type); |
| 74 | } | 77 | } |
| 78 | |||
| 79 | struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) | ||
| 80 | { | ||
| 81 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), | ||
| 82 | GFP_NOFS); | ||
| 83 | |||
| 84 | if (!fs_info) | ||
| 85 | return fs_info; | ||
| 86 | fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), | ||
| 87 | GFP_NOFS); | ||
| 88 | if (!fs_info->fs_devices) { | ||
| 89 | kfree(fs_info); | ||
| 90 | return NULL; | ||
| 91 | } | ||
| 92 | fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), | ||
| 93 | GFP_NOFS); | ||
| 94 | if (!fs_info->super_copy) { | ||
| 95 | kfree(fs_info->fs_devices); | ||
| 96 | kfree(fs_info); | ||
| 97 | return NULL; | ||
| 98 | } | ||
| 99 | |||
| 100 | if (init_srcu_struct(&fs_info->subvol_srcu)) { | ||
| 101 | kfree(fs_info->fs_devices); | ||
| 102 | kfree(fs_info->super_copy); | ||
| 103 | kfree(fs_info); | ||
| 104 | return NULL; | ||
| 105 | } | ||
| 106 | |||
| 107 | spin_lock_init(&fs_info->buffer_lock); | ||
| 108 | spin_lock_init(&fs_info->qgroup_lock); | ||
| 109 | spin_lock_init(&fs_info->qgroup_op_lock); | ||
| 110 | spin_lock_init(&fs_info->super_lock); | ||
| 111 | spin_lock_init(&fs_info->fs_roots_radix_lock); | ||
| 112 | spin_lock_init(&fs_info->tree_mod_seq_lock); | ||
| 113 | mutex_init(&fs_info->qgroup_ioctl_lock); | ||
| 114 | mutex_init(&fs_info->qgroup_rescan_lock); | ||
| 115 | rwlock_init(&fs_info->tree_mod_log_lock); | ||
| 116 | fs_info->running_transaction = NULL; | ||
| 117 | fs_info->qgroup_tree = RB_ROOT; | ||
| 118 | fs_info->qgroup_ulist = NULL; | ||
| 119 | atomic64_set(&fs_info->tree_mod_seq, 0); | ||
| 120 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | ||
| 121 | INIT_LIST_HEAD(&fs_info->dead_roots); | ||
| 122 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | ||
| 123 | INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); | ||
| 124 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | ||
| 125 | return fs_info; | ||
| 126 | } | ||
| 127 | |||
| 128 | static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) | ||
| 129 | { | ||
| 130 | struct radix_tree_iter iter; | ||
| 131 | void **slot; | ||
| 132 | |||
| 133 | spin_lock(&fs_info->buffer_lock); | ||
| 134 | restart: | ||
| 135 | radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { | ||
| 136 | struct extent_buffer *eb; | ||
| 137 | |||
| 138 | eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); | ||
| 139 | if (!eb) | ||
| 140 | continue; | ||
| 141 | /* Shouldn't happen, but that kind of thinking creates CVEs */ | ||
| 142 | if (radix_tree_exception(eb)) { | ||
| 143 | if (radix_tree_deref_retry(eb)) | ||
| 144 | goto restart; | ||
| 145 | continue; | ||
| 146 | } | ||
| 147 | spin_unlock(&fs_info->buffer_lock); | ||
| 148 | free_extent_buffer_stale(eb); | ||
| 149 | spin_lock(&fs_info->buffer_lock); | ||
| 150 | } | ||
| 151 | spin_unlock(&fs_info->buffer_lock); | ||
| 152 | |||
| 153 | btrfs_free_qgroup_config(fs_info); | ||
| 154 | btrfs_free_fs_roots(fs_info); | ||
| 155 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
| 156 | kfree(fs_info->super_copy); | ||
| 157 | kfree(fs_info->fs_devices); | ||
| 158 | kfree(fs_info); | ||
| 159 | } | ||
| 160 | |||
| 161 | void btrfs_free_dummy_root(struct btrfs_root *root) | ||
| 162 | { | ||
| 163 | if (!root) | ||
| 164 | return; | ||
| 165 | if (root->node) | ||
| 166 | free_extent_buffer(root->node); | ||
| 167 | if (root->fs_info) | ||
| 168 | btrfs_free_dummy_fs_info(root->fs_info); | ||
| 169 | kfree(root); | ||
| 170 | } | ||
| 171 | |||
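The teardown loop is the delicate part of this helper: extent buffers live in buffer_radix, and freeing one needs buffer_lock dropped, so the walk must survive both exceptional slots (hence radix_tree_deref_retry()) and concurrent mutation across the unlock/relock window. The same idiom in a generic, self-contained shape (names and free_item() are placeholders; this sketches the pattern, not the btrfs code):

/* Generic sketch: drain a radix tree when freeing an item must drop the lock. */
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

void free_item(void *item);	/* placeholder: may sleep, unlinks the slot */

static void drain_tree(struct radix_tree_root *tree, spinlock_t *lock)
{
	struct radix_tree_iter iter;
	void **slot;

	spin_lock(lock);
restart:
	radix_tree_for_each_slot(slot, tree, &iter, 0) {
		void *item = radix_tree_deref_slot_protected(slot, lock);

		if (!item)
			continue;
		if (radix_tree_exception(item)) {
			if (radix_tree_deref_retry(item))
				goto restart;	/* slot moved; rescan */
			continue;		/* other exceptional entry */
		}
		spin_unlock(lock);
		free_item(item);	/* safe: lock is not held here */
		spin_lock(lock);
	}
	spin_unlock(lock);
}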
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 312560a9123d..fd3954224480 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h | |||
| @@ -23,13 +23,18 @@ | |||
| 23 | 23 | ||
| 24 | #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__) | 24 | #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__) |
| 25 | 25 | ||
| 26 | struct btrfs_root; | ||
| 27 | |||
| 26 | int btrfs_test_free_space_cache(void); | 28 | int btrfs_test_free_space_cache(void); |
| 27 | int btrfs_test_extent_buffer_operations(void); | 29 | int btrfs_test_extent_buffer_operations(void); |
| 28 | int btrfs_test_extent_io(void); | 30 | int btrfs_test_extent_io(void); |
| 29 | int btrfs_test_inodes(void); | 31 | int btrfs_test_inodes(void); |
| 32 | int btrfs_test_qgroups(void); | ||
| 30 | int btrfs_init_test_fs(void); | 33 | int btrfs_init_test_fs(void); |
| 31 | void btrfs_destroy_test_fs(void); | 34 | void btrfs_destroy_test_fs(void); |
| 32 | struct inode *btrfs_new_test_inode(void); | 35 | struct inode *btrfs_new_test_inode(void); |
| 36 | struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); | ||
| 37 | void btrfs_free_dummy_root(struct btrfs_root *root); | ||
| 33 | #else | 38 | #else |
| 34 | static inline int btrfs_test_free_space_cache(void) | 39 | static inline int btrfs_test_free_space_cache(void) |
| 35 | { | 40 | { |
| @@ -54,6 +59,10 @@ static inline int btrfs_test_inodes(void) | |||
| 54 | { | 59 | { |
| 55 | return 0; | 60 | return 0; |
| 56 | } | 61 | } |
| 62 | static inline int btrfs_test_qgroups(void) | ||
| 63 | { | ||
| 64 | return 0; | ||
| 65 | } | ||
| 57 | #endif | 66 | #endif |
| 58 | 67 | ||
| 59 | #endif | 68 | #endif |
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 397d1f99a8eb..3ae0f5b8bb80 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
| @@ -23,33 +23,6 @@ | |||
| 23 | #include "../extent_io.h" | 23 | #include "../extent_io.h" |
| 24 | #include "../volumes.h" | 24 | #include "../volumes.h" |
| 25 | 25 | ||
| 26 | static struct btrfs_fs_info *alloc_dummy_fs_info(void) | ||
| 27 | { | ||
| 28 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), | ||
| 29 | GFP_NOFS); | ||
| 30 | if (!fs_info) | ||
| 31 | return fs_info; | ||
| 32 | fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), | ||
| 33 | GFP_NOFS); | ||
| 34 | if (!fs_info->fs_devices) { | ||
| 35 | kfree(fs_info); | ||
| 36 | return NULL; | ||
| 37 | } | ||
| 38 | return fs_info; | ||
| 39 | } | ||
| 40 | static void free_dummy_root(struct btrfs_root *root) | ||
| 41 | { | ||
| 42 | if (!root) | ||
| 43 | return; | ||
| 44 | if (root->fs_info) { | ||
| 45 | kfree(root->fs_info->fs_devices); | ||
| 46 | kfree(root->fs_info); | ||
| 47 | } | ||
| 48 | if (root->node) | ||
| 49 | free_extent_buffer(root->node); | ||
| 50 | kfree(root); | ||
| 51 | } | ||
| 52 | |||
| 53 | static void insert_extent(struct btrfs_root *root, u64 start, u64 len, | 26 | static void insert_extent(struct btrfs_root *root, u64 start, u64 len, |
| 54 | u64 ram_bytes, u64 offset, u64 disk_bytenr, | 27 | u64 ram_bytes, u64 offset, u64 disk_bytenr, |
| 55 | u64 disk_len, u32 type, u8 compression, int slot) | 28 | u64 disk_len, u32 type, u8 compression, int slot) |
| @@ -276,7 +249,7 @@ static noinline int test_btrfs_get_extent(void) | |||
| 276 | * We do this since btrfs_get_extent wants to assign em->bdev to | 249 | * We do this since btrfs_get_extent wants to assign em->bdev to |
| 277 | * root->fs_info->fs_devices->latest_bdev. | 250 | * root->fs_info->fs_devices->latest_bdev. |
| 278 | */ | 251 | */ |
| 279 | root->fs_info = alloc_dummy_fs_info(); | 252 | root->fs_info = btrfs_alloc_dummy_fs_info(); |
| 280 | if (!root->fs_info) { | 253 | if (!root->fs_info) { |
| 281 | test_msg("Couldn't allocate dummy fs info\n"); | 254 | test_msg("Couldn't allocate dummy fs info\n"); |
| 282 | goto out; | 255 | goto out; |
| @@ -837,7 +810,7 @@ out: | |||
| 837 | if (!IS_ERR(em)) | 810 | if (!IS_ERR(em)) |
| 838 | free_extent_map(em); | 811 | free_extent_map(em); |
| 839 | iput(inode); | 812 | iput(inode); |
| 840 | free_dummy_root(root); | 813 | btrfs_free_dummy_root(root); |
| 841 | return ret; | 814 | return ret; |
| 842 | } | 815 | } |
| 843 | 816 | ||
| @@ -864,7 +837,7 @@ static int test_hole_first(void) | |||
| 864 | goto out; | 837 | goto out; |
| 865 | } | 838 | } |
| 866 | 839 | ||
| 867 | root->fs_info = alloc_dummy_fs_info(); | 840 | root->fs_info = btrfs_alloc_dummy_fs_info(); |
| 868 | if (!root->fs_info) { | 841 | if (!root->fs_info) { |
| 869 | test_msg("Couldn't allocate dummy fs info\n"); | 842 | test_msg("Couldn't allocate dummy fs info\n"); |
| 870 | goto out; | 843 | goto out; |
| @@ -934,7 +907,7 @@ out: | |||
| 934 | if (!IS_ERR(em)) | 907 | if (!IS_ERR(em)) |
| 935 | free_extent_map(em); | 908 | free_extent_map(em); |
| 936 | iput(inode); | 909 | iput(inode); |
| 937 | free_dummy_root(root); | 910 | btrfs_free_dummy_root(root); |
| 938 | return ret; | 911 | return ret; |
| 939 | } | 912 | } |
| 940 | 913 | ||
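With the file-private helpers gone, every self-test builds and tears down its world the same way. The resulting scaffold, condensed from the calls visible in this diff (error reporting trimmed):

/* Sketch of the common self-test scaffold after this change. */
static int example_test(void)
{
	struct btrfs_root *root;
	int ret = -ENOMEM;

	root = btrfs_alloc_dummy_root();	/* dummy root, no disk behind it */
	if (IS_ERR(root))
		return PTR_ERR(root);

	root->fs_info = btrfs_alloc_dummy_fs_info();	/* shared helper */
	if (!root->fs_info)
		goto out;

	/* ... exercise the code under test against root ... */
	ret = 0;
out:
	btrfs_free_dummy_root(root);	/* frees node, fs_info and the root */
	return ret;
}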
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c new file mode 100644 index 000000000000..ec3dcb202357 --- /dev/null +++ b/fs/btrfs/tests/qgroup-tests.c | |||
| @@ -0,0 +1,470 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2013 Facebook. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "btrfs-tests.h" | ||
| 20 | #include "../ctree.h" | ||
| 21 | #include "../transaction.h" | ||
| 22 | #include "../disk-io.h" | ||
| 23 | #include "../qgroup.h" | ||
| 24 | |||
| 25 | static void init_dummy_trans(struct btrfs_trans_handle *trans) | ||
| 26 | { | ||
| 27 | memset(trans, 0, sizeof(*trans)); | ||
| 28 | trans->transid = 1; | ||
| 29 | INIT_LIST_HEAD(&trans->qgroup_ref_list); | ||
| 30 | trans->type = __TRANS_DUMMY; | ||
| 31 | } | ||
| 32 | |||
| 33 | static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr, | ||
| 34 | u64 num_bytes, u64 parent, u64 root_objectid) | ||
| 35 | { | ||
| 36 | struct btrfs_trans_handle trans; | ||
| 37 | struct btrfs_extent_item *item; | ||
| 38 | struct btrfs_extent_inline_ref *iref; | ||
| 39 | struct btrfs_tree_block_info *block_info; | ||
| 40 | struct btrfs_path *path; | ||
| 41 | struct extent_buffer *leaf; | ||
| 42 | struct btrfs_key ins; | ||
| 43 | u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info); | ||
| 44 | int ret; | ||
| 45 | |||
| 46 | init_dummy_trans(&trans); | ||
| 47 | |||
| 48 | ins.objectid = bytenr; | ||
| 49 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 50 | ins.offset = num_bytes; | ||
| 51 | |||
| 52 | path = btrfs_alloc_path(); | ||
| 53 | if (!path) { | ||
| 54 | test_msg("Couldn't allocate path\n"); | ||
| 55 | return -ENOMEM; | ||
| 56 | } | ||
| 57 | |||
| 58 | path->leave_spinning = 1; | ||
| 59 | ret = btrfs_insert_empty_item(&trans, root, path, &ins, size); | ||
| 60 | if (ret) { | ||
| 61 | test_msg("Couldn't insert ref %d\n", ret); | ||
| 62 | btrfs_free_path(path); | ||
| 63 | return ret; | ||
| 64 | } | ||
| 65 | |||
| 66 | leaf = path->nodes[0]; | ||
| 67 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); | ||
| 68 | btrfs_set_extent_refs(leaf, item, 1); | ||
| 69 | btrfs_set_extent_generation(leaf, item, 1); | ||
| 70 | btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK); | ||
| 71 | block_info = (struct btrfs_tree_block_info *)(item + 1); | ||
| 72 | btrfs_set_tree_block_level(leaf, block_info, 1); | ||
| 73 | iref = (struct btrfs_extent_inline_ref *)(block_info + 1); | ||
| 74 | if (parent > 0) { | ||
| 75 | btrfs_set_extent_inline_ref_type(leaf, iref, | ||
| 76 | BTRFS_SHARED_BLOCK_REF_KEY); | ||
| 77 | btrfs_set_extent_inline_ref_offset(leaf, iref, parent); | ||
| 78 | } else { | ||
| 79 | btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY); | ||
| 80 | btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); | ||
| 81 | } | ||
| 82 | btrfs_free_path(path); | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes, | ||
| 87 | u64 parent, u64 root_objectid) | ||
| 88 | { | ||
| 89 | struct btrfs_trans_handle trans; | ||
| 90 | struct btrfs_extent_item *item; | ||
| 91 | struct btrfs_path *path; | ||
| 92 | struct btrfs_key key; | ||
| 93 | u64 refs; | ||
| 94 | int ret; | ||
| 95 | |||
| 96 | init_dummy_trans(&trans); | ||
| 97 | |||
| 98 | key.objectid = bytenr; | ||
| 99 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 100 | key.offset = num_bytes; | ||
| 101 | |||
| 102 | path = btrfs_alloc_path(); | ||
| 103 | if (!path) { | ||
| 104 | test_msg("Couldn't allocate path\n"); | ||
| 105 | return -ENOMEM; | ||
| 106 | } | ||
| 107 | |||
| 108 | path->leave_spinning = 1; | ||
| 109 | ret = btrfs_search_slot(&trans, root, &key, path, 0, 1); | ||
| 110 | if (ret) { | ||
| 111 | test_msg("Couldn't find extent ref\n"); | ||
| 112 | btrfs_free_path(path); | ||
| 113 | return ret; | ||
| 114 | } | ||
| 115 | |||
| 116 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 117 | struct btrfs_extent_item); | ||
| 118 | refs = btrfs_extent_refs(path->nodes[0], item); | ||
| 119 | btrfs_set_extent_refs(path->nodes[0], item, refs + 1); | ||
| 120 | btrfs_release_path(path); | ||
| 121 | |||
| 122 | key.objectid = bytenr; | ||
| 123 | if (parent) { | ||
| 124 | key.type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
| 125 | key.offset = parent; | ||
| 126 | } else { | ||
| 127 | key.type = BTRFS_TREE_BLOCK_REF_KEY; | ||
| 128 | key.offset = root_objectid; | ||
| 129 | } | ||
| 130 | |||
| 131 | ret = btrfs_insert_empty_item(&trans, root, path, &key, 0); | ||
| 132 | if (ret) | ||
| 133 | test_msg("Failed to insert backref\n"); | ||
| 134 | btrfs_free_path(path); | ||
| 135 | return ret; | ||
| 136 | } | ||
| 137 | |||
| 138 | static int remove_extent_item(struct btrfs_root *root, u64 bytenr, | ||
| 139 | u64 num_bytes) | ||
| 140 | { | ||
| 141 | struct btrfs_trans_handle trans; | ||
| 142 | struct btrfs_key key; | ||
| 143 | struct btrfs_path *path; | ||
| 144 | int ret; | ||
| 145 | |||
| 146 | init_dummy_trans(&trans); | ||
| 147 | |||
| 148 | key.objectid = bytenr; | ||
| 149 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 150 | key.offset = num_bytes; | ||
| 151 | |||
| 152 | path = btrfs_alloc_path(); | ||
| 153 | if (!path) { | ||
| 154 | test_msg("Couldn't allocate path\n"); | ||
| 155 | return -ENOMEM; | ||
| 156 | } | ||
| 157 | path->leave_spinning = 1; | ||
| 158 | |||
| 159 | ret = btrfs_search_slot(&trans, root, &key, path, -1, 1); | ||
| 160 | if (ret) { | ||
| 161 | test_msg("Didn't find our key %d\n", ret); | ||
| 162 | btrfs_free_path(path); | ||
| 163 | return ret; | ||
| 164 | } | ||
| 165 | btrfs_del_item(&trans, root, path); | ||
| 166 | btrfs_free_path(path); | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 170 | static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, | ||
| 171 | u64 num_bytes, u64 parent, u64 root_objectid) | ||
| 172 | { | ||
| 173 | struct btrfs_trans_handle trans; | ||
| 174 | struct btrfs_extent_item *item; | ||
| 175 | struct btrfs_path *path; | ||
| 176 | struct btrfs_key key; | ||
| 177 | u64 refs; | ||
| 178 | int ret; | ||
| 179 | |||
| 180 | init_dummy_trans(&trans); | ||
| 181 | |||
| 182 | key.objectid = bytenr; | ||
| 183 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 184 | key.offset = num_bytes; | ||
| 185 | |||
| 186 | path = btrfs_alloc_path(); | ||
| 187 | if (!path) { | ||
| 188 | test_msg("Couldn't allocate path\n"); | ||
| 189 | return -ENOMEM; | ||
| 190 | } | ||
| 191 | |||
| 192 | path->leave_spinning = 1; | ||
| 193 | ret = btrfs_search_slot(&trans, root, &key, path, 0, 1); | ||
| 194 | if (ret) { | ||
| 195 | test_msg("Couldn't find extent ref\n"); | ||
| 196 | btrfs_free_path(path); | ||
| 197 | return ret; | ||
| 198 | } | ||
| 199 | |||
| 200 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 201 | struct btrfs_extent_item); | ||
| 202 | refs = btrfs_extent_refs(path->nodes[0], item); | ||
| 203 | btrfs_set_extent_refs(path->nodes[0], item, refs - 1); | ||
| 204 | btrfs_release_path(path); | ||
| 205 | |||
| 206 | key.objectid = bytenr; | ||
| 207 | if (parent) { | ||
| 208 | key.type = BTRFS_SHARED_BLOCK_REF_KEY; | ||
| 209 | key.offset = parent; | ||
| 210 | } else { | ||
| 211 | key.type = BTRFS_TREE_BLOCK_REF_KEY; | ||
| 212 | key.offset = root_objectid; | ||
| 213 | } | ||
| 214 | |||
| 215 | ret = btrfs_search_slot(&trans, root, &key, path, -1, 1); | ||
| 216 | if (ret) { | ||
| 217 | test_msg("Couldn't find backref %d\n", ret); | ||
| 218 | btrfs_free_path(path); | ||
| 219 | return ret; | ||
| 220 | } | ||
| 221 | btrfs_del_item(&trans, root, path); | ||
| 222 | btrfs_free_path(path); | ||
| 223 | return ret; | ||
| 224 | } | ||
| 225 | |||
| 226 | static int test_no_shared_qgroup(struct btrfs_root *root) | ||
| 227 | { | ||
| 228 | struct btrfs_trans_handle trans; | ||
| 229 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 230 | int ret; | ||
| 231 | |||
| 232 | init_dummy_trans(&trans); | ||
| 233 | |||
| 234 | test_msg("Qgroup basic add\n"); | ||
| 235 | ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL); | ||
| 236 | if (ret) { | ||
| 237 | test_msg("Couldn't create a qgroup %d\n", ret); | ||
| 238 | return ret; | ||
| 239 | } | ||
| 240 | |||
| 241 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, | ||
| 242 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
| 243 | if (ret) { | ||
| 244 | test_msg("Couldn't add space to a qgroup %d\n", ret); | ||
| 245 | return ret; | ||
| 246 | } | ||
| 247 | |||
| 248 | ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); | ||
| 249 | if (ret) | ||
| 250 | return ret; | ||
| 251 | |||
| 252 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
| 253 | if (ret) { | ||
| 254 | test_msg("Delayed qgroup accounting failed %d\n", ret); | ||
| 255 | return ret; | ||
| 256 | } | ||
| 257 | |||
| 258 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { | ||
| 259 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 260 | return -EINVAL; | ||
| 261 | } | ||
| 262 | |||
| 263 | ret = remove_extent_item(root, 4096, 4096); | ||
| 264 | if (ret) | ||
| 265 | return -EINVAL; | ||
| 266 | |||
| 267 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, | ||
| 268 | BTRFS_QGROUP_OPER_SUB_EXCL, 0); | ||
| 269 | if (ret) { | ||
| 270 | test_msg("Couldn't remove space from the qgroup %d\n", ret); | ||
| 271 | return -EINVAL; | ||
| 272 | } | ||
| 273 | |||
| 274 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
| 275 | if (ret) { | ||
| 276 | test_msg("Qgroup accounting failed %d\n", ret); | ||
| 277 | return -EINVAL; | ||
| 278 | } | ||
| 279 | |||
| 280 | if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) { | ||
| 281 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 282 | return -EINVAL; | ||
| 283 | } | ||
| 284 | |||
| 285 | return 0; | ||
| 286 | } | ||
| 287 | |||
| 288 | /* | ||
| 289 | * Add a ref for two different roots to make sure the shared value comes out | ||
| 290 | * right, also remove one of the roots and make sure the exclusive count is | ||
| 291 | * adjusted properly. | ||
| 292 | */ | ||
| 293 | static int test_multiple_refs(struct btrfs_root *root) | ||
| 294 | { | ||
| 295 | struct btrfs_trans_handle trans; | ||
| 296 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 297 | int ret; | ||
| 298 | |||
| 299 | init_dummy_trans(&trans); | ||
| 300 | |||
| 301 | test_msg("Qgroup multiple refs test\n"); | ||
| 302 | |||
| 303 | /* We have 5 created already from the previous test */ | ||
| 304 | ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL); | ||
| 305 | if (ret) { | ||
| 306 | test_msg("Couldn't create a qgroup %d\n", ret); | ||
| 307 | return ret; | ||
| 308 | } | ||
| 309 | |||
| 310 | ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); | ||
| 311 | if (ret) | ||
| 312 | return ret; | ||
| 313 | |||
| 314 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, | ||
| 315 | BTRFS_QGROUP_OPER_ADD_EXCL, 0); | ||
| 316 | if (ret) { | ||
| 317 | test_msg("Couldn't add space to a qgroup %d\n", ret); | ||
| 318 | return ret; | ||
| 319 | } | ||
| 320 | |||
| 321 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
| 322 | if (ret) { | ||
| 323 | test_msg("Delayed qgroup accounting failed %d\n", ret); | ||
| 324 | return ret; | ||
| 325 | } | ||
| 326 | |||
| 327 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { | ||
| 328 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 329 | return -EINVAL; | ||
| 330 | } | ||
| 331 | |||
| 332 | ret = add_tree_ref(root, 4096, 4096, 0, 256); | ||
| 333 | if (ret) | ||
| 334 | return ret; | ||
| 335 | |||
| 336 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096, | ||
| 337 | BTRFS_QGROUP_OPER_ADD_SHARED, 0); | ||
| 338 | if (ret) { | ||
| 339 | test_msg("Qgroup record ref failed %d\n", ret); | ||
| 340 | return ret; | ||
| 341 | } | ||
| 342 | |||
| 343 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
| 344 | if (ret) { | ||
| 345 | test_msg("Qgroup accounting failed %d\n", ret); | ||
| 346 | return ret; | ||
| 347 | } | ||
| 348 | |||
| 349 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) { | ||
| 350 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 351 | return -EINVAL; | ||
| 352 | } | ||
| 353 | |||
| 354 | if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) { | ||
| 355 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 356 | return -EINVAL; | ||
| 357 | } | ||
| 358 | |||
| 359 | ret = remove_extent_ref(root, 4096, 4096, 0, 256); | ||
| 360 | if (ret) | ||
| 361 | return ret; | ||
| 362 | |||
| 363 | ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096, | ||
| 364 | BTRFS_QGROUP_OPER_SUB_SHARED, 0); | ||
| 365 | if (ret) { | ||
| 366 | test_msg("Qgroup record ref failed %d\n", ret); | ||
| 367 | return ret; | ||
| 368 | } | ||
| 369 | |||
| 370 | ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); | ||
| 371 | if (ret) { | ||
| 372 | test_msg("Qgroup accounting failed %d\n", ret); | ||
| 373 | return ret; | ||
| 374 | } | ||
| 375 | |||
| 376 | if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) { | ||
| 377 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 378 | return -EINVAL; | ||
| 379 | } | ||
| 380 | |||
| 381 | if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) { | ||
| 382 | test_msg("Qgroup counts didn't match expected values\n"); | ||
| 383 | return -EINVAL; | ||
| 384 | } | ||
| 385 | |||
| 386 | return 0; | ||
| 387 | } | ||
| 388 | |||
| 389 | int btrfs_test_qgroups(void) | ||
| 390 | { | ||
| 391 | struct btrfs_root *root; | ||
| 392 | struct btrfs_root *tmp_root; | ||
| 393 | int ret = 0; | ||
| 394 | |||
| 395 | root = btrfs_alloc_dummy_root(); | ||
| 396 | if (IS_ERR(root)) { | ||
| 397 | test_msg("Couldn't allocate root\n"); | ||
| 398 | return PTR_ERR(root); | ||
| 399 | } | ||
| 400 | |||
| 401 | root->fs_info = btrfs_alloc_dummy_fs_info(); | ||
| 402 | if (!root->fs_info) { | ||
| 403 | test_msg("Couldn't allocate dummy fs info\n"); | ||
| 404 | ret = -ENOMEM; | ||
| 405 | goto out; | ||
| 406 | } | ||
| 407 | |||
| 408 | /* | ||
| 409 | * Can't use bytenr 0, some things freak out | ||
| 410 | * *cough*backref walking code*cough* | ||
| 411 | */ | ||
| 412 | root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096); | ||
| 413 | if (!root->node) { | ||
| 414 | test_msg("Couldn't allocate dummy buffer\n"); | ||
| 415 | ret = -ENOMEM; | ||
| 416 | goto out; | ||
| 417 | } | ||
| 418 | btrfs_set_header_level(root->node, 0); | ||
| 419 | btrfs_set_header_nritems(root->node, 0); | ||
| 420 | root->alloc_bytenr += 8192; | ||
| 421 | |||
| 422 | tmp_root = btrfs_alloc_dummy_root(); | ||
| 423 | if (IS_ERR(tmp_root)) { | ||
| 424 | test_msg("Couldn't allocate a fs root\n"); | ||
| 425 | ret = PTR_ERR(tmp_root); | ||
| 426 | goto out; | ||
| 427 | } | ||
| 428 | |||
| 429 | tmp_root->root_key.objectid = 5; | ||
| 430 | root->fs_info->fs_root = tmp_root; | ||
| 431 | ret = btrfs_insert_fs_root(root->fs_info, tmp_root); | ||
| 432 | if (ret) { | ||
| 433 | test_msg("Couldn't insert fs root %d\n", ret); | ||
| 434 | goto out; | ||
| 435 | } | ||
| 436 | |||
| 437 | tmp_root = btrfs_alloc_dummy_root(); | ||
| 438 | if (IS_ERR(tmp_root)) { | ||
| 439 | test_msg("Couldn't allocate a fs root\n"); | ||
| 440 | ret = PTR_ERR(tmp_root); | ||
| 441 | goto out; | ||
| 442 | } | ||
| 443 | |||
| 444 | tmp_root->root_key.objectid = 256; | ||
| 445 | ret = btrfs_insert_fs_root(root->fs_info, tmp_root); | ||
| 446 | if (ret) { | ||
| 447 | test_msg("Couldn't insert fs root %d\n", ret); | ||
| 448 | goto out; | ||
| 449 | } | ||
| 450 | |||
| 451 | /* We are using this root as our extent root */ | ||
| 452 | root->fs_info->extent_root = root; | ||
| 453 | |||
| 454 | /* | ||
| 455 | * Some of the paths we test assume we have a filled out fs_info, so we | ||
| 456 | * just need to add the root in there so we don't panic. | ||
| 457 | */ | ||
| 458 | root->fs_info->tree_root = root; | ||
| 459 | root->fs_info->quota_root = root; | ||
| 460 | root->fs_info->quota_enabled = 1; | ||
| 461 | |||
| 462 | test_msg("Running qgroup tests\n"); | ||
| 463 | ret = test_no_shared_qgroup(root); | ||
| 464 | if (ret) | ||
| 465 | goto out; | ||
| 466 | ret = test_multiple_refs(root); | ||
| 467 | out: | ||
| 468 | btrfs_free_dummy_root(root); | ||
| 469 | return ret; | ||
| 470 | } | ||
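Read together, the two tests pin down the exclusive/shared transitions of the qgroup counters. Summarizing the btrfs_verify_qgroup_counts() calls above (values in bytes, shown as referenced/exclusive):

    step                                   qgroup 5      qgroup 256
    add exclusive ref for root 5           4096/4096     -
    remove it again                        0/0           -
    re-add for root 5                      4096/4096     -
    add shared ref for root 256            4096/0        4096/0
    remove root 256's ref                  4096/4096     0/0

The last row is the interesting case: dropping the second reference must hand exclusivity back to the surviving qgroup, not merely zero out the one that left.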
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7579f6d0b854..511839c04f11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include "inode-map.h" | 31 | #include "inode-map.h" |
| 32 | #include "volumes.h" | 32 | #include "volumes.h" |
| 33 | #include "dev-replace.h" | 33 | #include "dev-replace.h" |
| 34 | #include "qgroup.h" | ||
| 34 | 35 | ||
| 35 | #define BTRFS_ROOT_TRANS_TAG 0 | 36 | #define BTRFS_ROOT_TRANS_TAG 0 |
| 36 | 37 | ||
| @@ -241,18 +242,19 @@ loop: | |||
| 241 | static int record_root_in_trans(struct btrfs_trans_handle *trans, | 242 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
| 242 | struct btrfs_root *root) | 243 | struct btrfs_root *root) |
| 243 | { | 244 | { |
| 244 | if (root->ref_cows && root->last_trans < trans->transid) { | 245 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
| 246 | root->last_trans < trans->transid) { | ||
| 245 | WARN_ON(root == root->fs_info->extent_root); | 247 | WARN_ON(root == root->fs_info->extent_root); |
| 246 | WARN_ON(root->commit_root != root->node); | 248 | WARN_ON(root->commit_root != root->node); |
| 247 | 249 | ||
| 248 | /* | 250 | /* |
| 249 | * see below for in_trans_setup usage rules | 251 | * see below for IN_TRANS_SETUP usage rules |
| 250 | * we have the reloc mutex held now, so there | 252 | * we have the reloc mutex held now, so there |
| 251 | * is only one writer in this function | 253 | * is only one writer in this function |
| 252 | */ | 254 | */ |
| 253 | root->in_trans_setup = 1; | 255 | set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); |
| 254 | 256 | ||
| 255 | /* make sure readers find in_trans_setup before | 257 | /* make sure readers find IN_TRANS_SETUP before |
| 256 | * they find our root->last_trans update | 258 | * they find our root->last_trans update |
| 257 | */ | 259 | */ |
| 258 | smp_wmb(); | 260 | smp_wmb(); |
| @@ -279,7 +281,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 279 | * But, we have to set root->last_trans before we | 281 | * But, we have to set root->last_trans before we |
| 280 | * init the relocation root, otherwise, we trip over warnings | 282 | * init the relocation root, otherwise, we trip over warnings |
| 281 | * in ctree.c. The solution used here is to flag ourselves | 283 | * in ctree.c. The solution used here is to flag ourselves |
| 282 | * with root->in_trans_setup. When this is 1, we're still | 284 | * with root IN_TRANS_SETUP. When this is 1, we're still |
| 283 | * fixing up the reloc trees and everyone must wait. | 285 | * fixing up the reloc trees and everyone must wait. |
| 284 | * | 286 | * |
| 285 | * When this is zero, they can trust root->last_trans and fly | 287 | * When this is zero, they can trust root->last_trans and fly |
| @@ -288,8 +290,8 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 288 | * done before we pop in the zero below | 290 | * done before we pop in the zero below |
| 289 | */ | 291 | */ |
| 290 | btrfs_init_reloc_root(trans, root); | 292 | btrfs_init_reloc_root(trans, root); |
| 291 | smp_wmb(); | 293 | smp_mb__before_atomic(); |
| 292 | root->in_trans_setup = 0; | 294 | clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); |
| 293 | } | 295 | } |
| 294 | return 0; | 296 | return 0; |
| 295 | } | 297 | } |
| @@ -298,16 +300,16 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 298 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 300 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
| 299 | struct btrfs_root *root) | 301 | struct btrfs_root *root) |
| 300 | { | 302 | { |
| 301 | if (!root->ref_cows) | 303 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 302 | return 0; | 304 | return 0; |
| 303 | 305 | ||
| 304 | /* | 306 | /* |
| 305 | * see record_root_in_trans for comments about in_trans_setup usage | 307 | * see record_root_in_trans for comments about IN_TRANS_SETUP usage |
| 306 | * and barriers | 308 | * and barriers |
| 307 | */ | 309 | */ |
| 308 | smp_rmb(); | 310 | smp_rmb(); |
| 309 | if (root->last_trans == trans->transid && | 311 | if (root->last_trans == trans->transid && |
| 310 | !root->in_trans_setup) | 312 | !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state)) |
| 311 | return 0; | 313 | return 0; |
| 312 | 314 | ||
| 313 | mutex_lock(&root->fs_info->reloc_mutex); | 315 | mutex_lock(&root->fs_info->reloc_mutex); |
| @@ -365,7 +367,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
| 365 | static inline bool need_reserve_reloc_root(struct btrfs_root *root) | 367 | static inline bool need_reserve_reloc_root(struct btrfs_root *root) |
| 366 | { | 368 | { |
| 367 | if (!root->fs_info->reloc_ctl || | 369 | if (!root->fs_info->reloc_ctl || |
| 368 | !root->ref_cows || | 370 | !test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
| 369 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || | 371 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || |
| 370 | root->reloc_root) | 372 | root->reloc_root) |
| 371 | return false; | 373 | return false; |
| @@ -695,6 +697,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 695 | unsigned long cur = trans->delayed_ref_updates; | 697 | unsigned long cur = trans->delayed_ref_updates; |
| 696 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 698 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
| 697 | int err = 0; | 699 | int err = 0; |
| 700 | int must_run_delayed_refs = 0; | ||
| 698 | 701 | ||
| 699 | if (trans->use_count > 1) { | 702 | if (trans->use_count > 1) { |
| 700 | trans->use_count--; | 703 | trans->use_count--; |
| @@ -702,14 +705,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 702 | return 0; | 705 | return 0; |
| 703 | } | 706 | } |
| 704 | 707 | ||
| 705 | /* | ||
| 706 | * do the qgroup accounting as early as possible | ||
| 707 | */ | ||
| 708 | err = btrfs_delayed_refs_qgroup_accounting(trans, info); | ||
| 709 | |||
| 710 | btrfs_trans_release_metadata(trans, root); | 708 | btrfs_trans_release_metadata(trans, root); |
| 711 | trans->block_rsv = NULL; | 709 | trans->block_rsv = NULL; |
| 712 | 710 | ||
| 711 | if (!list_empty(&trans->new_bgs)) | ||
| 712 | btrfs_create_pending_block_groups(trans, root); | ||
| 713 | |||
| 714 | trans->delayed_ref_updates = 0; | ||
| 715 | if (!trans->sync) { | ||
| 716 | must_run_delayed_refs = | ||
| 717 | btrfs_should_throttle_delayed_refs(trans, root); | ||
| 718 | cur = max_t(unsigned long, cur, 32); | ||
| 719 | |||
| 720 | /* | ||
| 721 | * don't make the caller wait if they are from a NOLOCK | ||
| 722 | * or ATTACH transaction; it would deadlock with commit | ||
| 723 | */ | ||
| 724 | if (must_run_delayed_refs == 1 && | ||
| 725 | (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH))) | ||
| 726 | must_run_delayed_refs = 2; | ||
| 727 | } | ||
| 728 | |||
| 713 | if (trans->qgroup_reserved) { | 729 | if (trans->qgroup_reserved) { |
| 714 | /* | 730 | /* |
| 715 | * the same root has to be passed here between start_transaction | 731 | * the same root has to be passed here between start_transaction |
| @@ -719,16 +735,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 719 | trans->qgroup_reserved = 0; | 735 | trans->qgroup_reserved = 0; |
| 720 | } | 736 | } |
| 721 | 737 | ||
| 722 | if (!list_empty(&trans->new_bgs)) | ||
| 723 | btrfs_create_pending_block_groups(trans, root); | ||
| 724 | |||
| 725 | trans->delayed_ref_updates = 0; | ||
| 726 | if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) { | ||
| 727 | cur = max_t(unsigned long, cur, 32); | ||
| 728 | trans->delayed_ref_updates = 0; | ||
| 729 | btrfs_run_delayed_refs(trans, root, cur); | ||
| 730 | } | ||
| 731 | |||
| 732 | btrfs_trans_release_metadata(trans, root); | 738 | btrfs_trans_release_metadata(trans, root); |
| 733 | trans->block_rsv = NULL; | 739 | trans->block_rsv = NULL; |
| 734 | 740 | ||
| @@ -778,6 +784,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 778 | assert_qgroups_uptodate(trans); | 784 | assert_qgroups_uptodate(trans); |
| 779 | 785 | ||
| 780 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 786 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 787 | if (must_run_delayed_refs) { | ||
| 788 | btrfs_async_run_delayed_refs(root, cur, | ||
| 789 | must_run_delayed_refs == 1); | ||
| 790 | } | ||
| 781 | return err; | 791 | return err; |
| 782 | } | 792 | } |
| 783 | 793 | ||
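The rewritten tail defers ref throttling until after the handle is freed: must_run_delayed_refs ends up 0 (nothing to do), 1 (run, and the caller may wait), or 2 (forced asynchronous, for JOIN_NOLOCK and ATTACH handles, which must never block behind a commit). Reduced to the policy itself, a sketch assembled from the two hunks above (the last argument of btrfs_async_run_delayed_refs() presumably selects whether to wait):

/* Sketch: delayed-ref throttling decision at end of transaction. */
int must_run = 0;

if (!trans->sync) {
	must_run = btrfs_should_throttle_delayed_refs(trans, root);
	cur = max_t(unsigned long, cur, 32);	/* batch at least 32 refs */

	/* NOLOCK/ATTACH handles must not wait: force the async flavor */
	if (must_run == 1 &&
	    (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
		must_run = 2;
}
/* ... the transaction handle is released ... */
if (must_run)
	btrfs_async_run_delayed_refs(root, cur, must_run == 1);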
| @@ -1049,8 +1059,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
| 1049 | btrfs_save_ino_cache(root, trans); | 1059 | btrfs_save_ino_cache(root, trans); |
| 1050 | 1060 | ||
| 1051 | /* see comments in should_cow_block() */ | 1061 | /* see comments in should_cow_block() */ |
| 1052 | root->force_cow = 0; | 1062 | clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); |
| 1053 | smp_wmb(); | 1063 | smp_mb__after_atomic(); |
| 1054 | 1064 | ||
| 1055 | if (root->commit_root != root->node) { | 1065 | if (root->commit_root != root->node) { |
| 1056 | list_add_tail(&root->dirty_list, | 1066 | list_add_tail(&root->dirty_list, |
| @@ -1081,7 +1091,7 @@ int btrfs_defrag_root(struct btrfs_root *root) | |||
| 1081 | struct btrfs_trans_handle *trans; | 1091 | struct btrfs_trans_handle *trans; |
| 1082 | int ret; | 1092 | int ret; |
| 1083 | 1093 | ||
| 1084 | if (xchg(&root->defrag_running, 1)) | 1094 | if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state)) |
| 1085 | return 0; | 1095 | return 0; |
| 1086 | 1096 | ||
| 1087 | while (1) { | 1097 | while (1) { |
| @@ -1104,7 +1114,7 @@ int btrfs_defrag_root(struct btrfs_root *root) | |||
| 1104 | break; | 1114 | break; |
| 1105 | } | 1115 | } |
| 1106 | } | 1116 | } |
| 1107 | root->defrag_running = 0; | 1117 | clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state); |
| 1108 | return ret; | 1118 | return ret; |
| 1109 | } | 1119 | } |
| 1110 | 1120 | ||
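This is one instance of a broader conversion in the commit: per-root booleans (ref_cows, in_trans_setup, force_cow, defrag_running, the multi-log-task flag) become bits in an atomic root->state word, so the ad-hoc xchg() guard above turns into test_and_set_bit() with a paired clear_bit(). The idiom in isolation:

#include <linux/bitops.h>

/* Sketch: an atomic state bit as a run-once-at-a-time guard. */
#define EXAMPLE_RUNNING		0	/* bit index within the state word */

static unsigned long state;

static int guarded_work(void)
{
	if (test_and_set_bit(EXAMPLE_RUNNING, &state))
		return 0;		/* another caller is already running */

	/* ... do the work ... */

	clear_bit(EXAMPLE_RUNNING, &state);
	return 1;
}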
| @@ -1168,12 +1178,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1168 | goto no_free_objectid; | 1178 | goto no_free_objectid; |
| 1169 | } | 1179 | } |
| 1170 | 1180 | ||
| 1171 | pending->error = btrfs_qgroup_inherit(trans, fs_info, | ||
| 1172 | root->root_key.objectid, | ||
| 1173 | objectid, pending->inherit); | ||
| 1174 | if (pending->error) | ||
| 1175 | goto no_free_objectid; | ||
| 1176 | |||
| 1177 | key.objectid = objectid; | 1181 | key.objectid = objectid; |
| 1178 | key.offset = (u64)-1; | 1182 | key.offset = (u64)-1; |
| 1179 | key.type = BTRFS_ROOT_ITEM_KEY; | 1183 | key.type = BTRFS_ROOT_ITEM_KEY; |
| @@ -1270,8 +1274,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1270 | goto fail; | 1274 | goto fail; |
| 1271 | } | 1275 | } |
| 1272 | 1276 | ||
| 1277 | /* | ||
| 1278 | * We need to flush delayed refs in order to make sure all of our quota | ||
| 1279 | * operations have been done before we call btrfs_qgroup_inherit. | ||
| 1280 | */ | ||
| 1281 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
| 1282 | if (ret) { | ||
| 1283 | btrfs_abort_transaction(trans, root, ret); | ||
| 1284 | goto fail; | ||
| 1285 | } | ||
| 1286 | |||
| 1287 | ret = btrfs_qgroup_inherit(trans, fs_info, | ||
| 1288 | root->root_key.objectid, | ||
| 1289 | objectid, pending->inherit); | ||
| 1290 | if (ret) { | ||
| 1291 | btrfs_abort_transaction(trans, root, ret); | ||
| 1292 | goto fail; | ||
| 1293 | } | ||
| 1294 | |||
| 1273 | /* see comments in should_cow_block() */ | 1295 | /* see comments in should_cow_block() */ |
| 1274 | root->force_cow = 1; | 1296 | set_bit(BTRFS_ROOT_FORCE_COW, &root->state); |
| 1275 | smp_wmb(); | 1297 | smp_wmb(); |
| 1276 | 1298 | ||
| 1277 | btrfs_set_root_node(new_root_item, tmp); | 1299 | btrfs_set_root_node(new_root_item, tmp); |
| @@ -1598,12 +1620,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
| 1598 | * them now so that they hinder processing of more delayed refs | 1620 | * them now so that they hinder processing of more delayed refs |
| 1599 | * as little as possible. | 1621 | * as little as possible. |
| 1600 | */ | 1622 | */ |
| 1601 | if (ret) { | ||
| 1602 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 1603 | return ret; | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 1607 | if (ret) | 1623 | if (ret) |
| 1608 | return ret; | 1624 | return ret; |
| 1609 | 1625 | ||
| @@ -1984,19 +2000,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
| 1984 | } | 2000 | } |
| 1985 | root = list_first_entry(&fs_info->dead_roots, | 2001 | root = list_first_entry(&fs_info->dead_roots, |
| 1986 | struct btrfs_root, root_list); | 2002 | struct btrfs_root, root_list); |
| 1987 | /* | ||
| 1988 | * Make sure root is not involved in send, | ||
| 1989 | * if we fail with first root, we return | ||
| 1990 | * directly rather than continue. | ||
| 1991 | */ | ||
| 1992 | spin_lock(&root->root_item_lock); | ||
| 1993 | if (root->send_in_progress) { | ||
| 1994 | spin_unlock(&fs_info->trans_lock); | ||
| 1995 | spin_unlock(&root->root_item_lock); | ||
| 1996 | return 0; | ||
| 1997 | } | ||
| 1998 | spin_unlock(&root->root_item_lock); | ||
| 1999 | |||
| 2000 | list_del_init(&root->root_list); | 2003 | list_del_init(&root->root_list); |
| 2001 | spin_unlock(&fs_info->trans_lock); | 2004 | spin_unlock(&fs_info->trans_lock); |
| 2002 | 2005 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b57b924e8e03..7dd558ed0716 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -69,6 +69,7 @@ struct btrfs_transaction { | |||
| 69 | #define __TRANS_ATTACH (1U << 10) | 69 | #define __TRANS_ATTACH (1U << 10) |
| 70 | #define __TRANS_JOIN (1U << 11) | 70 | #define __TRANS_JOIN (1U << 11) |
| 71 | #define __TRANS_JOIN_NOLOCK (1U << 12) | 71 | #define __TRANS_JOIN_NOLOCK (1U << 12) |
| 72 | #define __TRANS_DUMMY (1U << 13) | ||
| 72 | 73 | ||
| 73 | #define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE) | 74 | #define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE) |
| 74 | #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) | 75 | #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 76928ca97741..a63719cc9578 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
| @@ -49,7 +49,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
| 49 | goto out; | 49 | goto out; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | if (root->ref_cows == 0) | 52 | if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
| 53 | goto out; | 53 | goto out; |
| 54 | 54 | ||
| 55 | if (btrfs_test_opt(root, SSD)) | 55 | if (btrfs_test_opt(root, SSD)) |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e2f45fc02610..9e1f2cd5e67a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -20,13 +20,11 @@ | |||
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
| 22 | #include <linux/list_sort.h> | 22 | #include <linux/list_sort.h> |
| 23 | #include "ctree.h" | 23 | #include "tree-log.h" |
| 24 | #include "transaction.h" | ||
| 25 | #include "disk-io.h" | 24 | #include "disk-io.h" |
| 26 | #include "locking.h" | 25 | #include "locking.h" |
| 27 | #include "print-tree.h" | 26 | #include "print-tree.h" |
| 28 | #include "backref.h" | 27 | #include "backref.h" |
| 29 | #include "tree-log.h" | ||
| 30 | #include "hash.h" | 28 | #include "hash.h" |
| 31 | 29 | ||
| 32 | /* magic values for the inode_only field in btrfs_log_inode: | 30 | /* magic values for the inode_only field in btrfs_log_inode: |
| @@ -144,17 +142,15 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 144 | 142 | ||
| 145 | mutex_lock(&root->log_mutex); | 143 | mutex_lock(&root->log_mutex); |
| 146 | if (root->log_root) { | 144 | if (root->log_root) { |
| 147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | 145 | if (btrfs_need_log_full_commit(root->fs_info, trans)) { |
| 148 | trans->transid) { | ||
| 149 | ret = -EAGAIN; | 146 | ret = -EAGAIN; |
| 150 | goto out; | 147 | goto out; |
| 151 | } | 148 | } |
| 152 | |||
| 153 | if (!root->log_start_pid) { | 149 | if (!root->log_start_pid) { |
| 154 | root->log_start_pid = current->pid; | 150 | root->log_start_pid = current->pid; |
| 155 | root->log_multiple_pids = false; | 151 | clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); |
| 156 | } else if (root->log_start_pid != current->pid) { | 152 | } else if (root->log_start_pid != current->pid) { |
| 157 | root->log_multiple_pids = true; | 153 | set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); |
| 158 | } | 154 | } |
| 159 | 155 | ||
| 160 | atomic_inc(&root->log_batch); | 156 | atomic_inc(&root->log_batch); |
| @@ -181,7 +177,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 181 | if (ret) | 177 | if (ret) |
| 182 | goto out; | 178 | goto out; |
| 183 | } | 179 | } |
| 184 | root->log_multiple_pids = false; | 180 | clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); |
| 185 | root->log_start_pid = current->pid; | 181 | root->log_start_pid = current->pid; |
| 186 | atomic_inc(&root->log_batch); | 182 | atomic_inc(&root->log_batch); |
| 187 | atomic_inc(&root->log_writers); | 183 | atomic_inc(&root->log_writers); |
| @@ -2500,7 +2496,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2500 | while (1) { | 2496 | while (1) { |
| 2501 | int batch = atomic_read(&root->log_batch); | 2497 | int batch = atomic_read(&root->log_batch); |
| 2502 | /* when we're on an ssd, just kick the log commit out */ | 2498 | /* when we're on an ssd, just kick the log commit out */ |
| 2503 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2499 | if (!btrfs_test_opt(root, SSD) && |
| 2500 | test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) { | ||
| 2504 | mutex_unlock(&root->log_mutex); | 2501 | mutex_unlock(&root->log_mutex); |
| 2505 | schedule_timeout_uninterruptible(1); | 2502 | schedule_timeout_uninterruptible(1); |
| 2506 | mutex_lock(&root->log_mutex); | 2503 | mutex_lock(&root->log_mutex); |
| @@ -2511,8 +2508,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2511 | } | 2508 | } |
| 2512 | 2509 | ||
| 2513 | /* bail out if we need to do a full commit */ | 2510 | /* bail out if we need to do a full commit */ |
| 2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | 2511 | if (btrfs_need_log_full_commit(root->fs_info, trans)) { |
| 2515 | trans->transid) { | ||
| 2516 | ret = -EAGAIN; | 2512 | ret = -EAGAIN; |
| 2517 | btrfs_free_logged_extents(log, log_transid); | 2513 | btrfs_free_logged_extents(log, log_transid); |
| 2518 | mutex_unlock(&root->log_mutex); | 2514 | mutex_unlock(&root->log_mutex); |
| @@ -2533,8 +2529,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2533 | blk_finish_plug(&plug); | 2529 | blk_finish_plug(&plug); |
| 2534 | btrfs_abort_transaction(trans, root, ret); | 2530 | btrfs_abort_transaction(trans, root, ret); |
| 2535 | btrfs_free_logged_extents(log, log_transid); | 2531 | btrfs_free_logged_extents(log, log_transid); |
| 2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2532 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 2537 | trans->transid; | ||
| 2538 | mutex_unlock(&root->log_mutex); | 2533 | mutex_unlock(&root->log_mutex); |
| 2539 | goto out; | 2534 | goto out; |
| 2540 | } | 2535 | } |
| @@ -2577,8 +2572,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2577 | list_del_init(&root_log_ctx.list); | 2572 | list_del_init(&root_log_ctx.list); |
| 2578 | 2573 | ||
| 2579 | blk_finish_plug(&plug); | 2574 | blk_finish_plug(&plug); |
| 2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2575 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 2581 | trans->transid; | 2576 | |
| 2582 | if (ret != -ENOSPC) { | 2577 | if (ret != -ENOSPC) { |
| 2583 | btrfs_abort_transaction(trans, root, ret); | 2578 | btrfs_abort_transaction(trans, root, ret); |
| 2584 | mutex_unlock(&log_root_tree->log_mutex); | 2579 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2622,8 +2617,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2622 | * now that we've moved on to the tree of log tree roots, | 2617 | * now that we've moved on to the tree of log tree roots, |
| 2623 | * check the full commit flag again | 2618 | * check the full commit flag again |
| 2624 | */ | 2619 | */ |
| 2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | 2620 | if (btrfs_need_log_full_commit(root->fs_info, trans)) { |
| 2626 | trans->transid) { | ||
| 2627 | blk_finish_plug(&plug); | 2621 | blk_finish_plug(&plug); |
| 2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2622 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2629 | btrfs_free_logged_extents(log, log_transid); | 2623 | btrfs_free_logged_extents(log, log_transid); |
| @@ -2637,8 +2631,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2637 | EXTENT_DIRTY | EXTENT_NEW); | 2631 | EXTENT_DIRTY | EXTENT_NEW); |
| 2638 | blk_finish_plug(&plug); | 2632 | blk_finish_plug(&plug); |
| 2639 | if (ret) { | 2633 | if (ret) { |
| 2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2634 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 2641 | trans->transid; | ||
| 2642 | btrfs_abort_transaction(trans, root, ret); | 2635 | btrfs_abort_transaction(trans, root, ret); |
| 2643 | btrfs_free_logged_extents(log, log_transid); | 2636 | btrfs_free_logged_extents(log, log_transid); |
| 2644 | mutex_unlock(&log_root_tree->log_mutex); | 2637 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2667,8 +2660,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2667 | */ | 2660 | */ |
| 2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2661 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2669 | if (ret) { | 2662 | if (ret) { |
| 2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | 2663 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 2671 | trans->transid; | ||
| 2672 | btrfs_abort_transaction(trans, root, ret); | 2664 | btrfs_abort_transaction(trans, root, ret); |
| 2673 | goto out_wake_log_root; | 2665 | goto out_wake_log_root; |
| 2674 | } | 2666 | } |
| @@ -2886,7 +2878,7 @@ fail: | |||
| 2886 | out_unlock: | 2878 | out_unlock: |
| 2887 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2879 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
| 2888 | if (ret == -ENOSPC) { | 2880 | if (ret == -ENOSPC) { |
| 2889 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2881 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 2890 | ret = 0; | 2882 | ret = 0; |
| 2891 | } else if (ret < 0) | 2883 | } else if (ret < 0) |
| 2892 | btrfs_abort_transaction(trans, root, ret); | 2884 | btrfs_abort_transaction(trans, root, ret); |
| @@ -2919,7 +2911,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
| 2919 | dirid, &index); | 2911 | dirid, &index); |
| 2920 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2912 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2921 | if (ret == -ENOSPC) { | 2913 | if (ret == -ENOSPC) { |
| 2922 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2914 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 2923 | ret = 0; | 2915 | ret = 0; |
| 2924 | } else if (ret < 0 && ret != -ENOENT) | 2916 | } else if (ret < 0 && ret != -ENOENT) |
| 2925 | btrfs_abort_transaction(trans, root, ret); | 2917 | btrfs_abort_transaction(trans, root, ret); |
| @@ -4130,8 +4122,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
| 4130 | * make sure any commits to the log are forced | 4122 | * make sure any commits to the log are forced |
| 4131 | * to be full commits | 4123 | * to be full commits |
| 4132 | */ | 4124 | */ |
| 4133 | root->fs_info->last_trans_log_full_commit = | 4125 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 4134 | trans->transid; | ||
| 4135 | ret = 1; | 4126 | ret = 1; |
| 4136 | break; | 4127 | break; |
| 4137 | } | 4128 | } |
| @@ -4177,6 +4168,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 4177 | goto end_no_trans; | 4168 | goto end_no_trans; |
| 4178 | } | 4169 | } |
| 4179 | 4170 | ||
| 4171 | /* ||
| 4172 | * The previous transaction commit didn't complete, so we ||
| 4173 | * have to do a full commit ourselves. ||
| 4174 | */ ||
| 4180 | if (root->fs_info->last_trans_log_full_commit > | 4175 | if (root->fs_info->last_trans_log_full_commit > |
| 4181 | root->fs_info->last_trans_committed) { | 4176 | root->fs_info->last_trans_committed) { |
| 4182 | ret = 1; | 4177 | ret = 1; |
| @@ -4246,7 +4241,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 4246 | end_trans: | 4241 | end_trans: |
| 4247 | dput(old_parent); | 4242 | dput(old_parent); |
| 4248 | if (ret < 0) { | 4243 | if (ret < 0) { |
| 4249 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4244 | btrfs_set_log_full_commit(root->fs_info, trans); |
| 4250 | ret = 1; | 4245 | ret = 1; |
| 4251 | } | 4246 | } |
| 4252 | 4247 | ||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 91b145fce333..7f5b41bd5373 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -19,6 +19,9 @@ | |||
| 19 | #ifndef __TREE_LOG_ | 19 | #ifndef __TREE_LOG_ |
| 20 | #define __TREE_LOG_ | 20 | #define __TREE_LOG_ |
| 21 | 21 | ||
| 22 | #include "ctree.h" | ||
| 23 | #include "transaction.h" | ||
| 24 | |||
| 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 25 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
| 23 | #define BTRFS_NO_LOG_SYNC 256 | 26 | #define BTRFS_NO_LOG_SYNC 256 |
| 24 | 27 | ||
| @@ -35,6 +38,19 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | |||
| 35 | INIT_LIST_HEAD(&ctx->list); | 38 | INIT_LIST_HEAD(&ctx->list); |
| 36 | } | 39 | } |
| 37 | 40 | ||
| 41 | static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info, | ||
| 42 | struct btrfs_trans_handle *trans) | ||
| 43 | { | ||
| 44 | ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid; | ||
| 45 | } | ||
| 46 | |||
| 47 | static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info, | ||
| 48 | struct btrfs_trans_handle *trans) | ||
| 49 | { | ||
| 50 | return ACCESS_ONCE(fs_info->last_trans_log_full_commit) == | ||
| 51 | trans->transid; | ||
| 52 | } | ||
| 53 | |||
| 38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 54 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); | 55 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
| 40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 56 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
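The tree-log.c hunks above all collapse the same two-line ACCESS_ONCE() idiom into these helpers. A minimal sketch of the resulting call pattern (the surrounding error path is illustrative, not a verbatim quote of tree-log.c):

	if (btrfs_need_log_full_commit(root->fs_info, trans)) {
		/* someone already demanded a full transaction commit
		 * for this transid; don't bother syncing the log */
		ret = -EAGAIN;
		goto out;
	}

	ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
	if (ret) {
		/* force fsync callers of this transid into a full commit */
		btrfs_set_log_full_commit(root->fs_info, trans);
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	}
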
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 49d7fab73360..ffeed6d6326f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -1452,6 +1452,22 @@ out: | |||
| 1452 | return ret; | 1452 | return ret; |
| 1453 | } | 1453 | } |
| 1454 | 1454 | ||
| 1455 | /* | ||
| 1456 | * Update the ctime/mtime of a given device path. Mainly ||
| 1457 | * used by ctime/mtime based probing tools such as libblkid. ||
| 1458 | */ | ||
| 1459 | static void update_dev_time(char *path_name) | ||
| 1460 | { | ||
| 1461 | struct file *filp; | ||
| 1462 | |||
| 1463 | filp = filp_open(path_name, O_RDWR, 0); | ||
| 1464 | if (IS_ERR(filp)) ||
| 1465 | return; | ||
| 1466 | file_update_time(filp); | ||
| 1467 | filp_close(filp, NULL); | ||
| 1468 | return; | ||
| 1469 | } | ||
| 1470 | |||
| 1455 | static int btrfs_rm_dev_item(struct btrfs_root *root, | 1471 | static int btrfs_rm_dev_item(struct btrfs_root *root, |
| 1456 | struct btrfs_device *device) | 1472 | struct btrfs_device *device) |
| 1457 | { | 1473 | { |
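Note that filp_open() signals failure with an ERR_PTR()-encoded errno rather than NULL, so the result must be tested with IS_ERR() (reflected in the hunk above). A standalone sketch of the corrected helper:

	static void update_dev_time(char *path_name)
	{
		struct file *filp;

		filp = filp_open(path_name, O_RDWR, 0);
		if (IS_ERR(filp))
			return;		/* PTR_ERR(filp) holds the errno */
		file_update_time(filp);
		filp_close(filp, NULL);
	}
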
| @@ -1674,11 +1690,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1674 | struct btrfs_fs_devices *fs_devices; | 1690 | struct btrfs_fs_devices *fs_devices; |
| 1675 | fs_devices = root->fs_info->fs_devices; | 1691 | fs_devices = root->fs_info->fs_devices; |
| 1676 | while (fs_devices) { | 1692 | while (fs_devices) { |
| 1677 | if (fs_devices->seed == cur_devices) | 1693 | if (fs_devices->seed == cur_devices) { |
| 1694 | fs_devices->seed = cur_devices->seed; | ||
| 1678 | break; | 1695 | break; |
| 1696 | } | ||
| 1679 | fs_devices = fs_devices->seed; | 1697 | fs_devices = fs_devices->seed; |
| 1680 | } | 1698 | } |
| 1681 | fs_devices->seed = cur_devices->seed; | ||
| 1682 | cur_devices->seed = NULL; | 1699 | cur_devices->seed = NULL; |
| 1683 | lock_chunks(root); | 1700 | lock_chunks(root); |
| 1684 | __btrfs_close_devices(cur_devices); | 1701 | __btrfs_close_devices(cur_devices); |
| @@ -1694,20 +1711,55 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1694 | * remove it from the devices list and zero out the old super | 1711 | * remove it from the devices list and zero out the old super |
| 1695 | */ | 1712 | */ |
| 1696 | if (clear_super && disk_super) { | 1713 | if (clear_super && disk_super) { |
| 1714 | u64 bytenr; | ||
| 1715 | int i; | ||
| 1716 | |||
| 1697 | /* make sure this device isn't detected as part of | 1717 | /* make sure this device isn't detected as part of |
| 1698 | * the FS anymore | 1718 | * the FS anymore |
| 1699 | */ | 1719 | */ |
| 1700 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | 1720 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); |
| 1701 | set_buffer_dirty(bh); | 1721 | set_buffer_dirty(bh); |
| 1702 | sync_dirty_buffer(bh); | 1722 | sync_dirty_buffer(bh); |
| 1723 | |||
| 1724 | /* clear the mirror copies of the super block on the ||
| 1725 | * disk being removed; the 0th copy was handled above ||
| 1726 | * and the loop below takes care of the rest ||
| 1727 | */ | ||
| 1728 | for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
| 1729 | bytenr = btrfs_sb_offset(i); | ||
| 1730 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
| 1731 | i_size_read(bdev->bd_inode)) | ||
| 1732 | break; | ||
| 1733 | |||
| 1734 | brelse(bh); | ||
| 1735 | bh = __bread(bdev, bytenr / 4096, | ||
| 1736 | BTRFS_SUPER_INFO_SIZE); | ||
| 1737 | if (!bh) | ||
| 1738 | continue; | ||
| 1739 | |||
| 1740 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 1741 | |||
| 1742 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
| 1743 | btrfs_super_magic(disk_super) != BTRFS_MAGIC) { | ||
| 1744 | continue; | ||
| 1745 | } | ||
| 1746 | memset(&disk_super->magic, 0, | ||
| 1747 | sizeof(disk_super->magic)); | ||
| 1748 | set_buffer_dirty(bh); | ||
| 1749 | sync_dirty_buffer(bh); | ||
| 1750 | } | ||
| 1703 | } | 1751 | } |
| 1704 | 1752 | ||
| 1705 | ret = 0; | 1753 | ret = 0; |
| 1706 | 1754 | ||
| 1707 | /* Notify udev that device has changed */ | 1755 | if (bdev) { |
| 1708 | if (bdev) | 1756 | /* Notify udev that device has changed */ |
| 1709 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); | 1757 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); |
| 1710 | 1758 | ||
| 1759 | /* Update ctime/mtime for device path for libblkid */ | ||
| 1760 | update_dev_time(device_path); | ||
| 1761 | } | ||
| 1762 | |||
| 1711 | error_brelse: | 1763 | error_brelse: |
| 1712 | brelse(bh); | 1764 | brelse(bh); |
| 1713 | if (bdev) | 1765 | if (bdev) |
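The mirror-clearing loop above walks the superblock copies via btrfs_sb_offset() and stops once a mirror would lie past the end of the device. For reference, a sketch of how those offsets are computed (constants per my reading of ctree.h in this period: the primary copy sits at 64KiB, the mirrors at 64MiB and 256GiB):

	#define BTRFS_SUPER_INFO_OFFSET   (64 * 1024)
	#define BTRFS_SUPER_MIRROR_MAX    3
	#define BTRFS_SUPER_MIRROR_SHIFT  12

	static inline u64 btrfs_sb_offset(int mirror)
	{
		u64 start = 16 * 1024;

		if (mirror)	/* 16KiB << 12 = 64MiB, << 24 = 256GiB */
			return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
		return BTRFS_SUPER_INFO_OFFSET;
	}
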
| @@ -1883,7 +1935,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) | |||
| 1883 | fs_devices->seeding = 0; | 1935 | fs_devices->seeding = 0; |
| 1884 | fs_devices->num_devices = 0; | 1936 | fs_devices->num_devices = 0; |
| 1885 | fs_devices->open_devices = 0; | 1937 | fs_devices->open_devices = 0; |
| 1886 | fs_devices->total_devices = 0; | ||
| 1887 | fs_devices->seed = seed_devices; | 1938 | fs_devices->seed = seed_devices; |
| 1888 | 1939 | ||
| 1889 | generate_random_uuid(fs_devices->fsid); | 1940 | generate_random_uuid(fs_devices->fsid); |
| @@ -2146,6 +2197,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 2146 | ret = btrfs_commit_transaction(trans, root); | 2197 | ret = btrfs_commit_transaction(trans, root); |
| 2147 | } | 2198 | } |
| 2148 | 2199 | ||
| 2200 | /* Update ctime/mtime for libblkid */ | ||
| 2201 | update_dev_time(device_path); | ||
| 2149 | return ret; | 2202 | return ret; |
| 2150 | 2203 | ||
| 2151 | error_trans: | 2204 | error_trans: |
| @@ -2922,6 +2975,16 @@ static int should_balance_chunk(struct btrfs_root *root, | |||
| 2922 | return 0; | 2975 | return 0; |
| 2923 | } | 2976 | } |
| 2924 | 2977 | ||
| 2978 | /* | ||
| 2979 | * limit the number of chunks relocated; must be the last filter ||
| 2980 | */ | ||
| 2981 | if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) { | ||
| 2982 | if (bargs->limit == 0) | ||
| 2983 | return 0; | ||
| 2984 | else | ||
| 2985 | bargs->limit--; | ||
| 2986 | } | ||
| 2987 | |||
| 2925 | return 1; | 2988 | return 1; |
| 2926 | } | 2989 | } |
| 2927 | 2990 | ||
| @@ -2944,6 +3007,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
| 2944 | int ret; | 3007 | int ret; |
| 2945 | int enospc_errors = 0; | 3008 | int enospc_errors = 0; |
| 2946 | bool counting = true; | 3009 | bool counting = true; |
| 3010 | u64 limit_data = bctl->data.limit; | ||
| 3011 | u64 limit_meta = bctl->meta.limit; | ||
| 3012 | u64 limit_sys = bctl->sys.limit; | ||
| 2947 | 3013 | ||
| 2948 | /* step one make some room on all the devices */ | 3014 | /* step one make some room on all the devices */ |
| 2949 | devices = &fs_info->fs_devices->devices; | 3015 | devices = &fs_info->fs_devices->devices; |
| @@ -2982,6 +3048,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
| 2982 | memset(&bctl->stat, 0, sizeof(bctl->stat)); | 3048 | memset(&bctl->stat, 0, sizeof(bctl->stat)); |
| 2983 | spin_unlock(&fs_info->balance_lock); | 3049 | spin_unlock(&fs_info->balance_lock); |
| 2984 | again: | 3050 | again: |
| 3051 | if (!counting) { | ||
| 3052 | bctl->data.limit = limit_data; | ||
| 3053 | bctl->meta.limit = limit_meta; | ||
| 3054 | bctl->sys.limit = limit_sys; | ||
| 3055 | } | ||
| 2985 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 3056 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
| 2986 | key.offset = (u64)-1; | 3057 | key.offset = (u64)-1; |
| 2987 | key.type = BTRFS_CHUNK_ITEM_KEY; | 3058 | key.type = BTRFS_CHUNK_ITEM_KEY; |
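The new limit filter is consumed (decremented) each time a chunk passes the rest of the filter chain, and since the counting pass in __btrfs_balance() runs through the same chain, the saved per-type limits must be restored before the actual relocation pass, which is what the hunk above does. A hedged sketch of how user space might request a limited balance through the ioctl; field and constant names follow the uapi btrfs.h of this period and should be treated as assumptions:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static int balance_first_n_data_chunks(int fd, __u64 n)
	{
		struct btrfs_ioctl_balance_args args;

		memset(&args, 0, sizeof(args));
		args.flags = BTRFS_BALANCE_DATA;	/* data chunks only */
		args.data.flags = BTRFS_BALANCE_ARGS_LIMIT;
		args.data.limit = n;			/* relocate at most n */

		return ioctl(fd, BTRFS_IOC_BALANCE_V2, &args);
	}
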
| @@ -3881,7 +3952,8 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, | |||
| 3881 | u8 *ptr; | 3952 | u8 *ptr; |
| 3882 | 3953 | ||
| 3883 | array_size = btrfs_super_sys_array_size(super_copy); | 3954 | array_size = btrfs_super_sys_array_size(super_copy); |
| 3884 | if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | 3955 | if (array_size + item_size + sizeof(disk_key) |
| 3956 | > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | ||
| 3885 | return -EFBIG; | 3957 | return -EFBIG; |
| 3886 | 3958 | ||
| 3887 | ptr = super_copy->sys_chunk_array + array_size; | 3959 | ptr = super_copy->sys_chunk_array + array_size; |
| @@ -3986,6 +4058,16 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) | |||
| 3986 | btrfs_set_fs_incompat(info, RAID56); | 4058 | btrfs_set_fs_incompat(info, RAID56); |
| 3987 | } | 4059 | } |
| 3988 | 4060 | ||
| 4061 | #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ | ||
| 4062 | - sizeof(struct btrfs_item) \ | ||
| 4063 | - sizeof(struct btrfs_chunk)) \ | ||
| 4064 | / sizeof(struct btrfs_stripe) + 1) | ||
| 4065 | |||
| 4066 | #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ | ||
| 4067 | - 2 * sizeof(struct btrfs_disk_key) \ | ||
| 4068 | - 2 * sizeof(struct btrfs_chunk)) \ | ||
| 4069 | / sizeof(struct btrfs_stripe) + 1) | ||
| 4070 | |||
| 3989 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 4071 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
| 3990 | struct btrfs_root *extent_root, u64 start, | 4072 | struct btrfs_root *extent_root, u64 start, |
| 3991 | u64 type) | 4073 | u64 type) |
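A worked instance of the new macro for the system chunk case, under assumed packed on-disk struct sizes (my reading of the format headers; verify against ctree.h before relying on the numbers):

	/* Assumed sizes:
	 *   BTRFS_SYSTEM_CHUNK_ARRAY_SIZE = 2048
	 *   sizeof(struct btrfs_disk_key) = 17
	 *   sizeof(struct btrfs_chunk)    = 80  (embeds the first stripe,
	 *                                        hence the trailing "+ 1")
	 *   sizeof(struct btrfs_stripe)   = 32
	 *
	 * BTRFS_MAX_DEVS_SYS_CHUNK = (2048 - 2*17 - 2*80) / 32 + 1
	 *                          = 1854 / 32 + 1
	 *                          = 58
	 *
	 * i.e. a system chunk can address at most 58 stripes/devices; the
	 * doubled key/chunk reservation leaves headroom for a second entry
	 * in sys_chunk_array.
	 */
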
| @@ -4035,6 +4117,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 4035 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 4117 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
| 4036 | max_stripe_size = 1024 * 1024 * 1024; | 4118 | max_stripe_size = 1024 * 1024 * 1024; |
| 4037 | max_chunk_size = 10 * max_stripe_size; | 4119 | max_chunk_size = 10 * max_stripe_size; |
| 4120 | if (!devs_max) | ||
| 4121 | devs_max = BTRFS_MAX_DEVS(info->chunk_root); | ||
| 4038 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { | 4122 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { |
| 4039 | /* for larger filesystems, use larger metadata chunks */ | 4123 | /* for larger filesystems, use larger metadata chunks */ |
| 4040 | if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) | 4124 | if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) |
| @@ -4042,11 +4126,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 4042 | else | 4126 | else |
| 4043 | max_stripe_size = 256 * 1024 * 1024; | 4127 | max_stripe_size = 256 * 1024 * 1024; |
| 4044 | max_chunk_size = max_stripe_size; | 4128 | max_chunk_size = max_stripe_size; |
| 4129 | if (!devs_max) | ||
| 4130 | devs_max = BTRFS_MAX_DEVS(info->chunk_root); | ||
| 4045 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 4131 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
| 4046 | max_stripe_size = 32 * 1024 * 1024; | 4132 | max_stripe_size = 32 * 1024 * 1024; |
| 4047 | max_chunk_size = 2 * max_stripe_size; | 4133 | max_chunk_size = 2 * max_stripe_size; |
| 4134 | if (!devs_max) | ||
| 4135 | devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; | ||
| 4048 | } else { | 4136 | } else { |
| 4049 | btrfs_err(info, "invalid chunk type 0x%llx requested\n", | 4137 | btrfs_err(info, "invalid chunk type 0x%llx requested", |
| 4050 | type); | 4138 | type); |
| 4051 | BUG_ON(1); | 4139 | BUG_ON(1); |
| 4052 | } | 4140 | } |
| @@ -4294,7 +4382,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 4294 | 4382 | ||
| 4295 | if (em->start != chunk_offset || em->len != chunk_size) { | 4383 | if (em->start != chunk_offset || em->len != chunk_size) { |
| 4296 | btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" | 4384 | btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" |
| 4297 | " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset, | 4385 | " %Lu-%Lu, found %Lu-%Lu", chunk_offset, |
| 4298 | chunk_size, em->start, em->len); | 4386 | chunk_size, em->start, em->len); |
| 4299 | free_extent_map(em); | 4387 | free_extent_map(em); |
| 4300 | return -EINVAL; | 4388 | return -EINVAL; |
| @@ -4496,14 +4584,14 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) | |||
| 4496 | * and exit, so return 1 so the callers don't try to use other copies. | 4584 | * and exit, so return 1 so the callers don't try to use other copies. |
| 4497 | */ | 4585 | */ |
| 4498 | if (!em) { | 4586 | if (!em) { |
| 4499 | btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, | 4587 | btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical, |
| 4500 | logical+len); | 4588 | logical+len); |
| 4501 | return 1; | 4589 | return 1; |
| 4502 | } | 4590 | } |
| 4503 | 4591 | ||
| 4504 | if (em->start > logical || em->start + em->len < logical) { | 4592 | if (em->start > logical || em->start + em->len < logical) { |
| 4505 | btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " | 4593 | btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " |
| 4506 | "%Lu-%Lu\n", logical, logical+len, em->start, | 4594 | "%Lu-%Lu", logical, logical+len, em->start, |
| 4507 | em->start + em->len); | 4595 | em->start + em->len); |
| 4508 | free_extent_map(em); | 4596 | free_extent_map(em); |
| 4509 | return 1; | 4597 | return 1; |
| @@ -4684,7 +4772,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
| 4684 | 4772 | ||
| 4685 | if (em->start > logical || em->start + em->len < logical) { | 4773 | if (em->start > logical || em->start + em->len < logical) { |
| 4686 | btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " | 4774 | btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " |
| 4687 | "found %Lu-%Lu\n", logical, em->start, | 4775 | "found %Lu-%Lu", logical, em->start, |
| 4688 | em->start + em->len); | 4776 | em->start + em->len); |
| 4689 | free_extent_map(em); | 4777 | free_extent_map(em); |
| 4690 | return -EINVAL; | 4778 | return -EINVAL; |
| @@ -6058,10 +6146,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) | |||
| 6058 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | 6146 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
| 6059 | struct btrfs_device *device; | 6147 | struct btrfs_device *device; |
| 6060 | 6148 | ||
| 6061 | mutex_lock(&fs_devices->device_list_mutex); | 6149 | while (fs_devices) { |
| 6062 | list_for_each_entry(device, &fs_devices->devices, dev_list) | 6150 | mutex_lock(&fs_devices->device_list_mutex); |
| 6063 | device->dev_root = fs_info->dev_root; | 6151 | list_for_each_entry(device, &fs_devices->devices, dev_list) |
| 6064 | mutex_unlock(&fs_devices->device_list_mutex); | 6152 | device->dev_root = fs_info->dev_root; |
| 6153 | mutex_unlock(&fs_devices->device_list_mutex); | ||
| 6154 | |||
| 6155 | fs_devices = fs_devices->seed; | ||
| 6156 | } | ||
| 6065 | } | 6157 | } |
| 6066 | 6158 | ||
| 6067 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | 6159 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 80754f9dd3df..1a15bbeb65e2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -255,6 +255,7 @@ struct map_lookup { | |||
| 255 | #define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) | 255 | #define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) |
| 256 | #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) | 256 | #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) |
| 257 | #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) | 257 | #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) |
| 258 | #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) | ||
| 258 | 259 | ||
| 259 | /* | 260 | /* |
| 260 | * Profile changing flags. When SOFT is set we won't relocate chunk if | 261 | * Profile changing flags. When SOFT is set we won't relocate chunk if |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 8e57191950cb..4f196314c0c1 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
| @@ -98,7 +98,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 98 | 98 | ||
| 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
| 100 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); | 100 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); |
| 101 | ret = -1; | 101 | ret = -EIO; |
| 102 | goto out; | 102 | goto out; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| @@ -110,7 +110,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 110 | 110 | ||
| 111 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 111 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| 112 | if (out_page == NULL) { | 112 | if (out_page == NULL) { |
| 113 | ret = -1; | 113 | ret = -ENOMEM; |
| 114 | goto out; | 114 | goto out; |
| 115 | } | 115 | } |
| 116 | cpage_out = kmap(out_page); | 116 | cpage_out = kmap(out_page); |
| @@ -128,7 +128,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 128 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", | 128 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", |
| 129 | ret); | 129 | ret); |
| 130 | zlib_deflateEnd(&workspace->def_strm); | 130 | zlib_deflateEnd(&workspace->def_strm); |
| 131 | ret = -1; | 131 | ret = -EIO; |
| 132 | goto out; | 132 | goto out; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| @@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 136 | if (workspace->def_strm.total_in > 8192 && | 136 | if (workspace->def_strm.total_in > 8192 && |
| 137 | workspace->def_strm.total_in < | 137 | workspace->def_strm.total_in < |
| 138 | workspace->def_strm.total_out) { | 138 | workspace->def_strm.total_out) { |
| 139 | ret = -1; | 139 | ret = -EIO; |
| 140 | goto out; | 140 | goto out; |
| 141 | } | 141 | } |
| 142 | /* we need another page for writing out. Test this | 142 | /* we need another page for writing out. Test this |
| @@ -147,12 +147,12 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 147 | kunmap(out_page); | 147 | kunmap(out_page); |
| 148 | if (nr_pages == nr_dest_pages) { | 148 | if (nr_pages == nr_dest_pages) { |
| 149 | out_page = NULL; | 149 | out_page = NULL; |
| 150 | ret = -1; | 150 | ret = -E2BIG; |
| 151 | goto out; | 151 | goto out; |
| 152 | } | 152 | } |
| 153 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 153 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| 154 | if (out_page == NULL) { | 154 | if (out_page == NULL) { |
| 155 | ret = -1; | 155 | ret = -ENOMEM; |
| 156 | goto out; | 156 | goto out; |
| 157 | } | 157 | } |
| 158 | cpage_out = kmap(out_page); | 158 | cpage_out = kmap(out_page); |
| @@ -188,12 +188,12 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 188 | zlib_deflateEnd(&workspace->def_strm); | 188 | zlib_deflateEnd(&workspace->def_strm); |
| 189 | 189 | ||
| 190 | if (ret != Z_STREAM_END) { | 190 | if (ret != Z_STREAM_END) { |
| 191 | ret = -1; | 191 | ret = -EIO; |
| 192 | goto out; | 192 | goto out; |
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { | 195 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { |
| 196 | ret = -1; | 196 | ret = -E2BIG; |
| 197 | goto out; | 197 | goto out; |
| 198 | } | 198 | } |
| 199 | 199 | ||
| @@ -253,7 +253,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
| 253 | 253 | ||
| 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 255 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 255 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
| 256 | return -1; | 256 | return -EIO; |
| 257 | } | 257 | } |
| 258 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
| 259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
| @@ -295,7 +295,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
| 295 | } | 295 | } |
| 296 | } | 296 | } |
| 297 | if (ret != Z_STREAM_END) | 297 | if (ret != Z_STREAM_END) |
| 298 | ret = -1; | 298 | ret = -EIO; |
| 299 | else | 299 | else |
| 300 | ret = 0; | 300 | ret = 0; |
| 301 | done: | 301 | done: |
| @@ -337,7 +337,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 337 | 337 | ||
| 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 339 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 339 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
| 340 | return -1; | 340 | return -EIO; |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | while (bytes_left > 0) { | 343 | while (bytes_left > 0) { |
| @@ -354,7 +354,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 354 | total_out = workspace->inf_strm.total_out; | 354 | total_out = workspace->inf_strm.total_out; |
| 355 | 355 | ||
| 356 | if (total_out == buf_start) { | 356 | if (total_out == buf_start) { |
| 357 | ret = -1; | 357 | ret = -EIO; |
| 358 | break; | 358 | break; |
| 359 | } | 359 | } |
| 360 | 360 | ||
| @@ -382,7 +382,7 @@ next: | |||
| 382 | } | 382 | } |
| 383 | 383 | ||
| 384 | if (ret != Z_STREAM_END && bytes_left != 0) | 384 | if (ret != Z_STREAM_END && bytes_left != 0) |
| 385 | ret = -1; | 385 | ret = -EIO; |
| 386 | else | 386 | else |
| 387 | ret = 0; | 387 | ret = 0; |
| 388 | 388 | ||
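The zlib conversion above replaces blanket -1 returns with distinguishable errno values. The convention, summarized, plus a hypothetical helper illustrating the final stream check (not a kernel API):

	/* -ENOMEM  alloc_page() failed
	 * -E2BIG   compressed output would not fit in the destination
	 *          pages, or compression did not beat the input size
	 * -EIO     zlib reported a stream error or a truncated stream
	 */
	static int zret_to_errno(int zret)	/* hypothetical, illustration */
	{
		return (zret == Z_STREAM_END) ? 0 : -EIO;
	}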
