diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/backref.c | 1 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 34 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 3 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 56 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 185 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/file.c | 36 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 3 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 83 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 2 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 62 | ||||
-rw-r--r-- | fs/btrfs/qgroup.h | 36 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 126 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 27 | ||||
-rw-r--r-- | fs/btrfs/send.c | 173 | ||||
-rw-r--r-- | fs/btrfs/super.c | 16 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 7 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 106 | ||||
-rw-r--r-- | fs/btrfs/tree-log.h | 5 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 27 |
22 files changed, 756 insertions, 240 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2b88439c2ee8..455a6b2fd539 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode) | |||
589 | 589 | ||
590 | list_del(&ref2->list); | 590 | list_del(&ref2->list); |
591 | kmem_cache_free(btrfs_prelim_ref_cache, ref2); | 591 | kmem_cache_free(btrfs_prelim_ref_cache, ref2); |
592 | cond_resched(); | ||
592 | } | 593 | } |
593 | 594 | ||
594 | } | 595 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2fe8f89091a3..eff3993c77b3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1028,6 +1028,7 @@ struct btrfs_fs_info { | |||
1028 | struct btrfs_workqueue *qgroup_rescan_workers; | 1028 | struct btrfs_workqueue *qgroup_rescan_workers; |
1029 | struct completion qgroup_rescan_completion; | 1029 | struct completion qgroup_rescan_completion; |
1030 | struct btrfs_work qgroup_rescan_work; | 1030 | struct btrfs_work qgroup_rescan_work; |
1031 | bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ | ||
1031 | 1032 | ||
1032 | /* filesystem state */ | 1033 | /* filesystem state */ |
1033 | unsigned long fs_state; | 1034 | unsigned long fs_state; |
@@ -1079,6 +1080,8 @@ struct btrfs_fs_info { | |||
1079 | struct list_head pinned_chunks; | 1080 | struct list_head pinned_chunks; |
1080 | 1081 | ||
1081 | int creating_free_space_tree; | 1082 | int creating_free_space_tree; |
1083 | /* Used to record internally whether fs has been frozen */ | ||
1084 | int fs_frozen; | ||
1082 | }; | 1085 | }; |
1083 | 1086 | ||
1084 | struct btrfs_subvolume_writers { | 1087 | struct btrfs_subvolume_writers { |
@@ -2578,7 +2581,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
2578 | struct btrfs_root *root, | 2581 | struct btrfs_root *root, |
2579 | u64 root_objectid, u64 owner, u64 offset, | 2582 | u64 root_objectid, u64 owner, u64 offset, |
2580 | struct btrfs_key *ins); | 2583 | struct btrfs_key *ins); |
2581 | int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, | 2584 | int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, |
2582 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, | 2585 | u64 min_alloc_size, u64 empty_size, u64 hint_byte, |
2583 | struct btrfs_key *ins, int is_data, int delalloc); | 2586 | struct btrfs_key *ins, int is_data, int delalloc); |
2584 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2587 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index b6d210e7a993..ac02e041464b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
541 | struct btrfs_delayed_ref_head *existing; | 541 | struct btrfs_delayed_ref_head *existing; |
542 | struct btrfs_delayed_ref_head *head_ref = NULL; | 542 | struct btrfs_delayed_ref_head *head_ref = NULL; |
543 | struct btrfs_delayed_ref_root *delayed_refs; | 543 | struct btrfs_delayed_ref_root *delayed_refs; |
544 | struct btrfs_qgroup_extent_record *qexisting; | ||
545 | int count_mod = 1; | 544 | int count_mod = 1; |
546 | int must_insert_reserved = 0; | 545 | int must_insert_reserved = 0; |
547 | 546 | ||
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
606 | qrecord->num_bytes = num_bytes; | 605 | qrecord->num_bytes = num_bytes; |
607 | qrecord->old_roots = NULL; | 606 | qrecord->old_roots = NULL; |
608 | 607 | ||
609 | qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, | 608 | if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, |
610 | delayed_refs, | 609 | delayed_refs, qrecord)) |
611 | qrecord); | ||
612 | if (qexisting) | ||
613 | kfree(qrecord); | 610 | kfree(qrecord); |
614 | } | 611 | } |
615 | 612 | ||
@@ -862,33 +859,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
862 | return 0; | 859 | return 0; |
863 | } | 860 | } |
864 | 861 | ||
865 | int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, | ||
866 | struct btrfs_trans_handle *trans, | ||
867 | u64 ref_root, u64 bytenr, u64 num_bytes) | ||
868 | { | ||
869 | struct btrfs_delayed_ref_root *delayed_refs; | ||
870 | struct btrfs_delayed_ref_head *ref_head; | ||
871 | int ret = 0; | ||
872 | |||
873 | if (!fs_info->quota_enabled || !is_fstree(ref_root)) | ||
874 | return 0; | ||
875 | |||
876 | delayed_refs = &trans->transaction->delayed_refs; | ||
877 | |||
878 | spin_lock(&delayed_refs->lock); | ||
879 | ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0); | ||
880 | if (!ref_head) { | ||
881 | ret = -ENOENT; | ||
882 | goto out; | ||
883 | } | ||
884 | WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root); | ||
885 | ref_head->qgroup_ref_root = ref_root; | ||
886 | ref_head->qgroup_reserved = num_bytes; | ||
887 | out: | ||
888 | spin_unlock(&delayed_refs->lock); | ||
889 | return ret; | ||
890 | } | ||
891 | |||
892 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | 862 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
893 | struct btrfs_trans_handle *trans, | 863 | struct btrfs_trans_handle *trans, |
894 | u64 bytenr, u64 num_bytes, | 864 | u64 bytenr, u64 num_bytes, |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 5fca9534a271..43f3629760e9 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
250 | u64 parent, u64 ref_root, | 250 | u64 parent, u64 ref_root, |
251 | u64 owner, u64 offset, u64 reserved, int action, | 251 | u64 owner, u64 offset, u64 reserved, int action, |
252 | struct btrfs_delayed_extent_op *extent_op); | 252 | struct btrfs_delayed_extent_op *extent_op); |
253 | int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, | ||
254 | struct btrfs_trans_handle *trans, | ||
255 | u64 ref_root, u64 bytenr, u64 num_bytes); | ||
256 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | 253 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
257 | struct btrfs_trans_handle *trans, | 254 | struct btrfs_trans_handle *trans, |
258 | u64 bytenr, u64 num_bytes, | 255 | u64 bytenr, u64 num_bytes, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 59febfb8d04a..54bc8c7c6bcd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
559 | u32 nritems = btrfs_header_nritems(leaf); | 559 | u32 nritems = btrfs_header_nritems(leaf); |
560 | int slot; | 560 | int slot; |
561 | 561 | ||
562 | if (nritems == 0) | 562 | if (nritems == 0) { |
563 | struct btrfs_root *check_root; | ||
564 | |||
565 | key.objectid = btrfs_header_owner(leaf); | ||
566 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
567 | key.offset = (u64)-1; | ||
568 | |||
569 | check_root = btrfs_get_fs_root(root->fs_info, &key, false); | ||
570 | /* | ||
571 | * The only reason we also check NULL here is that during | ||
572 | * open_ctree() some roots have not yet been set up. | ||
573 | */ | ||
574 | if (!IS_ERR_OR_NULL(check_root)) { | ||
575 | /* if leaf is the root, then it's fine */ | ||
576 | if (leaf->start != | ||
577 | btrfs_root_bytenr(&check_root->root_item)) { | ||
578 | CORRUPT("non-root leaf's nritems is 0", | ||
579 | leaf, root, 0); | ||
580 | return -EIO; | ||
581 | } | ||
582 | } | ||
563 | return 0; | 583 | return 0; |
584 | } | ||
564 | 585 | ||
565 | /* Check the 0 item */ | 586 | /* Check the 0 item */ |
566 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | 587 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != |
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
612 | return 0; | 633 | return 0; |
613 | } | 634 | } |
614 | 635 | ||
636 | static int check_node(struct btrfs_root *root, struct extent_buffer *node) | ||
637 | { | ||
638 | unsigned long nr = btrfs_header_nritems(node); | ||
639 | |||
640 | if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { | ||
641 | btrfs_crit(root->fs_info, | ||
642 | "corrupt node: block %llu root %llu nritems %lu", | ||
643 | node->start, root->objectid, nr); | ||
644 | return -EIO; | ||
645 | } | ||
646 | return 0; | ||
647 | } | ||
648 | |||
615 | static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | 649 | static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, |
616 | u64 phy_offset, struct page *page, | 650 | u64 phy_offset, struct page *page, |
617 | u64 start, u64 end, int mirror) | 651 | u64 start, u64 end, int mirror) |
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
682 | ret = -EIO; | 716 | ret = -EIO; |
683 | } | 717 | } |
684 | 718 | ||
719 | if (found_level > 0 && check_node(root, eb)) | ||
720 | ret = -EIO; | ||
721 | |||
685 | if (!ret) | 722 | if (!ret) |
686 | set_extent_buffer_uptodate(eb); | 723 | set_extent_buffer_uptodate(eb); |
687 | err: | 724 | err: |
@@ -1618,8 +1655,8 @@ fail: | |||
1618 | return ret; | 1655 | return ret; |
1619 | } | 1656 | } |
1620 | 1657 | ||
1621 | static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 1658 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
1622 | u64 root_id) | 1659 | u64 root_id) |
1623 | { | 1660 | { |
1624 | struct btrfs_root *root; | 1661 | struct btrfs_root *root; |
1625 | 1662 | ||
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) | |||
2298 | fs_info->quota_enabled = 0; | 2335 | fs_info->quota_enabled = 0; |
2299 | fs_info->pending_quota_state = 0; | 2336 | fs_info->pending_quota_state = 0; |
2300 | fs_info->qgroup_ulist = NULL; | 2337 | fs_info->qgroup_ulist = NULL; |
2338 | fs_info->qgroup_rescan_running = false; | ||
2301 | mutex_init(&fs_info->qgroup_rescan_lock); | 2339 | mutex_init(&fs_info->qgroup_rescan_lock); |
2302 | } | 2340 | } |
2303 | 2341 | ||
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb, | |||
2624 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2662 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2625 | atomic_set(&fs_info->reada_works_cnt, 0); | 2663 | atomic_set(&fs_info->reada_works_cnt, 0); |
2626 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2664 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2665 | fs_info->fs_frozen = 0; | ||
2627 | fs_info->sb = sb; | 2666 | fs_info->sb = sb; |
2628 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; | 2667 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; |
2629 | fs_info->metadata_ratio = 0; | 2668 | fs_info->metadata_ratio = 0; |
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
3739 | if (btrfs_root_refs(&root->root_item) == 0) | 3778 | if (btrfs_root_refs(&root->root_item) == 0) |
3740 | synchronize_srcu(&fs_info->subvol_srcu); | 3779 | synchronize_srcu(&fs_info->subvol_srcu); |
3741 | 3780 | ||
3742 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) | 3781 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
3743 | btrfs_free_log(NULL, root); | 3782 | btrfs_free_log(NULL, root); |
3783 | if (root->reloc_root) { | ||
3784 | free_extent_buffer(root->reloc_root->node); | ||
3785 | free_extent_buffer(root->reloc_root->commit_root); | ||
3786 | btrfs_put_fs_root(root->reloc_root); | ||
3787 | root->reloc_root = NULL; | ||
3788 | } | ||
3789 | } | ||
3744 | 3790 | ||
3745 | if (root->free_ino_pinned) | 3791 | if (root->free_ino_pinned) |
3746 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | 3792 | __btrfs_remove_free_space_cache(root->free_ino_pinned); |
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root) | |||
3851 | smp_mb(); | 3897 | smp_mb(); |
3852 | 3898 | ||
3853 | /* wait for the qgroup rescan worker to stop */ | 3899 | /* wait for the qgroup rescan worker to stop */ |
3854 | btrfs_qgroup_wait_for_completion(fs_info); | 3900 | btrfs_qgroup_wait_for_completion(fs_info, false); |
3855 | 3901 | ||
3856 | /* wait for the uuid_scan task to finish */ | 3902 | /* wait for the uuid_scan task to finish */ |
3857 | down(&fs_info->uuid_tree_rescan_sem); | 3903 | down(&fs_info->uuid_tree_rescan_sem); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b3207a0e09f7..f19a982f5a4f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, | |||
68 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | 68 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
69 | struct btrfs_key *location); | 69 | struct btrfs_key *location); |
70 | int btrfs_init_fs_root(struct btrfs_root *root); | 70 | int btrfs_init_fs_root(struct btrfs_root *root); |
71 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
72 | u64 root_id); | ||
71 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | 73 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, |
72 | struct btrfs_root *root); | 74 | struct btrfs_root *root); |
73 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); | 75 | void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61b494e8e604..0450dc410533 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -60,21 +60,6 @@ enum { | |||
60 | CHUNK_ALLOC_FORCE = 2, | 60 | CHUNK_ALLOC_FORCE = 2, |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* | ||
64 | * Control how reservations are dealt with. | ||
65 | * | ||
66 | * RESERVE_FREE - freeing a reservation. | ||
67 | * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for | ||
68 | * ENOSPC accounting | ||
69 | * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update | ||
70 | * bytes_may_use as the ENOSPC accounting is done elsewhere | ||
71 | */ | ||
72 | enum { | ||
73 | RESERVE_FREE = 0, | ||
74 | RESERVE_ALLOC = 1, | ||
75 | RESERVE_ALLOC_NO_ACCOUNT = 2, | ||
76 | }; | ||
77 | |||
78 | static int update_block_group(struct btrfs_trans_handle *trans, | 63 | static int update_block_group(struct btrfs_trans_handle *trans, |
79 | struct btrfs_root *root, u64 bytenr, | 64 | struct btrfs_root *root, u64 bytenr, |
80 | u64 num_bytes, int alloc); | 65 | u64 num_bytes, int alloc); |
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level, | |||
104 | struct btrfs_key *key); | 89 | struct btrfs_key *key); |
105 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 90 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
106 | int dump_block_groups); | 91 | int dump_block_groups); |
107 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 92 | static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, |
108 | u64 num_bytes, int reserve, | 93 | u64 ram_bytes, u64 num_bytes, int delalloc); |
109 | int delalloc); | 94 | static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, |
95 | u64 num_bytes, int delalloc); | ||
110 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | 96 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
111 | u64 num_bytes); | 97 | u64 num_bytes); |
112 | int btrfs_pin_extent(struct btrfs_root *root, | 98 | int btrfs_pin_extent(struct btrfs_root *root, |
@@ -3501,7 +3487,6 @@ again: | |||
3501 | dcs = BTRFS_DC_SETUP; | 3487 | dcs = BTRFS_DC_SETUP; |
3502 | else if (ret == -ENOSPC) | 3488 | else if (ret == -ENOSPC) |
3503 | set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); | 3489 | set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); |
3504 | btrfs_free_reserved_data_space(inode, 0, num_pages); | ||
3505 | 3490 | ||
3506 | out_put: | 3491 | out_put: |
3507 | iput(inode); | 3492 | iput(inode); |
@@ -4472,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans, | |||
4472 | } | 4457 | } |
4473 | } | 4458 | } |
4474 | 4459 | ||
4460 | /* | ||
4461 | * If force is CHUNK_ALLOC_FORCE: | ||
4462 | * - return 1 if it successfully allocates a chunk, | ||
4463 | * - return errors including -ENOSPC otherwise. | ||
4464 | * If force is NOT CHUNK_ALLOC_FORCE: | ||
4465 | * - return 0 if it doesn't need to allocate a new chunk, | ||
4466 | * - return 1 if it successfully allocates a chunk, | ||
4467 | * - return errors including -ENOSPC otherwise. | ||
4468 | */ | ||
4475 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 4469 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
4476 | struct btrfs_root *extent_root, u64 flags, int force) | 4470 | struct btrfs_root *extent_root, u64 flags, int force) |
4477 | { | 4471 | { |
@@ -4882,7 +4876,7 @@ static int flush_space(struct btrfs_root *root, | |||
4882 | btrfs_get_alloc_profile(root, 0), | 4876 | btrfs_get_alloc_profile(root, 0), |
4883 | CHUNK_ALLOC_NO_FORCE); | 4877 | CHUNK_ALLOC_NO_FORCE); |
4884 | btrfs_end_transaction(trans, root); | 4878 | btrfs_end_transaction(trans, root); |
4885 | if (ret == -ENOSPC) | 4879 | if (ret > 0 || ret == -ENOSPC) |
4886 | ret = 0; | 4880 | ret = 0; |
4887 | break; | 4881 | break; |
4888 | case COMMIT_TRANS: | 4882 | case COMMIT_TRANS: |
@@ -6497,19 +6491,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) | |||
6497 | } | 6491 | } |
6498 | 6492 | ||
6499 | /** | 6493 | /** |
6500 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 6494 | * btrfs_add_reserved_bytes - update the block_group and space info counters |
6501 | * @cache: The cache we are manipulating | 6495 | * @cache: The cache we are manipulating |
6496 | * @ram_bytes: The number of bytes of file content, and will be the same as | ||
6497 | * @num_bytes except for the compress path. | ||
6502 | * @num_bytes: The number of bytes in question | 6498 | * @num_bytes: The number of bytes in question |
6503 | * @reserve: One of the reservation enums | ||
6504 | * @delalloc: The blocks are allocated for the delalloc write | 6499 | * @delalloc: The blocks are allocated for the delalloc write |
6505 | * | 6500 | * |
6506 | * This is called by the allocator when it reserves space, or by somebody who is | 6501 | * This is called by the allocator when it reserves space. Metadata |
6507 | * freeing space that was never actually used on disk. For example if you | 6502 | * reservations should be called with RESERVE_ALLOC so we do the proper |
6508 | * reserve some space for a new leaf in transaction A and before transaction A | ||
6509 | * commits you free that leaf, you call this with reserve set to 0 in order to | ||
6510 | * clear the reservation. | ||
6511 | * | ||
6512 | * Metadata reservations should be called with RESERVE_ALLOC so we do the proper | ||
6513 | * ENOSPC accounting. For data we handle the reservation through clearing the | 6503 | * ENOSPC accounting. For data we handle the reservation through clearing the |
6514 | * delalloc bits in the io_tree. We have to do this since we could end up | 6504 | * delalloc bits in the io_tree. We have to do this since we could end up |
6515 | * allocating less disk space for the amount of data we have reserved in the | 6505 | * allocating less disk space for the amount of data we have reserved in the |
@@ -6519,44 +6509,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) | |||
6519 | * make the reservation and return -EAGAIN, otherwise this function always | 6509 | * make the reservation and return -EAGAIN, otherwise this function always |
6520 | * succeeds. | 6510 | * succeeds. |
6521 | */ | 6511 | */ |
6522 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 6512 | static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, |
6523 | u64 num_bytes, int reserve, int delalloc) | 6513 | u64 ram_bytes, u64 num_bytes, int delalloc) |
6524 | { | 6514 | { |
6525 | struct btrfs_space_info *space_info = cache->space_info; | 6515 | struct btrfs_space_info *space_info = cache->space_info; |
6526 | int ret = 0; | 6516 | int ret = 0; |
6527 | 6517 | ||
6528 | spin_lock(&space_info->lock); | 6518 | spin_lock(&space_info->lock); |
6529 | spin_lock(&cache->lock); | 6519 | spin_lock(&cache->lock); |
6530 | if (reserve != RESERVE_FREE) { | 6520 | if (cache->ro) { |
6531 | if (cache->ro) { | 6521 | ret = -EAGAIN; |
6532 | ret = -EAGAIN; | ||
6533 | } else { | ||
6534 | cache->reserved += num_bytes; | ||
6535 | space_info->bytes_reserved += num_bytes; | ||
6536 | if (reserve == RESERVE_ALLOC) { | ||
6537 | trace_btrfs_space_reservation(cache->fs_info, | ||
6538 | "space_info", space_info->flags, | ||
6539 | num_bytes, 0); | ||
6540 | space_info->bytes_may_use -= num_bytes; | ||
6541 | } | ||
6542 | |||
6543 | if (delalloc) | ||
6544 | cache->delalloc_bytes += num_bytes; | ||
6545 | } | ||
6546 | } else { | 6522 | } else { |
6547 | if (cache->ro) | 6523 | cache->reserved += num_bytes; |
6548 | space_info->bytes_readonly += num_bytes; | 6524 | space_info->bytes_reserved += num_bytes; |
6549 | cache->reserved -= num_bytes; | ||
6550 | space_info->bytes_reserved -= num_bytes; | ||
6551 | 6525 | ||
6526 | trace_btrfs_space_reservation(cache->fs_info, | ||
6527 | "space_info", space_info->flags, | ||
6528 | ram_bytes, 0); | ||
6529 | space_info->bytes_may_use -= ram_bytes; | ||
6552 | if (delalloc) | 6530 | if (delalloc) |
6553 | cache->delalloc_bytes -= num_bytes; | 6531 | cache->delalloc_bytes += num_bytes; |
6554 | } | 6532 | } |
6555 | spin_unlock(&cache->lock); | 6533 | spin_unlock(&cache->lock); |
6556 | spin_unlock(&space_info->lock); | 6534 | spin_unlock(&space_info->lock); |
6557 | return ret; | 6535 | return ret; |
6558 | } | 6536 | } |
6559 | 6537 | ||
6538 | /** | ||
6539 | * btrfs_free_reserved_bytes - update the block_group and space info counters | ||
6540 | * @cache: The cache we are manipulating | ||
6541 | * @num_bytes: The number of bytes in question | ||
6542 | * @delalloc: The blocks are allocated for the delalloc write | ||
6543 | * | ||
6544 | * This is called by somebody who is freeing space that was never actually used | ||
6545 | * on disk. For example if you reserve some space for a new leaf in transaction | ||
6546 | * A and before transaction A commits you free that leaf, you call this | ||
6547 | * function in order to clear the reservation. | ||
6548 | */ | ||
6549 | |||
6550 | static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
6551 | u64 num_bytes, int delalloc) | ||
6552 | { | ||
6553 | struct btrfs_space_info *space_info = cache->space_info; | ||
6554 | int ret = 0; | ||
6555 | |||
6556 | spin_lock(&space_info->lock); | ||
6557 | spin_lock(&cache->lock); | ||
6558 | if (cache->ro) | ||
6559 | space_info->bytes_readonly += num_bytes; | ||
6560 | cache->reserved -= num_bytes; | ||
6561 | space_info->bytes_reserved -= num_bytes; | ||
6562 | |||
6563 | if (delalloc) | ||
6564 | cache->delalloc_bytes -= num_bytes; | ||
6565 | spin_unlock(&cache->lock); | ||
6566 | spin_unlock(&space_info->lock); | ||
6567 | return ret; | ||
6568 | } | ||
6560 | void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 6569 | void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
6561 | struct btrfs_root *root) | 6570 | struct btrfs_root *root) |
6562 | { | 6571 | { |
@@ -7191,7 +7200,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7191 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 7200 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
7192 | 7201 | ||
7193 | btrfs_add_free_space(cache, buf->start, buf->len); | 7202 | btrfs_add_free_space(cache, buf->start, buf->len); |
7194 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); | 7203 | btrfs_free_reserved_bytes(cache, buf->len, 0); |
7195 | btrfs_put_block_group(cache); | 7204 | btrfs_put_block_group(cache); |
7196 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); | 7205 | trace_btrfs_reserved_extent_free(root, buf->start, buf->len); |
7197 | pin = 0; | 7206 | pin = 0; |
@@ -7416,9 +7425,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache, | |||
7416 | * the free space extent currently. | 7425 | * the free space extent currently. |
7417 | */ | 7426 | */ |
7418 | static noinline int find_free_extent(struct btrfs_root *orig_root, | 7427 | static noinline int find_free_extent(struct btrfs_root *orig_root, |
7419 | u64 num_bytes, u64 empty_size, | 7428 | u64 ram_bytes, u64 num_bytes, u64 empty_size, |
7420 | u64 hint_byte, struct btrfs_key *ins, | 7429 | u64 hint_byte, struct btrfs_key *ins, |
7421 | u64 flags, int delalloc) | 7430 | u64 flags, int delalloc) |
7422 | { | 7431 | { |
7423 | int ret = 0; | 7432 | int ret = 0; |
7424 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 7433 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
@@ -7430,8 +7439,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, | |||
7430 | struct btrfs_space_info *space_info; | 7439 | struct btrfs_space_info *space_info; |
7431 | int loop = 0; | 7440 | int loop = 0; |
7432 | int index = __get_raid_index(flags); | 7441 | int index = __get_raid_index(flags); |
7433 | int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? | ||
7434 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; | ||
7435 | bool failed_cluster_refill = false; | 7442 | bool failed_cluster_refill = false; |
7436 | bool failed_alloc = false; | 7443 | bool failed_alloc = false; |
7437 | bool use_cluster = true; | 7444 | bool use_cluster = true; |
@@ -7763,8 +7770,8 @@ checks: | |||
7763 | search_start - offset); | 7770 | search_start - offset); |
7764 | BUG_ON(offset > search_start); | 7771 | BUG_ON(offset > search_start); |
7765 | 7772 | ||
7766 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, | 7773 | ret = btrfs_add_reserved_bytes(block_group, ram_bytes, |
7767 | alloc_type, delalloc); | 7774 | num_bytes, delalloc); |
7768 | if (ret == -EAGAIN) { | 7775 | if (ret == -EAGAIN) { |
7769 | btrfs_add_free_space(block_group, offset, num_bytes); | 7776 | btrfs_add_free_space(block_group, offset, num_bytes); |
7770 | goto loop; | 7777 | goto loop; |
@@ -7936,7 +7943,7 @@ again: | |||
7936 | up_read(&info->groups_sem); | 7943 | up_read(&info->groups_sem); |
7937 | } | 7944 | } |
7938 | 7945 | ||
7939 | int btrfs_reserve_extent(struct btrfs_root *root, | 7946 | int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, |
7940 | u64 num_bytes, u64 min_alloc_size, | 7947 | u64 num_bytes, u64 min_alloc_size, |
7941 | u64 empty_size, u64 hint_byte, | 7948 | u64 empty_size, u64 hint_byte, |
7942 | struct btrfs_key *ins, int is_data, int delalloc) | 7949 | struct btrfs_key *ins, int is_data, int delalloc) |
@@ -7948,8 +7955,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, | |||
7948 | flags = btrfs_get_alloc_profile(root, is_data); | 7955 | flags = btrfs_get_alloc_profile(root, is_data); |
7949 | again: | 7956 | again: |
7950 | WARN_ON(num_bytes < root->sectorsize); | 7957 | WARN_ON(num_bytes < root->sectorsize); |
7951 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, | 7958 | ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, |
7952 | flags, delalloc); | 7959 | hint_byte, ins, flags, delalloc); |
7953 | if (!ret && !is_data) { | 7960 | if (!ret && !is_data) { |
7954 | btrfs_dec_block_group_reservations(root->fs_info, | 7961 | btrfs_dec_block_group_reservations(root->fs_info, |
7955 | ins->objectid); | 7962 | ins->objectid); |
@@ -7958,6 +7965,7 @@ again: | |||
7958 | num_bytes = min(num_bytes >> 1, ins->offset); | 7965 | num_bytes = min(num_bytes >> 1, ins->offset); |
7959 | num_bytes = round_down(num_bytes, root->sectorsize); | 7966 | num_bytes = round_down(num_bytes, root->sectorsize); |
7960 | num_bytes = max(num_bytes, min_alloc_size); | 7967 | num_bytes = max(num_bytes, min_alloc_size); |
7968 | ram_bytes = num_bytes; | ||
7961 | if (num_bytes == min_alloc_size) | 7969 | if (num_bytes == min_alloc_size) |
7962 | final_tried = true; | 7970 | final_tried = true; |
7963 | goto again; | 7971 | goto again; |
@@ -7995,7 +8003,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
7995 | if (btrfs_test_opt(root->fs_info, DISCARD)) | 8003 | if (btrfs_test_opt(root->fs_info, DISCARD)) |
7996 | ret = btrfs_discard_extent(root, start, len, NULL); | 8004 | ret = btrfs_discard_extent(root, start, len, NULL); |
7997 | btrfs_add_free_space(cache, start, len); | 8005 | btrfs_add_free_space(cache, start, len); |
7998 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); | 8006 | btrfs_free_reserved_bytes(cache, len, delalloc); |
7999 | trace_btrfs_reserved_extent_free(root, start, len); | 8007 | trace_btrfs_reserved_extent_free(root, start, len); |
8000 | } | 8008 | } |
8001 | 8009 | ||
@@ -8223,8 +8231,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
8223 | if (!block_group) | 8231 | if (!block_group) |
8224 | return -EINVAL; | 8232 | return -EINVAL; |
8225 | 8233 | ||
8226 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, | 8234 | ret = btrfs_add_reserved_bytes(block_group, ins->offset, |
8227 | RESERVE_ALLOC_NO_ACCOUNT, 0); | 8235 | ins->offset, 0); |
8228 | BUG_ON(ret); /* logic error */ | 8236 | BUG_ON(ret); /* logic error */ |
8229 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 8237 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
8230 | 0, owner, offset, ins, 1); | 8238 | 0, owner, offset, ins, 1); |
@@ -8368,7 +8376,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
8368 | if (IS_ERR(block_rsv)) | 8376 | if (IS_ERR(block_rsv)) |
8369 | return ERR_CAST(block_rsv); | 8377 | return ERR_CAST(block_rsv); |
8370 | 8378 | ||
8371 | ret = btrfs_reserve_extent(root, blocksize, blocksize, | 8379 | ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, |
8372 | empty_size, hint, &ins, 0, 0); | 8380 | empty_size, hint, &ins, 0, 0); |
8373 | if (ret) | 8381 | if (ret) |
8374 | goto out_unuse; | 8382 | goto out_unuse; |
@@ -8521,35 +8529,6 @@ reada: | |||
8521 | wc->reada_slot = slot; | 8529 | wc->reada_slot = slot; |
8522 | } | 8530 | } |
8523 | 8531 | ||
8524 | /* | ||
8525 | * These may not be seen by the usual inc/dec ref code so we have to | ||
8526 | * add them here. | ||
8527 | */ | ||
8528 | static int record_one_subtree_extent(struct btrfs_trans_handle *trans, | ||
8529 | struct btrfs_root *root, u64 bytenr, | ||
8530 | u64 num_bytes) | ||
8531 | { | ||
8532 | struct btrfs_qgroup_extent_record *qrecord; | ||
8533 | struct btrfs_delayed_ref_root *delayed_refs; | ||
8534 | |||
8535 | qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); | ||
8536 | if (!qrecord) | ||
8537 | return -ENOMEM; | ||
8538 | |||
8539 | qrecord->bytenr = bytenr; | ||
8540 | qrecord->num_bytes = num_bytes; | ||
8541 | qrecord->old_roots = NULL; | ||
8542 | |||
8543 | delayed_refs = &trans->transaction->delayed_refs; | ||
8544 | spin_lock(&delayed_refs->lock); | ||
8545 | if (btrfs_qgroup_insert_dirty_extent(trans->fs_info, | ||
8546 | delayed_refs, qrecord)) | ||
8547 | kfree(qrecord); | ||
8548 | spin_unlock(&delayed_refs->lock); | ||
8549 | |||
8550 | return 0; | ||
8551 | } | ||
8552 | |||
8553 | static int account_leaf_items(struct btrfs_trans_handle *trans, | 8532 | static int account_leaf_items(struct btrfs_trans_handle *trans, |
8554 | struct btrfs_root *root, | 8533 | struct btrfs_root *root, |
8555 | struct extent_buffer *eb) | 8534 | struct extent_buffer *eb) |
@@ -8583,7 +8562,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, | |||
8583 | 8562 | ||
8584 | num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); | 8563 | num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); |
8585 | 8564 | ||
8586 | ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); | 8565 | ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, |
8566 | bytenr, num_bytes, GFP_NOFS); | ||
8587 | if (ret) | 8567 | if (ret) |
8588 | return ret; | 8568 | return ret; |
8589 | } | 8569 | } |
@@ -8732,8 +8712,9 @@ walk_down: | |||
8732 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | 8712 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
8733 | path->locks[level] = BTRFS_READ_LOCK_BLOCKING; | 8713 | path->locks[level] = BTRFS_READ_LOCK_BLOCKING; |
8734 | 8714 | ||
8735 | ret = record_one_subtree_extent(trans, root, child_bytenr, | 8715 | ret = btrfs_qgroup_insert_dirty_extent(trans, |
8736 | root->nodesize); | 8716 | root->fs_info, child_bytenr, |
8717 | root->nodesize, GFP_NOFS); | ||
8737 | if (ret) | 8718 | if (ret) |
8738 | goto out; | 8719 | goto out; |
8739 | } | 8720 | } |
@@ -9906,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root, | |||
9906 | } else { | 9887 | } else { |
9907 | ret = 0; | 9888 | ret = 0; |
9908 | } | 9889 | } |
9890 | free_extent_map(em); | ||
9909 | goto out; | 9891 | goto out; |
9910 | } | 9892 | } |
9911 | path->slots[0]++; | 9893 | path->slots[0]++; |
@@ -9942,6 +9924,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) | |||
9942 | block_group->iref = 0; | 9924 | block_group->iref = 0; |
9943 | block_group->inode = NULL; | 9925 | block_group->inode = NULL; |
9944 | spin_unlock(&block_group->lock); | 9926 | spin_unlock(&block_group->lock); |
9927 | ASSERT(block_group->io_ctl.inode == NULL); | ||
9945 | iput(inode); | 9928 | iput(inode); |
9946 | last = block_group->key.objectid + block_group->key.offset; | 9929 | last = block_group->key.objectid + block_group->key.offset; |
9947 | btrfs_put_block_group(block_group); | 9930 | btrfs_put_block_group(block_group); |
@@ -9999,6 +9982,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
9999 | free_excluded_extents(info->extent_root, block_group); | 9982 | free_excluded_extents(info->extent_root, block_group); |
10000 | 9983 | ||
10001 | btrfs_remove_free_space_cache(block_group); | 9984 | btrfs_remove_free_space_cache(block_group); |
9985 | ASSERT(list_empty(&block_group->dirty_list)); | ||
9986 | ASSERT(list_empty(&block_group->io_list)); | ||
9987 | ASSERT(list_empty(&block_group->bg_list)); | ||
9988 | ASSERT(atomic_read(&block_group->count) == 1); | ||
10002 | btrfs_put_block_group(block_group); | 9989 | btrfs_put_block_group(block_group); |
10003 | 9990 | ||
10004 | spin_lock(&info->block_group_cache_lock); | 9991 | spin_lock(&info->block_group_cache_lock); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bc2729a7612d..28cd88fccc7e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define EXTENT_DAMAGED (1U << 14) | 20 | #define EXTENT_DAMAGED (1U << 14) |
21 | #define EXTENT_NORESERVE (1U << 15) | 21 | #define EXTENT_NORESERVE (1U << 15) |
22 | #define EXTENT_QGROUP_RESERVED (1U << 16) | 22 | #define EXTENT_QGROUP_RESERVED (1U << 16) |
23 | #define EXTENT_CLEAR_DATA_RESV (1U << 17) | ||
23 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 24 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
24 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 25 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
25 | 26 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9404121fd5f7..fea31a4a6e36 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
2033 | */ | 2033 | */ |
2034 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 2034 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
2035 | &BTRFS_I(inode)->runtime_flags); | 2035 | &BTRFS_I(inode)->runtime_flags); |
2036 | /* | ||
2037 | * An ordered extent might have started before and completed | ||
2038 | * already with io errors, in which case the inode was not | ||
2039 | * updated and we end up here. So check the inode's mapping | ||
2040 | * flags for any errors that might have happened while doing | ||
2041 | * writeback of file data. | ||
2042 | */ | ||
2043 | ret = btrfs_inode_check_errors(inode); | ||
2036 | inode_unlock(inode); | 2044 | inode_unlock(inode); |
2037 | goto out; | 2045 | goto out; |
2038 | } | 2046 | } |
@@ -2062,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
2062 | } | 2070 | } |
2063 | trans->sync = true; | 2071 | trans->sync = true; |
2064 | 2072 | ||
2065 | btrfs_init_log_ctx(&ctx); | 2073 | btrfs_init_log_ctx(&ctx, inode); |
2066 | 2074 | ||
2067 | ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); | 2075 | ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); |
2068 | if (ret < 0) { | 2076 | if (ret < 0) { |
@@ -2667,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2667 | 2675 | ||
2668 | alloc_start = round_down(offset, blocksize); | 2676 | alloc_start = round_down(offset, blocksize); |
2669 | alloc_end = round_up(offset + len, blocksize); | 2677 | alloc_end = round_up(offset + len, blocksize); |
2678 | cur_offset = alloc_start; | ||
2670 | 2679 | ||
2671 | /* Make sure we aren't being given some crap mode */ | 2680 | /* Make sure we aren't being given some crap mode */ |
2672 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 2681 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
@@ -2759,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2759 | 2768 | ||
2760 | /* First, check if we exceed the qgroup limit */ | 2769 | /* First, check if we exceed the qgroup limit */ |
2761 | INIT_LIST_HEAD(&reserve_list); | 2770 | INIT_LIST_HEAD(&reserve_list); |
2762 | cur_offset = alloc_start; | ||
2763 | while (1) { | 2771 | while (1) { |
2764 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 2772 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
2765 | alloc_end - cur_offset, 0); | 2773 | alloc_end - cur_offset, 0); |
@@ -2786,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2786 | last_byte - cur_offset); | 2794 | last_byte - cur_offset); |
2787 | if (ret < 0) | 2795 | if (ret < 0) |
2788 | break; | 2796 | break; |
2797 | } else { | ||
2798 | /* | ||
2799 | * No need to reserve an unwritten extent for this | ||
2800 | * range; free the reserved data space first, otherwise | ||
2801 | * it will result in a false ENOSPC error. | ||
2802 | */ | ||
2803 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
2804 | last_byte - cur_offset); | ||
2789 | } | 2805 | } |
2790 | free_extent_map(em); | 2806 | free_extent_map(em); |
2791 | cur_offset = last_byte; | 2807 | cur_offset = last_byte; |
@@ -2803,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2803 | range->start, | 2819 | range->start, |
2804 | range->len, 1 << inode->i_blkbits, | 2820 | range->len, 1 << inode->i_blkbits, |
2805 | offset + len, &alloc_hint); | 2821 | offset + len, &alloc_hint); |
2822 | else | ||
2823 | btrfs_free_reserved_data_space(inode, range->start, | ||
2824 | range->len); | ||
2806 | list_del(&range->list); | 2825 | list_del(&range->list); |
2807 | kfree(range); | 2826 | kfree(range); |
2808 | } | 2827 | } |
@@ -2837,18 +2856,11 @@ out_unlock: | |||
2837 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 2856 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
2838 | &cached_state, GFP_KERNEL); | 2857 | &cached_state, GFP_KERNEL); |
2839 | out: | 2858 | out: |
2840 | /* | ||
2841 | * As we waited the extent range, the data_rsv_map must be empty | ||
2842 | * in the range, as written data range will be released from it. | ||
2843 | * And for preallocated extent, it will also be released when | ||
2844 | * its metadata is written. | ||
2845 | * So this is completely used as cleanup. | ||
2846 | */ | ||
2847 | btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start); | ||
2848 | inode_unlock(inode); | 2859 | inode_unlock(inode); |
2849 | /* Let go of our reservation. */ | 2860 | /* Let go of our reservation. */ |
2850 | btrfs_free_reserved_data_space(inode, alloc_start, | 2861 | if (ret != 0) |
2851 | alloc_end - alloc_start); | 2862 | btrfs_free_reserved_data_space(inode, alloc_start, |
2863 | alloc_end - cur_offset); | ||
2852 | return ret; | 2864 | return ret; |
2853 | } | 2865 | } |
2854 | 2866 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index aa6fabaee72e..359ee861b5a4 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -495,10 +495,9 @@ again: | |||
495 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | 495 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, |
496 | prealloc, prealloc, &alloc_hint); | 496 | prealloc, prealloc, &alloc_hint); |
497 | if (ret) { | 497 | if (ret) { |
498 | btrfs_delalloc_release_space(inode, 0, prealloc); | 498 | btrfs_delalloc_release_metadata(inode, prealloc); |
499 | goto out_put; | 499 | goto out_put; |
500 | } | 500 | } |
501 | btrfs_free_reserved_data_space(inode, 0, prealloc); | ||
502 | 501 | ||
503 | ret = btrfs_write_out_ino_cache(root, trans, path, inode); | 502 | ret = btrfs_write_out_ino_cache(root, trans, path, inode); |
504 | out_put: | 503 | out_put: |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2f5975954ccf..e6811c42e41e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -566,6 +566,8 @@ cont: | |||
566 | PAGE_SET_WRITEBACK | | 566 | PAGE_SET_WRITEBACK | |
567 | page_error_op | | 567 | page_error_op | |
568 | PAGE_END_WRITEBACK); | 568 | PAGE_END_WRITEBACK); |
569 | btrfs_free_reserved_data_space_noquota(inode, start, | ||
570 | end - start + 1); | ||
569 | goto free_pages_out; | 571 | goto free_pages_out; |
570 | } | 572 | } |
571 | } | 573 | } |
@@ -742,7 +744,7 @@ retry: | |||
742 | lock_extent(io_tree, async_extent->start, | 744 | lock_extent(io_tree, async_extent->start, |
743 | async_extent->start + async_extent->ram_size - 1); | 745 | async_extent->start + async_extent->ram_size - 1); |
744 | 746 | ||
745 | ret = btrfs_reserve_extent(root, | 747 | ret = btrfs_reserve_extent(root, async_extent->ram_size, |
746 | async_extent->compressed_size, | 748 | async_extent->compressed_size, |
747 | async_extent->compressed_size, | 749 | async_extent->compressed_size, |
748 | 0, alloc_hint, &ins, 1, 1); | 750 | 0, alloc_hint, &ins, 1, 1); |
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
969 | EXTENT_DEFRAG, PAGE_UNLOCK | | 971 | EXTENT_DEFRAG, PAGE_UNLOCK | |
970 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | | 972 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | |
971 | PAGE_END_WRITEBACK); | 973 | PAGE_END_WRITEBACK); |
972 | 974 | btrfs_free_reserved_data_space_noquota(inode, start, | |
975 | end - start + 1); | ||
973 | *nr_written = *nr_written + | 976 | *nr_written = *nr_written + |
974 | (end - start + PAGE_SIZE) / PAGE_SIZE; | 977 | (end - start + PAGE_SIZE) / PAGE_SIZE; |
975 | *page_started = 1; | 978 | *page_started = 1; |
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
989 | unsigned long op; | 992 | unsigned long op; |
990 | 993 | ||
991 | cur_alloc_size = disk_num_bytes; | 994 | cur_alloc_size = disk_num_bytes; |
992 | ret = btrfs_reserve_extent(root, cur_alloc_size, | 995 | ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, |
993 | root->sectorsize, 0, alloc_hint, | 996 | root->sectorsize, 0, alloc_hint, |
994 | &ins, 1, 1); | 997 | &ins, 1, 1); |
995 | if (ret < 0) | 998 | if (ret < 0) |
@@ -1489,8 +1492,10 @@ out_check: | |||
1489 | extent_clear_unlock_delalloc(inode, cur_offset, | 1492 | extent_clear_unlock_delalloc(inode, cur_offset, |
1490 | cur_offset + num_bytes - 1, | 1493 | cur_offset + num_bytes - 1, |
1491 | locked_page, EXTENT_LOCKED | | 1494 | locked_page, EXTENT_LOCKED | |
1492 | EXTENT_DELALLOC, PAGE_UNLOCK | | 1495 | EXTENT_DELALLOC | |
1493 | PAGE_SET_PRIVATE2); | 1496 | EXTENT_CLEAR_DATA_RESV, |
1497 | PAGE_UNLOCK | PAGE_SET_PRIVATE2); | ||
1498 | |||
1494 | if (!nolock && nocow) | 1499 | if (!nolock && nocow) |
1495 | btrfs_end_write_no_snapshoting(root); | 1500 | btrfs_end_write_no_snapshoting(root); |
1496 | cur_offset = extent_end; | 1501 | cur_offset = extent_end; |
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1807 | return; | 1812 | return; |
1808 | 1813 | ||
1809 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID | 1814 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
1810 | && do_list && !(state->state & EXTENT_NORESERVE)) | 1815 | && do_list && !(state->state & EXTENT_NORESERVE) |
1816 | && (*bits & (EXTENT_DO_ACCOUNTING | | ||
1817 | EXTENT_CLEAR_DATA_RESV))) | ||
1811 | btrfs_free_reserved_data_space_noquota(inode, | 1818 | btrfs_free_reserved_data_space_noquota(inode, |
1812 | state->start, len); | 1819 | state->start, len); |
1813 | 1820 | ||
@@ -3435,10 +3442,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3435 | found_key.offset = 0; | 3442 | found_key.offset = 0; |
3436 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 3443 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
3437 | ret = PTR_ERR_OR_ZERO(inode); | 3444 | ret = PTR_ERR_OR_ZERO(inode); |
3438 | if (ret && ret != -ESTALE) | 3445 | if (ret && ret != -ENOENT) |
3439 | goto out; | 3446 | goto out; |
3440 | 3447 | ||
3441 | if (ret == -ESTALE && root == root->fs_info->tree_root) { | 3448 | if (ret == -ENOENT && root == root->fs_info->tree_root) { |
3442 | struct btrfs_root *dead_root; | 3449 | struct btrfs_root *dead_root; |
3443 | struct btrfs_fs_info *fs_info = root->fs_info; | 3450 | struct btrfs_fs_info *fs_info = root->fs_info; |
3444 | int is_dead_root = 0; | 3451 | int is_dead_root = 0; |
@@ -3474,7 +3481,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3474 | * Inode is already gone but the orphan item is still there, | 3481 | * Inode is already gone but the orphan item is still there, |
3475 | * kill the orphan item. | 3482 | * kill the orphan item. |
3476 | */ | 3483 | */ |
3477 | if (ret == -ESTALE) { | 3484 | if (ret == -ENOENT) { |
3478 | trans = btrfs_start_transaction(root, 1); | 3485 | trans = btrfs_start_transaction(root, 1); |
3479 | if (IS_ERR(trans)) { | 3486 | if (IS_ERR(trans)) { |
3480 | ret = PTR_ERR(trans); | 3487 | ret = PTR_ERR(trans); |
@@ -3633,7 +3640,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
3633 | /* | 3640 | /* |
3634 | * read an inode from the btree into the in-memory inode | 3641 | * read an inode from the btree into the in-memory inode |
3635 | */ | 3642 | */ |
3636 | static void btrfs_read_locked_inode(struct inode *inode) | 3643 | static int btrfs_read_locked_inode(struct inode *inode) |
3637 | { | 3644 | { |
3638 | struct btrfs_path *path; | 3645 | struct btrfs_path *path; |
3639 | struct extent_buffer *leaf; | 3646 | struct extent_buffer *leaf; |
@@ -3652,14 +3659,19 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
3652 | filled = true; | 3659 | filled = true; |
3653 | 3660 | ||
3654 | path = btrfs_alloc_path(); | 3661 | path = btrfs_alloc_path(); |
3655 | if (!path) | 3662 | if (!path) { |
3663 | ret = -ENOMEM; | ||
3656 | goto make_bad; | 3664 | goto make_bad; |
3665 | } | ||
3657 | 3666 | ||
3658 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 3667 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
3659 | 3668 | ||
3660 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | 3669 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); |
3661 | if (ret) | 3670 | if (ret) { |
3671 | if (ret > 0) | ||
3672 | ret = -ENOENT; | ||
3662 | goto make_bad; | 3673 | goto make_bad; |
3674 | } | ||
3663 | 3675 | ||
3664 | leaf = path->nodes[0]; | 3676 | leaf = path->nodes[0]; |
3665 | 3677 | ||
@@ -3812,11 +3824,12 @@ cache_acl: | |||
3812 | } | 3824 | } |
3813 | 3825 | ||
3814 | btrfs_update_iflags(inode); | 3826 | btrfs_update_iflags(inode); |
3815 | return; | 3827 | return 0; |
3816 | 3828 | ||
3817 | make_bad: | 3829 | make_bad: |
3818 | btrfs_free_path(path); | 3830 | btrfs_free_path(path); |
3819 | make_bad_inode(inode); | 3831 | make_bad_inode(inode); |
3832 | return ret; | ||
3820 | } | 3833 | } |
3821 | 3834 | ||
3822 | /* | 3835 | /* |
@@ -4204,6 +4217,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4204 | int err = 0; | 4217 | int err = 0; |
4205 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4218 | struct btrfs_root *root = BTRFS_I(dir)->root; |
4206 | struct btrfs_trans_handle *trans; | 4219 | struct btrfs_trans_handle *trans; |
4220 | u64 last_unlink_trans; | ||
4207 | 4221 | ||
4208 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 4222 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
4209 | return -ENOTEMPTY; | 4223 | return -ENOTEMPTY; |
@@ -4226,11 +4240,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4226 | if (err) | 4240 | if (err) |
4227 | goto out; | 4241 | goto out; |
4228 | 4242 | ||
4243 | last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; | ||
4244 | |||
4229 | /* now the directory is empty */ | 4245 | /* now the directory is empty */ |
4230 | err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), | 4246 | err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), |
4231 | dentry->d_name.name, dentry->d_name.len); | 4247 | dentry->d_name.name, dentry->d_name.len); |
4232 | if (!err) | 4248 | if (!err) { |
4233 | btrfs_i_size_write(inode, 0); | 4249 | btrfs_i_size_write(inode, 0); |
4250 | /* | ||
4251 | * Propagate the last_unlink_trans value of the deleted dir to | ||
4252 | * its parent directory. This is to prevent an unrecoverable | ||
4253 | * log tree in the case we do something like this: | ||
4254 | * 1) create dir foo | ||
4255 | * 2) create snapshot under dir foo | ||
4256 | * 3) delete the snapshot | ||
4257 | * 4) rmdir foo | ||
4258 | * 5) mkdir foo | ||
4259 | * 6) fsync foo or some file inside foo | ||
4260 | */ | ||
4261 | if (last_unlink_trans >= trans->transid) | ||
4262 | BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; | ||
4263 | } | ||
4234 | out: | 4264 | out: |
4235 | btrfs_end_transaction(trans, root); | 4265 | btrfs_end_transaction(trans, root); |
4236 | btrfs_btree_balance_dirty(root); | 4266 | btrfs_btree_balance_dirty(root); |
@@ -5606,7 +5636,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
5606 | return ERR_PTR(-ENOMEM); | 5636 | return ERR_PTR(-ENOMEM); |
5607 | 5637 | ||
5608 | if (inode->i_state & I_NEW) { | 5638 | if (inode->i_state & I_NEW) { |
5609 | btrfs_read_locked_inode(inode); | 5639 | int ret; |
5640 | |||
5641 | ret = btrfs_read_locked_inode(inode); | ||
5610 | if (!is_bad_inode(inode)) { | 5642 | if (!is_bad_inode(inode)) { |
5611 | inode_tree_add(inode); | 5643 | inode_tree_add(inode); |
5612 | unlock_new_inode(inode); | 5644 | unlock_new_inode(inode); |
@@ -5615,7 +5647,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
5615 | } else { | 5647 | } else { |
5616 | unlock_new_inode(inode); | 5648 | unlock_new_inode(inode); |
5617 | iput(inode); | 5649 | iput(inode); |
5618 | inode = ERR_PTR(-ESTALE); | 5650 | ASSERT(ret < 0); |
5651 | inode = ERR_PTR(ret < 0 ? ret : -ESTALE); | ||
5619 | } | 5652 | } |
5620 | } | 5653 | } |
5621 | 5654 | ||
@@ -7225,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7225 | int ret; | 7258 | int ret; |
7226 | 7259 | ||
7227 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 7260 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
7228 | ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, | 7261 | ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, |
7229 | alloc_hint, &ins, 1, 1); | 7262 | alloc_hint, &ins, 1, 1); |
7230 | if (ret) | 7263 | if (ret) |
7231 | return ERR_PTR(ret); | 7264 | return ERR_PTR(ret); |
@@ -7725,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7725 | ret = PTR_ERR(em2); | 7758 | ret = PTR_ERR(em2); |
7726 | goto unlock_err; | 7759 | goto unlock_err; |
7727 | } | 7760 | } |
7761 | /* | ||
7762 | * For inode marked NODATACOW or extent marked PREALLOC, | ||
7763 | * use the existing or preallocated extent, so does not | ||
7764 | * need to adjust btrfs_space_info's bytes_may_use. | ||
7765 | */ | ||
7766 | btrfs_free_reserved_data_space_noquota(inode, | ||
7767 | start, len); | ||
7728 | goto unlock; | 7768 | goto unlock; |
7729 | } | 7769 | } |
7730 | } | 7770 | } |
@@ -7759,7 +7799,6 @@ unlock: | |||
7759 | i_size_write(inode, start + len); | 7799 | i_size_write(inode, start + len); |
7760 | 7800 | ||
7761 | adjust_dio_outstanding_extents(inode, dio_data, len); | 7801 | adjust_dio_outstanding_extents(inode, dio_data, len); |
7762 | btrfs_free_reserved_data_space(inode, start, len); | ||
7763 | WARN_ON(dio_data->reserve < len); | 7802 | WARN_ON(dio_data->reserve < len); |
7764 | dio_data->reserve -= len; | 7803 | dio_data->reserve -= len; |
7765 | dio_data->unsubmitted_oe_range_end = start + len; | 7804 | dio_data->unsubmitted_oe_range_end = start + len; |
@@ -10280,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
10280 | u64 last_alloc = (u64)-1; | 10319 | u64 last_alloc = (u64)-1; |
10281 | int ret = 0; | 10320 | int ret = 0; |
10282 | bool own_trans = true; | 10321 | bool own_trans = true; |
10322 | u64 end = start + num_bytes - 1; | ||
10283 | 10323 | ||
10284 | if (trans) | 10324 | if (trans) |
10285 | own_trans = false; | 10325 | own_trans = false; |
@@ -10301,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
10301 | * sized chunks. | 10341 | * sized chunks. |
10302 | */ | 10342 | */ |
10303 | cur_bytes = min(cur_bytes, last_alloc); | 10343 | cur_bytes = min(cur_bytes, last_alloc); |
10304 | ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, | 10344 | ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, |
10305 | *alloc_hint, &ins, 1, 0); | 10345 | min_size, 0, *alloc_hint, &ins, 1, 0); |
10306 | if (ret) { | 10346 | if (ret) { |
10307 | if (own_trans) | 10347 | if (own_trans) |
10308 | btrfs_end_transaction(trans, root); | 10348 | btrfs_end_transaction(trans, root); |
@@ -10388,6 +10428,9 @@ next: | |||
10388 | if (own_trans) | 10428 | if (own_trans) |
10389 | btrfs_end_transaction(trans, root); | 10429 | btrfs_end_transaction(trans, root); |
10390 | } | 10430 | } |
10431 | if (cur_offset < end) | ||
10432 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
10433 | end - cur_offset + 1); | ||
10391 | return ret; | 10434 | return ret; |
10392 | } | 10435 | } |
10393 | 10436 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14ed1e9e6bc8..b2a2da5893af 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
5084 | if (!capable(CAP_SYS_ADMIN)) | 5084 | if (!capable(CAP_SYS_ADMIN)) |
5085 | return -EPERM; | 5085 | return -EPERM; |
5086 | 5086 | ||
5087 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 5087 | return btrfs_qgroup_wait_for_completion(root->fs_info, true); |
5088 | } | 5088 | } |
5089 | 5089 | ||
5090 | static long _btrfs_ioctl_set_received_subvol(struct file *file, | 5090 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93ee1c18ef9d..8db2e29fdcf4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
995 | goto out; | 995 | goto out; |
996 | fs_info->quota_enabled = 0; | 996 | fs_info->quota_enabled = 0; |
997 | fs_info->pending_quota_state = 0; | 997 | fs_info->pending_quota_state = 0; |
998 | btrfs_qgroup_wait_for_completion(fs_info); | 998 | btrfs_qgroup_wait_for_completion(fs_info, false); |
999 | spin_lock(&fs_info->qgroup_lock); | 999 | spin_lock(&fs_info->qgroup_lock); |
1000 | quota_root = fs_info->quota_root; | 1000 | quota_root = fs_info->quota_root; |
1001 | fs_info->quota_root = NULL; | 1001 | fs_info->quota_root = NULL; |
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, | |||
1453 | return ret; | 1453 | return ret; |
1454 | } | 1454 | } |
1455 | 1455 | ||
1456 | struct btrfs_qgroup_extent_record * | 1456 | int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, |
1457 | btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, | 1457 | struct btrfs_delayed_ref_root *delayed_refs, |
1458 | struct btrfs_delayed_ref_root *delayed_refs, | 1458 | struct btrfs_qgroup_extent_record *record) |
1459 | struct btrfs_qgroup_extent_record *record) | ||
1460 | { | 1459 | { |
1461 | struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; | 1460 | struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; |
1462 | struct rb_node *parent_node = NULL; | 1461 | struct rb_node *parent_node = NULL; |
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, | |||
1475 | else if (bytenr > entry->bytenr) | 1474 | else if (bytenr > entry->bytenr) |
1476 | p = &(*p)->rb_right; | 1475 | p = &(*p)->rb_right; |
1477 | else | 1476 | else |
1478 | return entry; | 1477 | return 1; |
1479 | } | 1478 | } |
1480 | 1479 | ||
1481 | rb_link_node(&record->node, parent_node, p); | 1480 | rb_link_node(&record->node, parent_node, p); |
1482 | rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); | 1481 | rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); |
1483 | return NULL; | 1482 | return 0; |
1483 | } | ||
1484 | |||
1485 | int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, | ||
1486 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, | ||
1487 | gfp_t gfp_flag) | ||
1488 | { | ||
1489 | struct btrfs_qgroup_extent_record *record; | ||
1490 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1491 | int ret; | ||
1492 | |||
1493 | if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0) | ||
1494 | return 0; | ||
1495 | if (WARN_ON(trans == NULL)) | ||
1496 | return -EINVAL; | ||
1497 | record = kmalloc(sizeof(*record), gfp_flag); | ||
1498 | if (!record) | ||
1499 | return -ENOMEM; | ||
1500 | |||
1501 | delayed_refs = &trans->transaction->delayed_refs; | ||
1502 | record->bytenr = bytenr; | ||
1503 | record->num_bytes = num_bytes; | ||
1504 | record->old_roots = NULL; | ||
1505 | |||
1506 | spin_lock(&delayed_refs->lock); | ||
1507 | ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, | ||
1508 | record); | ||
1509 | spin_unlock(&delayed_refs->lock); | ||
1510 | if (ret > 0) | ||
1511 | kfree(record); | ||
1512 | return 0; | ||
1484 | } | 1513 | } |
1485 | 1514 | ||
1486 | #define UPDATE_NEW 0 | 1515 | #define UPDATE_NEW 0 |
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
2303 | int err = -ENOMEM; | 2332 | int err = -ENOMEM; |
2304 | int ret = 0; | 2333 | int ret = 0; |
2305 | 2334 | ||
2335 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2336 | fs_info->qgroup_rescan_running = true; | ||
2337 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2338 | |||
2306 | path = btrfs_alloc_path(); | 2339 | path = btrfs_alloc_path(); |
2307 | if (!path) | 2340 | if (!path) |
2308 | goto out; | 2341 | goto out; |
@@ -2369,6 +2402,9 @@ out: | |||
2369 | } | 2402 | } |
2370 | 2403 | ||
2371 | done: | 2404 | done: |
2405 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2406 | fs_info->qgroup_rescan_running = false; | ||
2407 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2372 | complete_all(&fs_info->qgroup_rescan_completion); | 2408 | complete_all(&fs_info->qgroup_rescan_completion); |
2373 | } | 2409 | } |
2374 | 2410 | ||
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
2487 | return 0; | 2523 | return 0; |
2488 | } | 2524 | } |
2489 | 2525 | ||
2490 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) | 2526 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
2527 | bool interruptible) | ||
2491 | { | 2528 | { |
2492 | int running; | 2529 | int running; |
2493 | int ret = 0; | 2530 | int ret = 0; |
2494 | 2531 | ||
2495 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2532 | mutex_lock(&fs_info->qgroup_rescan_lock); |
2496 | spin_lock(&fs_info->qgroup_lock); | 2533 | spin_lock(&fs_info->qgroup_lock); |
2497 | running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; | 2534 | running = fs_info->qgroup_rescan_running; |
2498 | spin_unlock(&fs_info->qgroup_lock); | 2535 | spin_unlock(&fs_info->qgroup_lock); |
2499 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2536 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
2500 | 2537 | ||
2501 | if (running) | 2538 | if (!running) |
2539 | return 0; | ||
2540 | |||
2541 | if (interruptible) | ||
2502 | ret = wait_for_completion_interruptible( | 2542 | ret = wait_for_completion_interruptible( |
2503 | &fs_info->qgroup_rescan_completion); | 2543 | &fs_info->qgroup_rescan_completion); |
2544 | else | ||
2545 | wait_for_completion(&fs_info->qgroup_rescan_completion); | ||
2504 | 2546 | ||
2505 | return ret; | 2547 | return ret; |
2506 | } | 2548 | } |
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 710887c06aaf..1bc64c864b62 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h | |||
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
46 | struct btrfs_fs_info *fs_info); | 46 | struct btrfs_fs_info *fs_info); |
47 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | 47 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
48 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | 48 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); |
49 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | 49 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, |
50 | bool interruptible); | ||
50 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | 51 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
51 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | 52 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
52 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | 53 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | |||
63 | struct btrfs_delayed_extent_op; | 64 | struct btrfs_delayed_extent_op; |
64 | int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, | 65 | int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, |
65 | struct btrfs_fs_info *fs_info); | 66 | struct btrfs_fs_info *fs_info); |
66 | struct btrfs_qgroup_extent_record * | 67 | /* |
67 | btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, | 68 | * Insert one dirty extent record into @delayed_refs, informing qgroup to |
68 | struct btrfs_delayed_ref_root *delayed_refs, | 69 | * account that extent at commit trans time. |
69 | struct btrfs_qgroup_extent_record *record); | 70 | * |
71 | * No lock version, caller must acquire delayed ref lock and allocate memory. | ||
72 | * | ||
73 | * Return 0 for a successful insert. | ||
74 | * Return >0 for an existing record; the caller can free @record safely. | ||
75 | * Error is not possible. | ||
76 | */ | ||
77 | int btrfs_qgroup_insert_dirty_extent_nolock( | ||
78 | struct btrfs_fs_info *fs_info, | ||
79 | struct btrfs_delayed_ref_root *delayed_refs, | ||
80 | struct btrfs_qgroup_extent_record *record); | ||
81 | |||
82 | /* | ||
83 | * Insert one dirty extent record into @delayed_refs, informing qgroup to | ||
84 | * account that extent at commit trans time. | ||
85 | * | ||
86 | * Better encapsulated version. | ||
87 | * | ||
88 | * Return 0 if the operation is done. | ||
89 | * Return <0 for error, like memory allocation failure or invalid parameter | ||
90 | * (NULL trans) | ||
91 | */ | ||
92 | int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, | ||
93 | struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, | ||
94 | gfp_t gfp_flag); | ||
95 | |||
70 | int | 96 | int |
71 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, | 97 | btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, |
72 | struct btrfs_fs_info *fs_info, | 98 | struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b26a5aea41b4..8a2c2a07987b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "async-thread.h" | 31 | #include "async-thread.h" |
32 | #include "free-space-cache.h" | 32 | #include "free-space-cache.h" |
33 | #include "inode-map.h" | 33 | #include "inode-map.h" |
34 | #include "qgroup.h" | ||
34 | 35 | ||
35 | /* | 36 | /* |
36 | * backref_node, mapping_node and tree_block start with this | 37 | * backref_node, mapping_node and tree_block start with this |
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3037 | u64 num_bytes; | 3038 | u64 num_bytes; |
3038 | int nr = 0; | 3039 | int nr = 0; |
3039 | int ret = 0; | 3040 | int ret = 0; |
3041 | u64 prealloc_start = cluster->start - offset; | ||
3042 | u64 prealloc_end = cluster->end - offset; | ||
3043 | u64 cur_offset; | ||
3040 | 3044 | ||
3041 | BUG_ON(cluster->start != cluster->boundary[0]); | 3045 | BUG_ON(cluster->start != cluster->boundary[0]); |
3042 | inode_lock(inode); | 3046 | inode_lock(inode); |
3043 | 3047 | ||
3044 | ret = btrfs_check_data_free_space(inode, cluster->start, | 3048 | ret = btrfs_check_data_free_space(inode, prealloc_start, |
3045 | cluster->end + 1 - cluster->start); | 3049 | prealloc_end + 1 - prealloc_start); |
3046 | if (ret) | 3050 | if (ret) |
3047 | goto out; | 3051 | goto out; |
3048 | 3052 | ||
3053 | cur_offset = prealloc_start; | ||
3049 | while (nr < cluster->nr) { | 3054 | while (nr < cluster->nr) { |
3050 | start = cluster->boundary[nr] - offset; | 3055 | start = cluster->boundary[nr] - offset; |
3051 | if (nr + 1 < cluster->nr) | 3056 | if (nr + 1 < cluster->nr) |
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3055 | 3060 | ||
3056 | lock_extent(&BTRFS_I(inode)->io_tree, start, end); | 3061 | lock_extent(&BTRFS_I(inode)->io_tree, start, end); |
3057 | num_bytes = end + 1 - start; | 3062 | num_bytes = end + 1 - start; |
3063 | if (cur_offset < start) | ||
3064 | btrfs_free_reserved_data_space(inode, cur_offset, | ||
3065 | start - cur_offset); | ||
3058 | ret = btrfs_prealloc_file_range(inode, 0, start, | 3066 | ret = btrfs_prealloc_file_range(inode, 0, start, |
3059 | num_bytes, num_bytes, | 3067 | num_bytes, num_bytes, |
3060 | end + 1, &alloc_hint); | 3068 | end + 1, &alloc_hint); |
3069 | cur_offset = end + 1; | ||
3061 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end); | 3070 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end); |
3062 | if (ret) | 3071 | if (ret) |
3063 | break; | 3072 | break; |
3064 | nr++; | 3073 | nr++; |
3065 | } | 3074 | } |
3066 | btrfs_free_reserved_data_space(inode, cluster->start, | 3075 | if (cur_offset < prealloc_end) |
3067 | cluster->end + 1 - cluster->start); | 3076 | btrfs_free_reserved_data_space(inode, cur_offset, |
3077 | prealloc_end + 1 - cur_offset); | ||
3068 | out: | 3078 | out: |
3069 | inode_unlock(inode); | 3079 | inode_unlock(inode); |
3070 | return ret; | 3080 | return ret; |
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3916 | return 0; | 3926 | return 0; |
3917 | } | 3927 | } |
3918 | 3928 | ||
3929 | /* | ||
3930 | * Qgroup fixer for data chunk relocation. | ||
3931 | * The data relocation is done in the following steps | ||
3932 | * 1) Copy data extents into data reloc tree | ||
3933 | * 2) Create tree reloc tree(special snapshot) for related subvolumes | ||
3934 | * 3) Modify file extents in tree reloc tree | ||
3935 | * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks | ||
3936 | * | ||
3937 | * The problem is, data and tree reloc tree are not accounted to qgroup, | ||
3938 | * and 4) will only inform qgroup to track tree block changes, not file extents | ||
3939 | * in the tree blocks. | ||
3940 | * | ||
3941 | * The good news is, related data extents are all in data reloc tree, so we | ||
3942 | * only need to inform qgroup to track all file extents in data reloc tree | ||
3943 | * before committing the transaction. | ||
3944 | */ | ||
3945 | static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, | ||
3946 | struct reloc_control *rc) | ||
3947 | { | ||
3948 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | ||
3949 | struct inode *inode = rc->data_inode; | ||
3950 | struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; | ||
3951 | struct btrfs_path *path; | ||
3952 | struct btrfs_key key; | ||
3953 | int ret = 0; | ||
3954 | |||
3955 | if (!fs_info->quota_enabled) | ||
3956 | return 0; | ||
3957 | |||
3958 | /* | ||
3959 | * Only for stage where we update data pointers the qgroup fix is | ||
3960 | * valid. | ||
3961 | * For MOVING_DATA stage, we will miss the timing of swapping tree | ||
3962 | * blocks, and won't fix it. | ||
3963 | */ | ||
3964 | if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) | ||
3965 | return 0; | ||
3966 | |||
3967 | path = btrfs_alloc_path(); | ||
3968 | if (!path) | ||
3969 | return -ENOMEM; | ||
3970 | key.objectid = btrfs_ino(inode); | ||
3971 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3972 | key.offset = 0; | ||
3973 | |||
3974 | ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); | ||
3975 | if (ret < 0) | ||
3976 | goto out; | ||
3977 | |||
3978 | lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); | ||
3979 | while (1) { | ||
3980 | struct btrfs_file_extent_item *fi; | ||
3981 | |||
3982 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
3983 | if (key.objectid > btrfs_ino(inode)) | ||
3984 | break; | ||
3985 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
3986 | goto next; | ||
3987 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3988 | struct btrfs_file_extent_item); | ||
3989 | if (btrfs_file_extent_type(path->nodes[0], fi) != | ||
3990 | BTRFS_FILE_EXTENT_REG) | ||
3991 | goto next; | ||
3992 | ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, | ||
3993 | btrfs_file_extent_disk_bytenr(path->nodes[0], fi), | ||
3994 | btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), | ||
3995 | GFP_NOFS); | ||
3996 | if (ret < 0) | ||
3997 | break; | ||
3998 | next: | ||
3999 | ret = btrfs_next_item(data_reloc_root, path); | ||
4000 | if (ret < 0) | ||
4001 | break; | ||
4002 | if (ret > 0) { | ||
4003 | ret = 0; | ||
4004 | break; | ||
4005 | } | ||
4006 | } | ||
4007 | unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); | ||
4008 | out: | ||
4009 | btrfs_free_path(path); | ||
4010 | return ret; | ||
4011 | } | ||
4012 | |||
3919 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 4013 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3920 | { | 4014 | { |
3921 | struct rb_root blocks = RB_ROOT; | 4015 | struct rb_root blocks = RB_ROOT; |
@@ -4102,10 +4196,16 @@ restart: | |||
4102 | 4196 | ||
4103 | /* get rid of pinned extents */ | 4197 | /* get rid of pinned extents */ |
4104 | trans = btrfs_join_transaction(rc->extent_root); | 4198 | trans = btrfs_join_transaction(rc->extent_root); |
4105 | if (IS_ERR(trans)) | 4199 | if (IS_ERR(trans)) { |
4106 | err = PTR_ERR(trans); | 4200 | err = PTR_ERR(trans); |
4107 | else | 4201 | goto out_free; |
4108 | btrfs_commit_transaction(trans, rc->extent_root); | 4202 | } |
4203 | err = qgroup_fix_relocated_data_extents(trans, rc); | ||
4204 | if (err < 0) { | ||
4205 | btrfs_abort_transaction(trans, err); | ||
4206 | goto out_free; | ||
4207 | } | ||
4208 | btrfs_commit_transaction(trans, rc->extent_root); | ||
4109 | out_free: | 4209 | out_free: |
4110 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | 4210 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); |
4111 | btrfs_free_path(path); | 4211 | btrfs_free_path(path); |
@@ -4468,10 +4568,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4468 | unset_reloc_control(rc); | 4568 | unset_reloc_control(rc); |
4469 | 4569 | ||
4470 | trans = btrfs_join_transaction(rc->extent_root); | 4570 | trans = btrfs_join_transaction(rc->extent_root); |
4471 | if (IS_ERR(trans)) | 4571 | if (IS_ERR(trans)) { |
4472 | err = PTR_ERR(trans); | 4572 | err = PTR_ERR(trans); |
4473 | else | 4573 | goto out_free; |
4474 | err = btrfs_commit_transaction(trans, rc->extent_root); | 4574 | } |
4575 | err = qgroup_fix_relocated_data_extents(trans, rc); | ||
4576 | if (err < 0) { | ||
4577 | btrfs_abort_transaction(trans, err); | ||
4578 | goto out_free; | ||
4579 | } | ||
4580 | err = btrfs_commit_transaction(trans, rc->extent_root); | ||
4475 | out_free: | 4581 | out_free: |
4476 | kfree(rc); | 4582 | kfree(rc); |
4477 | out: | 4583 | out: |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7fd7e1830cfe..091296062456 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
272 | root_key.objectid = key.offset; | 272 | root_key.objectid = key.offset; |
273 | key.offset++; | 273 | key.offset++; |
274 | 274 | ||
275 | /* | ||
276 | * The root might have been inserted already, as before we look | ||
277 | * for orphan roots, log replay might have happened, which | ||
278 | * triggers a transaction commit and qgroup accounting, which | ||
279 | * in turn reads and inserts fs roots while doing backref | ||
280 | * walking. | ||
281 | */ | ||
282 | root = btrfs_lookup_fs_root(tree_root->fs_info, | ||
283 | root_key.objectid); | ||
284 | if (root) { | ||
285 | WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, | ||
286 | &root->state)); | ||
287 | if (btrfs_root_refs(&root->root_item) == 0) | ||
288 | btrfs_add_dead_root(root); | ||
289 | continue; | ||
290 | } | ||
291 | |||
275 | root = btrfs_read_fs_root(tree_root, &root_key); | 292 | root = btrfs_read_fs_root(tree_root, &root_key); |
276 | err = PTR_ERR_OR_ZERO(root); | 293 | err = PTR_ERR_OR_ZERO(root); |
277 | if (err && err != -ENOENT) { | 294 | if (err && err != -ENOENT) { |
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
310 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); | 327 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
311 | 328 | ||
312 | err = btrfs_insert_fs_root(root->fs_info, root); | 329 | err = btrfs_insert_fs_root(root->fs_info, root); |
313 | /* | ||
314 | * The root might have been inserted already, as before we look | ||
315 | * for orphan roots, log replay might have happened, which | ||
316 | * triggers a transaction commit and qgroup accounting, which | ||
317 | * in turn reads and inserts fs roots while doing backref | ||
318 | * walking. | ||
319 | */ | ||
320 | if (err == -EEXIST) | ||
321 | err = 0; | ||
322 | if (err) { | 330 | if (err) { |
331 | BUG_ON(err == -EEXIST); | ||
323 | btrfs_free_fs_root(root); | 332 | btrfs_free_fs_root(root); |
324 | break; | 333 | break; |
325 | } | 334 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index b71dd298385c..efe129fe2678 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -231,7 +231,6 @@ struct pending_dir_move { | |||
231 | u64 parent_ino; | 231 | u64 parent_ino; |
232 | u64 ino; | 232 | u64 ino; |
233 | u64 gen; | 233 | u64 gen; |
234 | bool is_orphan; | ||
235 | struct list_head update_refs; | 234 | struct list_head update_refs; |
236 | }; | 235 | }; |
237 | 236 | ||
@@ -274,6 +273,39 @@ struct name_cache_entry { | |||
274 | char name[]; | 273 | char name[]; |
275 | }; | 274 | }; |
276 | 275 | ||
276 | static void inconsistent_snapshot_error(struct send_ctx *sctx, | ||
277 | enum btrfs_compare_tree_result result, | ||
278 | const char *what) | ||
279 | { | ||
280 | const char *result_string; | ||
281 | |||
282 | switch (result) { | ||
283 | case BTRFS_COMPARE_TREE_NEW: | ||
284 | result_string = "new"; | ||
285 | break; | ||
286 | case BTRFS_COMPARE_TREE_DELETED: | ||
287 | result_string = "deleted"; | ||
288 | break; | ||
289 | case BTRFS_COMPARE_TREE_CHANGED: | ||
290 | result_string = "updated"; | ||
291 | break; | ||
292 | case BTRFS_COMPARE_TREE_SAME: | ||
293 | ASSERT(0); | ||
294 | result_string = "unchanged"; | ||
295 | break; | ||
296 | default: | ||
297 | ASSERT(0); | ||
298 | result_string = "unexpected"; | ||
299 | } | ||
300 | |||
301 | btrfs_err(sctx->send_root->fs_info, | ||
302 | "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu", | ||
303 | result_string, what, sctx->cmp_key->objectid, | ||
304 | sctx->send_root->root_key.objectid, | ||
305 | (sctx->parent_root ? | ||
306 | sctx->parent_root->root_key.objectid : 0)); | ||
307 | } | ||
308 | |||
277 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 309 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
278 | 310 | ||
279 | static struct waiting_dir_move * | 311 | static struct waiting_dir_move * |
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
1861 | * was already unlinked/moved, so we can safely assume that we will not | 1893 | * was already unlinked/moved, so we can safely assume that we will not |
1862 | * overwrite anything at this point in time. | 1894 | * overwrite anything at this point in time. |
1863 | */ | 1895 | */ |
1864 | if (other_inode > sctx->send_progress) { | 1896 | if (other_inode > sctx->send_progress || |
1897 | is_waiting_for_move(sctx, other_inode)) { | ||
1865 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, | 1898 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, |
1866 | who_gen, NULL, NULL, NULL, NULL); | 1899 | who_gen, NULL, NULL, NULL, NULL); |
1867 | if (ret < 0) | 1900 | if (ret < 0) |
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
2502 | key.type = BTRFS_INODE_ITEM_KEY; | 2535 | key.type = BTRFS_INODE_ITEM_KEY; |
2503 | key.offset = 0; | 2536 | key.offset = 0; |
2504 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | 2537 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); |
2538 | if (ret > 0) | ||
2539 | ret = -ENOENT; | ||
2505 | if (ret < 0) | 2540 | if (ret < 0) |
2506 | goto out; | 2541 | goto out; |
2507 | 2542 | ||
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
2947 | } | 2982 | } |
2948 | 2983 | ||
2949 | if (loc.objectid > send_progress) { | 2984 | if (loc.objectid > send_progress) { |
2985 | struct orphan_dir_info *odi; | ||
2986 | |||
2987 | odi = get_orphan_dir_info(sctx, dir); | ||
2988 | free_orphan_dir_info(sctx, odi); | ||
2950 | ret = 0; | 2989 | ret = 0; |
2951 | goto out; | 2990 | goto out; |
2952 | } | 2991 | } |
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
3047 | pm->parent_ino = parent_ino; | 3086 | pm->parent_ino = parent_ino; |
3048 | pm->ino = ino; | 3087 | pm->ino = ino; |
3049 | pm->gen = ino_gen; | 3088 | pm->gen = ino_gen; |
3050 | pm->is_orphan = is_orphan; | ||
3051 | INIT_LIST_HEAD(&pm->list); | 3089 | INIT_LIST_HEAD(&pm->list); |
3052 | INIT_LIST_HEAD(&pm->update_refs); | 3090 | INIT_LIST_HEAD(&pm->update_refs); |
3053 | RB_CLEAR_NODE(&pm->node); | 3091 | RB_CLEAR_NODE(&pm->node); |
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, | |||
3113 | return NULL; | 3151 | return NULL; |
3114 | } | 3152 | } |
3115 | 3153 | ||
3154 | static int path_loop(struct send_ctx *sctx, struct fs_path *name, | ||
3155 | u64 ino, u64 gen, u64 *ancestor_ino) | ||
3156 | { | ||
3157 | int ret = 0; | ||
3158 | u64 parent_inode = 0; | ||
3159 | u64 parent_gen = 0; | ||
3160 | u64 start_ino = ino; | ||
3161 | |||
3162 | *ancestor_ino = 0; | ||
3163 | while (ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
3164 | fs_path_reset(name); | ||
3165 | |||
3166 | if (is_waiting_for_rm(sctx, ino)) | ||
3167 | break; | ||
3168 | if (is_waiting_for_move(sctx, ino)) { | ||
3169 | if (*ancestor_ino == 0) | ||
3170 | *ancestor_ino = ino; | ||
3171 | ret = get_first_ref(sctx->parent_root, ino, | ||
3172 | &parent_inode, &parent_gen, name); | ||
3173 | } else { | ||
3174 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
3175 | &parent_inode, | ||
3176 | &parent_gen, name); | ||
3177 | if (ret > 0) { | ||
3178 | ret = 0; | ||
3179 | break; | ||
3180 | } | ||
3181 | } | ||
3182 | if (ret < 0) | ||
3183 | break; | ||
3184 | if (parent_inode == start_ino) { | ||
3185 | ret = 1; | ||
3186 | if (*ancestor_ino == 0) | ||
3187 | *ancestor_ino = ino; | ||
3188 | break; | ||
3189 | } | ||
3190 | ino = parent_inode; | ||
3191 | gen = parent_gen; | ||
3192 | } | ||
3193 | return ret; | ||
3194 | } | ||
3195 | |||
3116 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | 3196 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) |
3117 | { | 3197 | { |
3118 | struct fs_path *from_path = NULL; | 3198 | struct fs_path *from_path = NULL; |
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3123 | u64 parent_ino, parent_gen; | 3203 | u64 parent_ino, parent_gen; |
3124 | struct waiting_dir_move *dm = NULL; | 3204 | struct waiting_dir_move *dm = NULL; |
3125 | u64 rmdir_ino = 0; | 3205 | u64 rmdir_ino = 0; |
3206 | u64 ancestor; | ||
3207 | bool is_orphan; | ||
3126 | int ret; | 3208 | int ret; |
3127 | 3209 | ||
3128 | name = fs_path_alloc(); | 3210 | name = fs_path_alloc(); |
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3135 | dm = get_waiting_dir_move(sctx, pm->ino); | 3217 | dm = get_waiting_dir_move(sctx, pm->ino); |
3136 | ASSERT(dm); | 3218 | ASSERT(dm); |
3137 | rmdir_ino = dm->rmdir_ino; | 3219 | rmdir_ino = dm->rmdir_ino; |
3220 | is_orphan = dm->orphanized; | ||
3138 | free_waiting_dir_move(sctx, dm); | 3221 | free_waiting_dir_move(sctx, dm); |
3139 | 3222 | ||
3140 | if (pm->is_orphan) { | 3223 | if (is_orphan) { |
3141 | ret = gen_unique_name(sctx, pm->ino, | 3224 | ret = gen_unique_name(sctx, pm->ino, |
3142 | pm->gen, from_path); | 3225 | pm->gen, from_path); |
3143 | } else { | 3226 | } else { |
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3155 | goto out; | 3238 | goto out; |
3156 | 3239 | ||
3157 | sctx->send_progress = sctx->cur_ino + 1; | 3240 | sctx->send_progress = sctx->cur_ino + 1; |
3241 | ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); | ||
3242 | if (ret < 0) | ||
3243 | goto out; | ||
3244 | if (ret) { | ||
3245 | LIST_HEAD(deleted_refs); | ||
3246 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); | ||
3247 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, | ||
3248 | &pm->update_refs, &deleted_refs, | ||
3249 | is_orphan); | ||
3250 | if (ret < 0) | ||
3251 | goto out; | ||
3252 | if (rmdir_ino) { | ||
3253 | dm = get_waiting_dir_move(sctx, pm->ino); | ||
3254 | ASSERT(dm); | ||
3255 | dm->rmdir_ino = rmdir_ino; | ||
3256 | } | ||
3257 | goto out; | ||
3258 | } | ||
3158 | fs_path_reset(name); | 3259 | fs_path_reset(name); |
3159 | to_path = name; | 3260 | to_path = name; |
3160 | name = NULL; | 3261 | name = NULL; |
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3174 | /* already deleted */ | 3275 | /* already deleted */ |
3175 | goto finish; | 3276 | goto finish; |
3176 | } | 3277 | } |
3177 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | 3278 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino); |
3178 | if (ret < 0) | 3279 | if (ret < 0) |
3179 | goto out; | 3280 | goto out; |
3180 | if (!ret) | 3281 | if (!ret) |
@@ -3204,8 +3305,18 @@ finish: | |||
3204 | * and old parent(s). | 3305 | * and old parent(s). |
3205 | */ | 3306 | */ |
3206 | list_for_each_entry(cur, &pm->update_refs, list) { | 3307 | list_for_each_entry(cur, &pm->update_refs, list) { |
3207 | if (cur->dir == rmdir_ino) | 3308 | /* |
3309 | * The parent inode might have been deleted in the send snapshot | ||
3310 | */ | ||
3311 | ret = get_inode_info(sctx->send_root, cur->dir, NULL, | ||
3312 | NULL, NULL, NULL, NULL, NULL); | ||
3313 | if (ret == -ENOENT) { | ||
3314 | ret = 0; | ||
3208 | continue; | 3315 | continue; |
3316 | } | ||
3317 | if (ret < 0) | ||
3318 | goto out; | ||
3319 | |||
3209 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3320 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
3210 | if (ret < 0) | 3321 | if (ret < 0) |
3211 | goto out; | 3322 | goto out; |
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, | |||
3325 | u64 left_gen; | 3436 | u64 left_gen; |
3326 | u64 right_gen; | 3437 | u64 right_gen; |
3327 | int ret = 0; | 3438 | int ret = 0; |
3439 | struct waiting_dir_move *wdm; | ||
3328 | 3440 | ||
3329 | if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) | 3441 | if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) |
3330 | return 0; | 3442 | return 0; |
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, | |||
3383 | goto out; | 3495 | goto out; |
3384 | } | 3496 | } |
3385 | 3497 | ||
3386 | if (is_waiting_for_move(sctx, di_key.objectid)) { | 3498 | wdm = get_waiting_dir_move(sctx, di_key.objectid); |
3499 | if (wdm && !wdm->orphanized) { | ||
3387 | ret = add_pending_dir_move(sctx, | 3500 | ret = add_pending_dir_move(sctx, |
3388 | sctx->cur_ino, | 3501 | sctx->cur_ino, |
3389 | sctx->cur_inode_gen, | 3502 | sctx->cur_inode_gen, |
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3470 | ret = is_ancestor(sctx->parent_root, | 3583 | ret = is_ancestor(sctx->parent_root, |
3471 | sctx->cur_ino, sctx->cur_inode_gen, | 3584 | sctx->cur_ino, sctx->cur_inode_gen, |
3472 | ino, path_before); | 3585 | ino, path_before); |
3473 | break; | 3586 | if (ret) |
3587 | break; | ||
3474 | } | 3588 | } |
3475 | 3589 | ||
3476 | fs_path_reset(path_before); | 3590 | fs_path_reset(path_before); |
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3643 | goto out; | 3757 | goto out; |
3644 | if (ret) { | 3758 | if (ret) { |
3645 | struct name_cache_entry *nce; | 3759 | struct name_cache_entry *nce; |
3760 | struct waiting_dir_move *wdm; | ||
3646 | 3761 | ||
3647 | ret = orphanize_inode(sctx, ow_inode, ow_gen, | 3762 | ret = orphanize_inode(sctx, ow_inode, ow_gen, |
3648 | cur->full_path); | 3763 | cur->full_path); |
3649 | if (ret < 0) | 3764 | if (ret < 0) |
3650 | goto out; | 3765 | goto out; |
3766 | |||
3767 | /* | ||
3768 | * If ow_inode has its rename operation delayed | ||
3769 | * make sure that its orphanized name is used in | ||
3770 | * the source path when performing its rename | ||
3771 | * operation. | ||
3772 | */ | ||
3773 | if (is_waiting_for_move(sctx, ow_inode)) { | ||
3774 | wdm = get_waiting_dir_move(sctx, | ||
3775 | ow_inode); | ||
3776 | ASSERT(wdm); | ||
3777 | wdm->orphanized = true; | ||
3778 | } | ||
3779 | |||
3651 | /* | 3780 | /* |
3652 | * Make sure we clear our orphanized inode's | 3781 | * Make sure we clear our orphanized inode's |
3653 | * name from the name cache. This is because the | 3782 | * name from the name cache. This is because the |
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3663 | name_cache_delete(sctx, nce); | 3792 | name_cache_delete(sctx, nce); |
3664 | kfree(nce); | 3793 | kfree(nce); |
3665 | } | 3794 | } |
3795 | |||
3796 | /* | ||
3797 | * ow_inode might currently be an ancestor of | ||
3798 | * cur_ino, therefore compute valid_path (the | ||
3799 | * current path of cur_ino) again because it | ||
3800 | * might contain the pre-orphanization name of | ||
3801 | * ow_inode, which is no longer valid. | ||
3802 | */ | ||
3803 | fs_path_reset(valid_path); | ||
3804 | ret = get_cur_path(sctx, sctx->cur_ino, | ||
3805 | sctx->cur_inode_gen, valid_path); | ||
3806 | if (ret < 0) | ||
3807 | goto out; | ||
3666 | } else { | 3808 | } else { |
3667 | ret = send_unlink(sctx, cur->full_path); | 3809 | ret = send_unlink(sctx, cur->full_path); |
3668 | if (ret < 0) | 3810 | if (ret < 0) |
@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx, | |||
5602 | { | 5744 | { |
5603 | int ret = 0; | 5745 | int ret = 0; |
5604 | 5746 | ||
5605 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | 5747 | if (sctx->cur_ino != sctx->cmp_key->objectid) { |
5748 | inconsistent_snapshot_error(sctx, result, "reference"); | ||
5749 | return -EIO; | ||
5750 | } | ||
5606 | 5751 | ||
5607 | if (!sctx->cur_inode_new_gen && | 5752 | if (!sctx->cur_inode_new_gen && |
5608 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { | 5753 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { |
@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx, | |||
5627 | { | 5772 | { |
5628 | int ret = 0; | 5773 | int ret = 0; |
5629 | 5774 | ||
5630 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | 5775 | if (sctx->cur_ino != sctx->cmp_key->objectid) { |
5776 | inconsistent_snapshot_error(sctx, result, "xattr"); | ||
5777 | return -EIO; | ||
5778 | } | ||
5631 | 5779 | ||
5632 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { | 5780 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { |
5633 | if (result == BTRFS_COMPARE_TREE_NEW) | 5781 | if (result == BTRFS_COMPARE_TREE_NEW) |
@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx, | |||
5651 | { | 5799 | { |
5652 | int ret = 0; | 5800 | int ret = 0; |
5653 | 5801 | ||
5654 | BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); | 5802 | if (sctx->cur_ino != sctx->cmp_key->objectid) { |
5803 | inconsistent_snapshot_error(sctx, result, "extent"); | ||
5804 | return -EIO; | ||
5805 | } | ||
5655 | 5806 | ||
5656 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { | 5807 | if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { |
5657 | if (result != BTRFS_COMPARE_TREE_DELETED) | 5808 | if (result != BTRFS_COMPARE_TREE_DELETED) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 864ce334f696..4071fe2bd098 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb) | |||
2241 | struct btrfs_trans_handle *trans; | 2241 | struct btrfs_trans_handle *trans; |
2242 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; | 2242 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; |
2243 | 2243 | ||
2244 | root->fs_info->fs_frozen = 1; | ||
2245 | /* | ||
2246 | * We don't need a barrier here, we'll wait for any transaction that | ||
2247 | * could be in progress on other threads (and do delayed iputs that | ||
2248 | * we want to avoid on a frozen filesystem), or do the commit | ||
2249 | * ourselves. | ||
2250 | */ | ||
2244 | trans = btrfs_attach_transaction_barrier(root); | 2251 | trans = btrfs_attach_transaction_barrier(root); |
2245 | if (IS_ERR(trans)) { | 2252 | if (IS_ERR(trans)) { |
2246 | /* no transaction, don't bother */ | 2253 | /* no transaction, don't bother */ |
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb) | |||
2251 | return btrfs_commit_transaction(trans, root); | 2258 | return btrfs_commit_transaction(trans, root); |
2252 | } | 2259 | } |
2253 | 2260 | ||
2261 | static int btrfs_unfreeze(struct super_block *sb) | ||
2262 | { | ||
2263 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; | ||
2264 | |||
2265 | root->fs_info->fs_frozen = 0; | ||
2266 | return 0; | ||
2267 | } | ||
2268 | |||
2254 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) | 2269 | static int btrfs_show_devname(struct seq_file *m, struct dentry *root) |
2255 | { | 2270 | { |
2256 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); | 2271 | struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); |
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = { | |||
2299 | .statfs = btrfs_statfs, | 2314 | .statfs = btrfs_statfs, |
2300 | .remount_fs = btrfs_remount, | 2315 | .remount_fs = btrfs_remount, |
2301 | .freeze_fs = btrfs_freeze, | 2316 | .freeze_fs = btrfs_freeze, |
2317 | .unfreeze_fs = btrfs_unfreeze, | ||
2302 | }; | 2318 | }; |
2303 | 2319 | ||
2304 | static const struct file_operations btrfs_ctl_fops = { | 2320 | static const struct file_operations btrfs_ctl_fops = { |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9cca0a721961..95d41919d034 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
2278 | 2278 | ||
2279 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 2279 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
2280 | 2280 | ||
2281 | /* | ||
2282 | * If the fs has been frozen, we cannot handle delayed iputs, otherwise | ||
2283 | * it'll result in a deadlock on SB_FREEZE_FS. | ||
2284 | */ | ||
2281 | if (current != root->fs_info->transaction_kthread && | 2285 | if (current != root->fs_info->transaction_kthread && |
2282 | current != root->fs_info->cleaner_kthread) | 2286 | current != root->fs_info->cleaner_kthread && |
2287 | !root->fs_info->fs_frozen) | ||
2283 | btrfs_run_delayed_iputs(root); | 2288 | btrfs_run_delayed_iputs(root); |
2284 | 2289 | ||
2285 | return ret; | 2290 | return ret; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d31a0c4f56be..e935035ac034 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "backref.h" | 27 | #include "backref.h" |
28 | #include "hash.h" | 28 | #include "hash.h" |
29 | #include "compression.h" | 29 | #include "compression.h" |
30 | #include "qgroup.h" | ||
30 | 31 | ||
31 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
32 | * | 33 | * |
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
680 | ins.type = BTRFS_EXTENT_ITEM_KEY; | 681 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
681 | offset = key->offset - btrfs_file_extent_offset(eb, item); | 682 | offset = key->offset - btrfs_file_extent_offset(eb, item); |
682 | 683 | ||
684 | /* | ||
685 | * Manually record the dirty extent: here we do a shallow | ||
686 | * file extent item copy and skip the normal backref update, | ||
687 | * modifying the extent tree all by ourselves. | ||
688 | * So we need to manually record the dirty extent for qgroup, | ||
689 | * as the owner of the file extent changed from the log tree | ||
690 | * (doesn't affect qgroup) to the fs/file tree (affects qgroup). | ||
691 | */ | ||
692 | ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, | ||
693 | btrfs_file_extent_disk_bytenr(eb, item), | ||
694 | btrfs_file_extent_disk_num_bytes(eb, item), | ||
695 | GFP_NOFS); | ||
696 | if (ret < 0) | ||
697 | goto out; | ||
698 | |||
683 | if (ins.objectid > 0) { | 699 | if (ins.objectid > 0) { |
684 | u64 csum_start; | 700 | u64 csum_start; |
685 | u64 csum_end; | 701 | u64 csum_end; |
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2807 | */ | 2823 | */ |
2808 | mutex_unlock(&root->log_mutex); | 2824 | mutex_unlock(&root->log_mutex); |
2809 | 2825 | ||
2810 | btrfs_init_log_ctx(&root_log_ctx); | 2826 | btrfs_init_log_ctx(&root_log_ctx, NULL); |
2811 | 2827 | ||
2812 | mutex_lock(&log_root_tree->log_mutex); | 2828 | mutex_lock(&log_root_tree->log_mutex); |
2813 | atomic_inc(&log_root_tree->log_batch); | 2829 | atomic_inc(&log_root_tree->log_batch); |
@@ -4469,7 +4485,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, | |||
4469 | static int btrfs_check_ref_name_override(struct extent_buffer *eb, | 4485 | static int btrfs_check_ref_name_override(struct extent_buffer *eb, |
4470 | const int slot, | 4486 | const int slot, |
4471 | const struct btrfs_key *key, | 4487 | const struct btrfs_key *key, |
4472 | struct inode *inode) | 4488 | struct inode *inode, |
4489 | u64 *other_ino) | ||
4473 | { | 4490 | { |
4474 | int ret; | 4491 | int ret; |
4475 | struct btrfs_path *search_path; | 4492 | struct btrfs_path *search_path; |
@@ -4528,7 +4545,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, | |||
4528 | search_path, parent, | 4545 | search_path, parent, |
4529 | name, this_name_len, 0); | 4546 | name, this_name_len, 0); |
4530 | if (di && !IS_ERR(di)) { | 4547 | if (di && !IS_ERR(di)) { |
4531 | ret = 1; | 4548 | struct btrfs_key di_key; |
4549 | |||
4550 | btrfs_dir_item_key_to_cpu(search_path->nodes[0], | ||
4551 | di, &di_key); | ||
4552 | if (di_key.type == BTRFS_INODE_ITEM_KEY) { | ||
4553 | ret = 1; | ||
4554 | *other_ino = di_key.objectid; | ||
4555 | } else { | ||
4556 | ret = -EAGAIN; | ||
4557 | } | ||
4532 | goto out; | 4558 | goto out; |
4533 | } else if (IS_ERR(di)) { | 4559 | } else if (IS_ERR(di)) { |
4534 | ret = PTR_ERR(di); | 4560 | ret = PTR_ERR(di); |
@@ -4722,16 +4748,72 @@ again: | |||
4722 | if ((min_key.type == BTRFS_INODE_REF_KEY || | 4748 | if ((min_key.type == BTRFS_INODE_REF_KEY || |
4723 | min_key.type == BTRFS_INODE_EXTREF_KEY) && | 4749 | min_key.type == BTRFS_INODE_EXTREF_KEY) && |
4724 | BTRFS_I(inode)->generation == trans->transid) { | 4750 | BTRFS_I(inode)->generation == trans->transid) { |
4751 | u64 other_ino = 0; | ||
4752 | |||
4725 | ret = btrfs_check_ref_name_override(path->nodes[0], | 4753 | ret = btrfs_check_ref_name_override(path->nodes[0], |
4726 | path->slots[0], | 4754 | path->slots[0], |
4727 | &min_key, inode); | 4755 | &min_key, inode, |
4756 | &other_ino); | ||
4728 | if (ret < 0) { | 4757 | if (ret < 0) { |
4729 | err = ret; | 4758 | err = ret; |
4730 | goto out_unlock; | 4759 | goto out_unlock; |
4731 | } else if (ret > 0) { | 4760 | } else if (ret > 0 && ctx && |
4732 | err = 1; | 4761 | other_ino != btrfs_ino(ctx->inode)) { |
4733 | btrfs_set_log_full_commit(root->fs_info, trans); | 4762 | struct btrfs_key inode_key; |
4734 | goto out_unlock; | 4763 | struct inode *other_inode; |
4764 | |||
4765 | if (ins_nr > 0) { | ||
4766 | ins_nr++; | ||
4767 | } else { | ||
4768 | ins_nr = 1; | ||
4769 | ins_start_slot = path->slots[0]; | ||
4770 | } | ||
4771 | ret = copy_items(trans, inode, dst_path, path, | ||
4772 | &last_extent, ins_start_slot, | ||
4773 | ins_nr, inode_only, | ||
4774 | logged_isize); | ||
4775 | if (ret < 0) { | ||
4776 | err = ret; | ||
4777 | goto out_unlock; | ||
4778 | } | ||
4779 | ins_nr = 0; | ||
4780 | btrfs_release_path(path); | ||
4781 | inode_key.objectid = other_ino; | ||
4782 | inode_key.type = BTRFS_INODE_ITEM_KEY; | ||
4783 | inode_key.offset = 0; | ||
4784 | other_inode = btrfs_iget(root->fs_info->sb, | ||
4785 | &inode_key, root, | ||
4786 | NULL); | ||
4787 | /* | ||
4788 | * If the other inode that had a conflicting dir | ||
4789 | * entry was deleted in the current transaction, | ||
4790 | * we don't need to do more work or fall back to | ||
4791 | * a transaction commit. | ||
4792 | */ | ||
4793 | if (IS_ERR(other_inode) && | ||
4794 | PTR_ERR(other_inode) == -ENOENT) { | ||
4795 | goto next_key; | ||
4796 | } else if (IS_ERR(other_inode)) { | ||
4797 | err = PTR_ERR(other_inode); | ||
4798 | goto out_unlock; | ||
4799 | } | ||
4800 | /* | ||
4801 | * We are safe logging the other inode without | ||
4802 | * acquiring its i_mutex as long as we log with | ||
4803 | * the LOG_INODE_EXISTS mode. We're safe against | ||
4804 | * concurrent renames of the other inode as well | ||
4805 | * because during a rename we pin the log and | ||
4806 | * update the log with the new name before we | ||
4807 | * unpin it. | ||
4808 | */ | ||
4809 | err = btrfs_log_inode(trans, root, other_inode, | ||
4810 | LOG_INODE_EXISTS, | ||
4811 | 0, LLONG_MAX, ctx); | ||
4812 | iput(other_inode); | ||
4813 | if (err) | ||
4814 | goto out_unlock; | ||
4815 | else | ||
4816 | goto next_key; | ||
4735 | } | 4817 | } |
4736 | } | 4818 | } |
4737 | 4819 | ||
@@ -4799,7 +4881,7 @@ next_slot: | |||
4799 | ins_nr = 0; | 4881 | ins_nr = 0; |
4800 | } | 4882 | } |
4801 | btrfs_release_path(path); | 4883 | btrfs_release_path(path); |
4802 | 4884 | next_key: | |
4803 | if (min_key.offset < (u64)-1) { | 4885 | if (min_key.offset < (u64)-1) { |
4804 | min_key.offset++; | 4886 | min_key.offset++; |
4805 | } else if (min_key.type < max_key.type) { | 4887 | } else if (min_key.type < max_key.type) { |
@@ -4993,8 +5075,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4993 | if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) | 5075 | if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) |
4994 | break; | 5076 | break; |
4995 | 5077 | ||
4996 | if (IS_ROOT(parent)) | 5078 | if (IS_ROOT(parent)) { |
5079 | inode = d_inode(parent); | ||
5080 | if (btrfs_must_commit_transaction(trans, inode)) | ||
5081 | ret = 1; | ||
4997 | break; | 5082 | break; |
5083 | } | ||
4998 | 5084 | ||
4999 | parent = dget_parent(parent); | 5085 | parent = dget_parent(parent); |
5000 | dput(old_parent); | 5086 | dput(old_parent); |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index a9f1b75d080d..ab858e31ccbc 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -30,15 +30,18 @@ struct btrfs_log_ctx { | |||
30 | int log_transid; | 30 | int log_transid; |
31 | int io_err; | 31 | int io_err; |
32 | bool log_new_dentries; | 32 | bool log_new_dentries; |
33 | struct inode *inode; | ||
33 | struct list_head list; | 34 | struct list_head list; |
34 | }; | 35 | }; |
35 | 36 | ||
36 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | 37 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, |
38 | struct inode *inode) | ||
37 | { | 39 | { |
38 | ctx->log_ret = 0; | 40 | ctx->log_ret = 0; |
39 | ctx->log_transid = 0; | 41 | ctx->log_transid = 0; |
40 | ctx->io_err = 0; | 42 | ctx->io_err = 0; |
41 | ctx->log_new_dentries = false; | 43 | ctx->log_new_dentries = false; |
44 | ctx->inode = inode; | ||
42 | INIT_LIST_HEAD(&ctx->list); | 45 | INIT_LIST_HEAD(&ctx->list); |
43 | } | 46 | } |
44 | 47 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 51f125508771..035efce603a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work) | |||
834 | struct btrfs_device *device; | 834 | struct btrfs_device *device; |
835 | 835 | ||
836 | device = container_of(work, struct btrfs_device, rcu_work); | 836 | device = container_of(work, struct btrfs_device, rcu_work); |
837 | |||
838 | if (device->bdev) | ||
839 | blkdev_put(device->bdev, device->mode); | ||
840 | |||
841 | rcu_string_free(device->name); | 837 | rcu_string_free(device->name); |
842 | kfree(device); | 838 | kfree(device); |
843 | } | 839 | } |
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head) | |||
852 | schedule_work(&device->rcu_work); | 848 | schedule_work(&device->rcu_work); |
853 | } | 849 | } |
854 | 850 | ||
851 | static void btrfs_close_bdev(struct btrfs_device *device) | ||
852 | { | ||
853 | if (device->bdev && device->writeable) { | ||
854 | sync_blockdev(device->bdev); | ||
855 | invalidate_bdev(device->bdev); | ||
856 | } | ||
857 | |||
858 | if (device->bdev) | ||
859 | blkdev_put(device->bdev, device->mode); | ||
860 | } | ||
861 | |||
855 | static void btrfs_close_one_device(struct btrfs_device *device) | 862 | static void btrfs_close_one_device(struct btrfs_device *device) |
856 | { | 863 | { |
857 | struct btrfs_fs_devices *fs_devices = device->fs_devices; | 864 | struct btrfs_fs_devices *fs_devices = device->fs_devices; |
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) | |||
870 | if (device->missing) | 877 | if (device->missing) |
871 | fs_devices->missing_devices--; | 878 | fs_devices->missing_devices--; |
872 | 879 | ||
873 | if (device->bdev && device->writeable) { | 880 | btrfs_close_bdev(device); |
874 | sync_blockdev(device->bdev); | ||
875 | invalidate_bdev(device->bdev); | ||
876 | } | ||
877 | 881 | ||
878 | new_device = btrfs_alloc_device(NULL, &device->devid, | 882 | new_device = btrfs_alloc_device(NULL, &device->devid, |
879 | device->uuid); | 883 | device->uuid); |
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) | |||
1932 | btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); | 1936 | btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); |
1933 | } | 1937 | } |
1934 | 1938 | ||
1939 | btrfs_close_bdev(device); | ||
1940 | |||
1935 | call_rcu(&device->rcu, free_device); | 1941 | call_rcu(&device->rcu, free_device); |
1936 | 1942 | ||
1937 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; | 1943 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
2025 | /* zero out the old super if it is writable */ | 2031 | /* zero out the old super if it is writable */ |
2026 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); | 2032 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); |
2027 | } | 2033 | } |
2034 | |||
2035 | btrfs_close_bdev(srcdev); | ||
2036 | |||
2028 | call_rcu(&srcdev->rcu, free_device); | 2037 | call_rcu(&srcdev->rcu, free_device); |
2029 | 2038 | ||
2030 | /* | 2039 | /* |
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
2080 | * the device_list_mutex lock. | 2089 | * the device_list_mutex lock. |
2081 | */ | 2090 | */ |
2082 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); | 2091 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); |
2092 | |||
2093 | btrfs_close_bdev(tgtdev); | ||
2083 | call_rcu(&tgtdev->rcu, free_device); | 2094 | call_rcu(&tgtdev->rcu, free_device); |
2084 | } | 2095 | } |
2085 | 2096 | ||