diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 166 |
1 files changed, 144 insertions, 22 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dcaae3616728..a605d4e2f2bc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -76,6 +76,32 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) | |||
76 | } | 76 | } |
77 | } | 77 | } |
78 | 78 | ||
79 | static void clear_btree_io_tree(struct extent_io_tree *tree) | ||
80 | { | ||
81 | spin_lock(&tree->lock); | ||
82 | while (!RB_EMPTY_ROOT(&tree->state)) { | ||
83 | struct rb_node *node; | ||
84 | struct extent_state *state; | ||
85 | |||
86 | node = rb_first(&tree->state); | ||
87 | state = rb_entry(node, struct extent_state, rb_node); | ||
88 | rb_erase(&state->rb_node, &tree->state); | ||
89 | RB_CLEAR_NODE(&state->rb_node); | ||
90 | /* | ||
91 | * btree io trees aren't supposed to have tasks waiting for | ||
92 | * changes in the flags of extent states ever. | ||
93 | */ | ||
94 | ASSERT(!waitqueue_active(&state->wq)); | ||
95 | free_extent_state(state); | ||
96 | if (need_resched()) { | ||
97 | spin_unlock(&tree->lock); | ||
98 | cond_resched(); | ||
99 | spin_lock(&tree->lock); | ||
100 | } | ||
101 | } | ||
102 | spin_unlock(&tree->lock); | ||
103 | } | ||
104 | |||
79 | static noinline void switch_commit_roots(struct btrfs_transaction *trans, | 105 | static noinline void switch_commit_roots(struct btrfs_transaction *trans, |
80 | struct btrfs_fs_info *fs_info) | 106 | struct btrfs_fs_info *fs_info) |
81 | { | 107 | { |
@@ -89,6 +115,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans, | |||
89 | root->commit_root = btrfs_root_node(root); | 115 | root->commit_root = btrfs_root_node(root); |
90 | if (is_fstree(root->objectid)) | 116 | if (is_fstree(root->objectid)) |
91 | btrfs_unpin_free_ino(root); | 117 | btrfs_unpin_free_ino(root); |
118 | clear_btree_io_tree(&root->dirty_log_pages); | ||
92 | } | 119 | } |
93 | up_write(&fs_info->commit_root_sem); | 120 | up_write(&fs_info->commit_root_sem); |
94 | } | 121 | } |
@@ -220,6 +247,7 @@ loop: | |||
220 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 247 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
221 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | 248 | INIT_LIST_HEAD(&cur_trans->pending_chunks); |
222 | INIT_LIST_HEAD(&cur_trans->switch_commits); | 249 | INIT_LIST_HEAD(&cur_trans->switch_commits); |
250 | INIT_LIST_HEAD(&cur_trans->pending_ordered); | ||
223 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 251 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
224 | extent_io_tree_init(&cur_trans->dirty_pages, | 252 | extent_io_tree_init(&cur_trans->dirty_pages, |
225 | fs_info->btree_inode->i_mapping); | 253 | fs_info->btree_inode->i_mapping); |
@@ -488,6 +516,7 @@ again: | |||
488 | h->sync = false; | 516 | h->sync = false; |
489 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 517 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
490 | INIT_LIST_HEAD(&h->new_bgs); | 518 | INIT_LIST_HEAD(&h->new_bgs); |
519 | INIT_LIST_HEAD(&h->ordered); | ||
491 | 520 | ||
492 | smp_mb(); | 521 | smp_mb(); |
493 | if (cur_trans->state >= TRANS_STATE_BLOCKED && | 522 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
@@ -719,6 +748,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
719 | if (!list_empty(&trans->new_bgs)) | 748 | if (!list_empty(&trans->new_bgs)) |
720 | btrfs_create_pending_block_groups(trans, root); | 749 | btrfs_create_pending_block_groups(trans, root); |
721 | 750 | ||
751 | if (!list_empty(&trans->ordered)) { | ||
752 | spin_lock(&info->trans_lock); | ||
753 | list_splice(&trans->ordered, &cur_trans->pending_ordered); | ||
754 | spin_unlock(&info->trans_lock); | ||
755 | } | ||
756 | |||
722 | trans->delayed_ref_updates = 0; | 757 | trans->delayed_ref_updates = 0; |
723 | if (!trans->sync) { | 758 | if (!trans->sync) { |
724 | must_run_delayed_refs = | 759 | must_run_delayed_refs = |
@@ -828,17 +863,39 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
828 | 863 | ||
829 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 864 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
830 | mark, &cached_state)) { | 865 | mark, &cached_state)) { |
831 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, | 866 | bool wait_writeback = false; |
832 | mark, &cached_state, GFP_NOFS); | 867 | |
833 | cached_state = NULL; | 868 | err = convert_extent_bit(dirty_pages, start, end, |
834 | err = filemap_fdatawrite_range(mapping, start, end); | 869 | EXTENT_NEED_WAIT, |
870 | mark, &cached_state, GFP_NOFS); | ||
871 | /* | ||
872 | * convert_extent_bit can return -ENOMEM, which is most of the | ||
873 | * time a temporary error. So when it happens, ignore the error | ||
874 | * and wait for writeback of this range to finish - because we | ||
875 | * failed to set the bit EXTENT_NEED_WAIT for the range, a call | ||
876 | * to btrfs_wait_marked_extents() would not know that writeback | ||
877 | * for this range started and therefore wouldn't wait for it to | ||
878 | * finish - we don't want to commit a superblock that points to | ||
879 | * btree nodes/leafs for which writeback hasn't finished yet | ||
880 | * (and without errors). | ||
881 | * We clean up any entries left in the io tree when committing | |
882 | * the transaction (through clear_btree_io_tree()). | ||
883 | */ | ||
884 | if (err == -ENOMEM) { | ||
885 | err = 0; | ||
886 | wait_writeback = true; | ||
887 | } | ||
888 | if (!err) | ||
889 | err = filemap_fdatawrite_range(mapping, start, end); | ||
835 | if (err) | 890 | if (err) |
836 | werr = err; | 891 | werr = err; |
892 | else if (wait_writeback) | ||
893 | werr = filemap_fdatawait_range(mapping, start, end); | ||
894 | free_extent_state(cached_state); | ||
895 | cached_state = NULL; | ||
837 | cond_resched(); | 896 | cond_resched(); |
838 | start = end + 1; | 897 | start = end + 1; |
839 | } | 898 | } |
840 | if (err) | ||
841 | werr = err; | ||
842 | return werr; | 899 | return werr; |
843 | } | 900 | } |
844 | 901 | ||
@@ -862,11 +919,25 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
862 | 919 | ||
863 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 920 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
864 | EXTENT_NEED_WAIT, &cached_state)) { | 921 | EXTENT_NEED_WAIT, &cached_state)) { |
865 | clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, | 922 | /* |
866 | 0, 0, &cached_state, GFP_NOFS); | 923 | * Ignore -ENOMEM errors returned by clear_extent_bit(). |
867 | err = filemap_fdatawait_range(mapping, start, end); | 924 | * When committing the transaction, we'll remove any entries |
925 | * left in the io tree. For a log commit, we don't remove them | ||
926 | * after committing the log because the tree can be accessed | ||
927 | * concurrently - we do it only at transaction commit time when | ||
928 | * it's safe to do it (through clear_btree_io_tree()). | ||
929 | */ | ||
930 | err = clear_extent_bit(dirty_pages, start, end, | ||
931 | EXTENT_NEED_WAIT, | ||
932 | 0, 0, &cached_state, GFP_NOFS); | ||
933 | if (err == -ENOMEM) | ||
934 | err = 0; | ||
935 | if (!err) | ||
936 | err = filemap_fdatawait_range(mapping, start, end); | ||
868 | if (err) | 937 | if (err) |
869 | werr = err; | 938 | werr = err; |
939 | free_extent_state(cached_state); | ||
940 | cached_state = NULL; | ||
870 | cond_resched(); | 941 | cond_resched(); |
871 | start = end + 1; | 942 | start = end + 1; |
872 | } | 943 | } |
@@ -919,17 +990,17 @@ static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
919 | return 0; | 990 | return 0; |
920 | } | 991 | } |
921 | 992 | ||
922 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 993 | static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
923 | struct btrfs_root *root) | 994 | struct btrfs_root *root) |
924 | { | 995 | { |
925 | if (!trans || !trans->transaction) { | 996 | int ret; |
926 | struct inode *btree_inode; | 997 | |
927 | btree_inode = root->fs_info->btree_inode; | 998 | ret = btrfs_write_and_wait_marked_extents(root, |
928 | return filemap_write_and_wait(btree_inode->i_mapping); | ||
929 | } | ||
930 | return btrfs_write_and_wait_marked_extents(root, | ||
931 | &trans->transaction->dirty_pages, | 999 | &trans->transaction->dirty_pages, |
932 | EXTENT_DIRTY); | 1000 | EXTENT_DIRTY); |
1001 | clear_btree_io_tree(&trans->transaction->dirty_pages); | ||
1002 | |||
1003 | return ret; | ||
933 | } | 1004 | } |
934 | 1005 | ||
935 | /* | 1006 | /* |
@@ -1652,6 +1723,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1652 | btrfs_wait_ordered_roots(fs_info, -1); | 1723 | btrfs_wait_ordered_roots(fs_info, -1); |
1653 | } | 1724 | } |
1654 | 1725 | ||
1726 | static inline void | ||
1727 | btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, | ||
1728 | struct btrfs_fs_info *fs_info) | ||
1729 | { | ||
1730 | struct btrfs_ordered_extent *ordered; | ||
1731 | |||
1732 | spin_lock(&fs_info->trans_lock); | ||
1733 | while (!list_empty(&cur_trans->pending_ordered)) { | ||
1734 | ordered = list_first_entry(&cur_trans->pending_ordered, | ||
1735 | struct btrfs_ordered_extent, | ||
1736 | trans_list); | ||
1737 | list_del_init(&ordered->trans_list); | ||
1738 | spin_unlock(&fs_info->trans_lock); | ||
1739 | |||
1740 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, | ||
1741 | &ordered->flags)); | ||
1742 | btrfs_put_ordered_extent(ordered); | ||
1743 | spin_lock(&fs_info->trans_lock); | ||
1744 | } | ||
1745 | spin_unlock(&fs_info->trans_lock); | ||
1746 | } | ||
1747 | |||
1655 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1748 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
1656 | struct btrfs_root *root) | 1749 | struct btrfs_root *root) |
1657 | { | 1750 | { |
@@ -1702,6 +1795,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1702 | } | 1795 | } |
1703 | 1796 | ||
1704 | spin_lock(&root->fs_info->trans_lock); | 1797 | spin_lock(&root->fs_info->trans_lock); |
1798 | list_splice(&trans->ordered, &cur_trans->pending_ordered); | ||
1705 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { | 1799 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
1706 | spin_unlock(&root->fs_info->trans_lock); | 1800 | spin_unlock(&root->fs_info->trans_lock); |
1707 | atomic_inc(&cur_trans->use_count); | 1801 | atomic_inc(&cur_trans->use_count); |
@@ -1754,6 +1848,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1754 | 1848 | ||
1755 | btrfs_wait_delalloc_flush(root->fs_info); | 1849 | btrfs_wait_delalloc_flush(root->fs_info); |
1756 | 1850 | ||
1851 | btrfs_wait_pending_ordered(cur_trans, root->fs_info); | ||
1852 | |||
1757 | btrfs_scrub_pause(root); | 1853 | btrfs_scrub_pause(root); |
1758 | /* | 1854 | /* |
1759 | * Ok now we need to make sure to block out any other joins while we | 1855 | * Ok now we need to make sure to block out any other joins while we |
@@ -1842,13 +1938,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1842 | } | 1938 | } |
1843 | 1939 | ||
1844 | /* | 1940 | /* |
1845 | * Since the transaction is done, we should set the inode map cache flag | 1941 | * Since the transaction is done, we can apply the pending changes |
1846 | * before any other coming transaction. | 1942 | * before the next transaction. |
1847 | */ | 1943 | */ |
1848 | if (btrfs_test_opt(root, CHANGE_INODE_CACHE)) | 1944 | btrfs_apply_pending_changes(root->fs_info); |
1849 | btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE); | ||
1850 | else | ||
1851 | btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE); | ||
1852 | 1945 | ||
1853 | /* commit_fs_roots gets rid of all the tree log roots, it is now | 1946 | /* commit_fs_roots gets rid of all the tree log roots, it is now |
1854 | * safe to free the root of tree log roots | 1947 | * safe to free the root of tree log roots |
@@ -2019,3 +2112,32 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
2019 | 2112 | ||
2020 | return (ret < 0) ? 0 : 1; | 2113 | return (ret < 0) ? 0 : 1; |
2021 | } | 2114 | } |
2115 | |||
2116 | void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) | ||
2117 | { | ||
2118 | unsigned long prev; | ||
2119 | unsigned long bit; | ||
2120 | |||
2121 | prev = cmpxchg(&fs_info->pending_changes, 0, 0); | ||
2122 | if (!prev) | ||
2123 | return; | ||
2124 | |||
2125 | bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE; | ||
2126 | if (prev & bit) | ||
2127 | btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE); | ||
2128 | prev &= ~bit; | ||
2129 | |||
2130 | bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE; | ||
2131 | if (prev & bit) | ||
2132 | btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE); | ||
2133 | prev &= ~bit; | ||
2134 | |||
2135 | bit = 1 << BTRFS_PENDING_COMMIT; | ||
2136 | if (prev & bit) | ||
2137 | btrfs_debug(fs_info, "pending commit done"); | ||
2138 | prev &= ~bit; | ||
2139 | |||
2140 | if (prev) | ||
2141 | btrfs_warn(fs_info, | ||
2142 | "unknown pending changes left 0x%lx, ignoring", prev); | ||
2143 | } | ||