Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--  fs/btrfs/transaction.c | 166
1 file changed, 144 insertions, 22 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dcaae3616728..a605d4e2f2bc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -76,6 +76,32 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 	}
 }
 
+static void clear_btree_io_tree(struct extent_io_tree *tree)
+{
+	spin_lock(&tree->lock);
+	while (!RB_EMPTY_ROOT(&tree->state)) {
+		struct rb_node *node;
+		struct extent_state *state;
+
+		node = rb_first(&tree->state);
+		state = rb_entry(node, struct extent_state, rb_node);
+		rb_erase(&state->rb_node, &tree->state);
+		RB_CLEAR_NODE(&state->rb_node);
+		/*
+		 * btree io trees aren't supposed to have tasks waiting for
+		 * changes in the flags of extent states ever.
+		 */
+		ASSERT(!waitqueue_active(&state->wq));
+		free_extent_state(state);
+		if (need_resched()) {
+			spin_unlock(&tree->lock);
+			cond_resched();
+			spin_lock(&tree->lock);
+		}
+	}
+	spin_unlock(&tree->lock);
+}
+
 static noinline void switch_commit_roots(struct btrfs_transaction *trans,
 					 struct btrfs_fs_info *fs_info)
 {
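A note on the clear_btree_io_tree() helper added above: it tears down a potentially large tree while holding a spinlock, so it periodically drops the lock and reschedules to avoid hogging the CPU. Below is a rough userspace analogue of that drain pattern, using a pthread mutex and sched_yield() in place of the kernel spinlock and cond_resched(); all names are invented for illustration, and a fixed batch size stands in for the need_resched() check.

#include <pthread.h>
#include <sched.h>
#include <stdlib.h>

/* Invented stand-in for the per-range records kept in the io tree. */
struct node {
	struct node *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

/*
 * Drain the whole list under the lock, but drop the lock and yield the CPU
 * every so often so a large backlog does not stall other threads for too
 * long; sched_yield() plays the role cond_resched() plays above.
 */
static void drain_all(void)
{
	unsigned int done = 0;

	pthread_mutex_lock(&lock);
	while (head) {
		struct node *n = head;

		head = n->next;
		free(n);
		if (++done % 64 == 0) {	/* arbitrary batch size */
			pthread_mutex_unlock(&lock);
			sched_yield();
			pthread_mutex_lock(&lock);
		}
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	for (int i = 0; i < 1000; i++) {
		struct node *n = malloc(sizeof(*n));

		if (!n)
			break;
		n->next = head;
		head = n;
	}
	drain_all();
	return 0;
}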
@@ -89,6 +115,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
 		root->commit_root = btrfs_root_node(root);
 		if (is_fstree(root->objectid))
 			btrfs_unpin_free_ino(root);
+		clear_btree_io_tree(&root->dirty_log_pages);
 	}
 	up_write(&fs_info->commit_root_sem);
 }
@@ -220,6 +247,7 @@ loop:
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	INIT_LIST_HEAD(&cur_trans->pending_chunks);
 	INIT_LIST_HEAD(&cur_trans->switch_commits);
+	INIT_LIST_HEAD(&cur_trans->pending_ordered);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(&cur_trans->dirty_pages,
 			     fs_info->btree_inode->i_mapping);
@@ -488,6 +516,7 @@ again:
 	h->sync = false;
 	INIT_LIST_HEAD(&h->qgroup_ref_list);
 	INIT_LIST_HEAD(&h->new_bgs);
+	INIT_LIST_HEAD(&h->ordered);
 
 	smp_mb();
 	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
@@ -719,6 +748,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	if (!list_empty(&trans->new_bgs))
 		btrfs_create_pending_block_groups(trans, root);
 
+	if (!list_empty(&trans->ordered)) {
+		spin_lock(&info->trans_lock);
+		list_splice(&trans->ordered, &cur_trans->pending_ordered);
+		spin_unlock(&info->trans_lock);
+	}
+
 	trans->delayed_ref_updates = 0;
 	if (!trans->sync) {
 		must_run_delayed_refs =
@@ -828,17 +863,39 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
 
 	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
 				      mark, &cached_state)) {
-		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
-				   mark, &cached_state, GFP_NOFS);
-		cached_state = NULL;
-		err = filemap_fdatawrite_range(mapping, start, end);
+		bool wait_writeback = false;
+
+		err = convert_extent_bit(dirty_pages, start, end,
+					 EXTENT_NEED_WAIT,
+					 mark, &cached_state, GFP_NOFS);
+		/*
+		 * convert_extent_bit can return -ENOMEM, which is most of the
+		 * time a temporary error. So when it happens, ignore the error
+		 * and wait for writeback of this range to finish - because we
+		 * failed to set the bit EXTENT_NEED_WAIT for the range, a call
+		 * to btrfs_wait_marked_extents() would not know that writeback
+		 * for this range started and therefore wouldn't wait for it to
+		 * finish - we don't want to commit a superblock that points to
+		 * btree nodes/leafs for which writeback hasn't finished yet
+		 * (and without errors).
+		 * We cleanup any entries left in the io tree when committing
+		 * the transaction (through clear_btree_io_tree()).
+		 */
+		if (err == -ENOMEM) {
+			err = 0;
+			wait_writeback = true;
+		}
+		if (!err)
+			err = filemap_fdatawrite_range(mapping, start, end);
 		if (err)
 			werr = err;
+		else if (wait_writeback)
+			werr = filemap_fdatawait_range(mapping, start, end);
+		free_extent_state(cached_state);
+		cached_state = NULL;
 		cond_resched();
 		start = end + 1;
 	}
-	if (err)
-		werr = err;
 	return werr;
 }
 
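The rewritten loop in btrfs_write_marked_extents() above treats -ENOMEM from convert_extent_bit() as a transient failure: it still starts writeback, but then waits for it synchronously, because the EXTENT_NEED_WAIT bit was never set and the later deferred wait would otherwise miss the range. Below is a minimal standalone sketch of that fallback shape in userspace C; every function name in it is invented for illustration, not a btrfs or kernel API.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical stand-ins: tagging a range for a deferred wait (which can
 * fail with -ENOMEM), kicking off writeback, and waiting for it.
 */
static int tag_range_for_later_wait(unsigned long start, unsigned long end)
{
	(void)start; (void)end;
	return -ENOMEM;			/* simulate the allocation failure */
}

static int start_writeback(unsigned long start, unsigned long end)
{
	printf("writeback started for [%lu, %lu]\n", start, end);
	return 0;
}

static int wait_writeback_range(unsigned long start, unsigned long end)
{
	printf("waited for writeback of [%lu, %lu]\n", start, end);
	return 0;
}

/*
 * If tagging fails with -ENOMEM, treat it as transient: still start the
 * writeback, but wait for it synchronously right here, since the deferred
 * waiter would have no record of this range.
 */
static int write_marked_range(unsigned long start, unsigned long end)
{
	bool wait_now = false;
	int err, werr = 0;

	err = tag_range_for_later_wait(start, end);
	if (err == -ENOMEM) {
		err = 0;
		wait_now = true;
	}
	if (!err)
		err = start_writeback(start, end);
	if (err)
		werr = err;
	else if (wait_now)
		werr = wait_writeback_range(start, end);
	return werr;
}

int main(void)
{
	return write_marked_range(0, 4095) ? 1 : 0;
}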
@@ -862,11 +919,25 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
 
 	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
 				      EXTENT_NEED_WAIT, &cached_state)) {
-		clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
-				 0, 0, &cached_state, GFP_NOFS);
-		err = filemap_fdatawait_range(mapping, start, end);
+		/*
+		 * Ignore -ENOMEM errors returned by clear_extent_bit().
+		 * When committing the transaction, we'll remove any entries
+		 * left in the io tree. For a log commit, we don't remove them
+		 * after committing the log because the tree can be accessed
+		 * concurrently - we do it only at transaction commit time when
+		 * it's safe to do it (through clear_btree_io_tree()).
+		 */
+		err = clear_extent_bit(dirty_pages, start, end,
+				       EXTENT_NEED_WAIT,
+				       0, 0, &cached_state, GFP_NOFS);
+		if (err == -ENOMEM)
+			err = 0;
+		if (!err)
+			err = filemap_fdatawait_range(mapping, start, end);
 		if (err)
 			werr = err;
+		free_extent_state(cached_state);
+		cached_state = NULL;
 		cond_resched();
 		start = end + 1;
 	}
@@ -919,17 +990,17 @@ static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 	return 0;
 }
 
-int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root)
+static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+					    struct btrfs_root *root)
 {
-	if (!trans || !trans->transaction) {
-		struct inode *btree_inode;
-		btree_inode = root->fs_info->btree_inode;
-		return filemap_write_and_wait(btree_inode->i_mapping);
-	}
-	return btrfs_write_and_wait_marked_extents(root,
+	int ret;
+
+	ret = btrfs_write_and_wait_marked_extents(root,
 					   &trans->transaction->dirty_pages,
 					   EXTENT_DIRTY);
+	clear_btree_io_tree(&trans->transaction->dirty_pages);
+
+	return ret;
 }
 
 /*
@@ -1652,6 +1723,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 		btrfs_wait_ordered_roots(fs_info, -1);
 }
 
+static inline void
+btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
+			   struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_ordered_extent *ordered;
+
+	spin_lock(&fs_info->trans_lock);
+	while (!list_empty(&cur_trans->pending_ordered)) {
+		ordered = list_first_entry(&cur_trans->pending_ordered,
+					   struct btrfs_ordered_extent,
+					   trans_list);
+		list_del_init(&ordered->trans_list);
+		spin_unlock(&fs_info->trans_lock);
+
+		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
+						   &ordered->flags));
+		btrfs_put_ordered_extent(ordered);
+		spin_lock(&fs_info->trans_lock);
+	}
+	spin_unlock(&fs_info->trans_lock);
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
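btrfs_wait_pending_ordered(), added above, pops entries off the transaction's pending_ordered list while holding trans_lock but releases that lock around each wait_event(), so paths that need the lock can still make progress while we sleep. Below is a rough userspace analogue of that unlock-around-wait shape, using pthread primitives instead of the kernel spinlock and wait queue; reference counting such as btrfs_put_ordered_extent() is omitted and all names are invented.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

/* Invented userspace stand-in for a pending ordered item. */
struct pending_item {
	struct pending_item *next;
	pthread_mutex_t mtx;
	pthread_cond_t done_cond;
	bool done;
};

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pending_item *pending_head;

/* Called by the side that completes an item (e.g. an I/O completion path). */
static void complete_item(struct pending_item *it)
{
	pthread_mutex_lock(&it->mtx);
	it->done = true;
	pthread_cond_signal(&it->done_cond);
	pthread_mutex_unlock(&it->mtx);
}

/*
 * Drain the global pending list: pop one entry under the list lock, then
 * drop that lock before sleeping on the entry's own completion, so other
 * threads can keep adding or completing entries while we wait.
 */
static void wait_all_pending(void)
{
	pthread_mutex_lock(&trans_lock);
	while (pending_head) {
		struct pending_item *it = pending_head;

		pending_head = it->next;
		pthread_mutex_unlock(&trans_lock);

		pthread_mutex_lock(&it->mtx);
		while (!it->done)
			pthread_cond_wait(&it->done_cond, &it->mtx);
		pthread_mutex_unlock(&it->mtx);
		free(it);

		pthread_mutex_lock(&trans_lock);
	}
	pthread_mutex_unlock(&trans_lock);
}

int main(void)
{
	struct pending_item *it = calloc(1, sizeof(*it));

	if (!it)
		return 1;
	pthread_mutex_init(&it->mtx, NULL);
	pthread_cond_init(&it->done_cond, NULL);
	it->next = pending_head;
	pending_head = it;

	complete_item(it);	/* normally done from another thread */
	wait_all_pending();
	return 0;
}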
@@ -1702,6 +1795,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	}
 
 	spin_lock(&root->fs_info->trans_lock);
+	list_splice(&trans->ordered, &cur_trans->pending_ordered);
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
 		spin_unlock(&root->fs_info->trans_lock);
 		atomic_inc(&cur_trans->use_count);
@@ -1754,6 +1848,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_wait_delalloc_flush(root->fs_info);
 
+	btrfs_wait_pending_ordered(cur_trans, root->fs_info);
+
 	btrfs_scrub_pause(root);
 	/*
 	 * Ok now we need to make sure to block out any other joins while we
@@ -1842,13 +1938,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	}
 
 	/*
-	 * Since the transaction is done, we should set the inode map cache flag
-	 * before any other comming transaction.
+	 * Since the transaction is done, we can apply the pending changes
+	 * before the next transaction.
 	 */
-	if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
-		btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
-	else
-		btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
+	btrfs_apply_pending_changes(root->fs_info);
 
 	/* commit_fs_roots gets rid of all the tree log roots, it is now
 	 * safe to free the root of tree log roots
@@ -2019,3 +2112,32 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
 
 	return (ret < 0) ? 0 : 1;
 }
+
+void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
+{
+	unsigned long prev;
+	unsigned long bit;
+
+	prev = cmpxchg(&fs_info->pending_changes, 0, 0);
+	if (!prev)
+		return;
+
+	bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE;
+	if (prev & bit)
+		btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE);
+	prev &= ~bit;
+
+	bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE;
+	if (prev & bit)
+		btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE);
+	prev &= ~bit;
+
+	bit = 1 << BTRFS_PENDING_COMMIT;
+	if (prev & bit)
+		btrfs_debug(fs_info, "pending commit done");
+	prev &= ~bit;
+
+	if (prev)
+		btrfs_warn(fs_info,
+			"unknown pending changes left 0x%lx, ignoring", prev);
+}
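btrfs_apply_pending_changes(), added at the end of the file, walks a bitmask of pending change requests and applies each known bit, warning about any it does not recognize. Below is a minimal C11 sketch of the same pending-bitmask idea, assuming requesters OR bits in atomically and a consumer fetches and clears the mask at a safe point; the fetch-and-clear via atomic_exchange() is a simplification of the helper above, and the bit names and feature flag are invented.

#include <stdatomic.h>
#include <stdio.h>

/* Invented pending-change bits, analogous to the BTRFS_PENDING_* values. */
enum {
	PENDING_SET_FEATURE_X,
	PENDING_CLEAR_FEATURE_X,
	PENDING_COMMIT,
};

static _Atomic unsigned long pending_changes;
static int feature_x_enabled;

/* Any context may request a change; it takes effect at the next safe point. */
static void request_pending(int which)
{
	atomic_fetch_or(&pending_changes, 1UL << which);
}

/* Called at a safe point (for btrfs: the end of a transaction commit). */
static void apply_pending_changes(void)
{
	unsigned long prev = atomic_exchange(&pending_changes, 0);
	unsigned long bit;

	if (!prev)
		return;

	bit = 1UL << PENDING_SET_FEATURE_X;
	if (prev & bit)
		feature_x_enabled = 1;
	prev &= ~bit;

	bit = 1UL << PENDING_CLEAR_FEATURE_X;
	if (prev & bit)
		feature_x_enabled = 0;
	prev &= ~bit;

	bit = 1UL << PENDING_COMMIT;
	if (prev & bit)
		printf("pending commit done\n");
	prev &= ~bit;

	if (prev)
		fprintf(stderr, "unknown pending changes left 0x%lx, ignoring\n",
			prev);
}

int main(void)
{
	request_pending(PENDING_SET_FEATURE_X);
	request_pending(PENDING_COMMIT);
	apply_pending_changes();
	printf("feature_x_enabled=%d\n", feature_x_enabled);
	return 0;
}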