aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan Zheng <zheng.yan@oracle.com>2009-01-21 12:54:03 -0500
committerChris Mason <chris.mason@oracle.com>2009-01-21 12:54:03 -0500
commit7237f1833601dcc435a64176c2c347ec4bd959f9 (patch)
tree5e12b9a7655f03181605e02fd91b42e68ee92c2e
parent7e6628544abad773222d8b177f738ac2db1859de (diff)
Btrfs: fix tree logs parallel sync
To improve performance, btrfs_sync_log merges tree log sync requests. But it wrongly merges sync requests for different tree logs. If multiple tree logs are synced at the same time, only one of them actually gets synced. This patch has following changes to fix the bug: Move most tree log related fields in btrfs_fs_info to btrfs_root. This allows merging sync requests separately for each tree log. Don't insert root item into the log root tree immediately after log tree is allocated. Root item for log tree is inserted when log tree get synced for the first time. This allows syncing the log root tree without first syncing all log trees. At tree-log sync, btrfs_sync_log first sync the log tree; then updates corresponding root item in the log root tree; sync the log root tree; then update the super block. Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/disk-io.c79
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c10
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/tree-log.c350
6 files changed, 248 insertions, 210 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e1fec636f37f..de103a8a815e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -695,9 +695,7 @@ struct btrfs_fs_info {
695 struct btrfs_transaction *running_transaction; 695 struct btrfs_transaction *running_transaction;
696 wait_queue_head_t transaction_throttle; 696 wait_queue_head_t transaction_throttle;
697 wait_queue_head_t transaction_wait; 697 wait_queue_head_t transaction_wait;
698
699 wait_queue_head_t async_submit_wait; 698 wait_queue_head_t async_submit_wait;
700 wait_queue_head_t tree_log_wait;
701 699
702 struct btrfs_super_block super_copy; 700 struct btrfs_super_block super_copy;
703 struct btrfs_super_block super_for_commit; 701 struct btrfs_super_block super_for_commit;
@@ -724,10 +722,6 @@ struct btrfs_fs_info {
724 atomic_t async_submit_draining; 722 atomic_t async_submit_draining;
725 atomic_t nr_async_bios; 723 atomic_t nr_async_bios;
726 atomic_t async_delalloc_pages; 724 atomic_t async_delalloc_pages;
727 atomic_t tree_log_writers;
728 atomic_t tree_log_commit;
729 unsigned long tree_log_batch;
730 u64 tree_log_transid;
731 725
732 /* 726 /*
733 * this is used by the balancing code to wait for all the pending 727 * this is used by the balancing code to wait for all the pending
@@ -827,7 +821,14 @@ struct btrfs_root {
827 struct kobject root_kobj; 821 struct kobject root_kobj;
828 struct completion kobj_unregister; 822 struct completion kobj_unregister;
829 struct mutex objectid_mutex; 823 struct mutex objectid_mutex;
824
830 struct mutex log_mutex; 825 struct mutex log_mutex;
826 wait_queue_head_t log_writer_wait;
827 wait_queue_head_t log_commit_wait[2];
828 atomic_t log_writers;
829 atomic_t log_commit[2];
830 unsigned long log_transid;
831 unsigned long log_batch;
831 832
832 u64 objectid; 833 u64 objectid;
833 u64 last_trans; 834 u64 last_trans;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3cf17257f89d..7feac5a475e9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -849,6 +849,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
849 spin_lock_init(&root->list_lock); 849 spin_lock_init(&root->list_lock);
850 mutex_init(&root->objectid_mutex); 850 mutex_init(&root->objectid_mutex);
851 mutex_init(&root->log_mutex); 851 mutex_init(&root->log_mutex);
852 init_waitqueue_head(&root->log_writer_wait);
853 init_waitqueue_head(&root->log_commit_wait[0]);
854 init_waitqueue_head(&root->log_commit_wait[1]);
855 atomic_set(&root->log_commit[0], 0);
856 atomic_set(&root->log_commit[1], 0);
857 atomic_set(&root->log_writers, 0);
858 root->log_batch = 0;
859 root->log_transid = 0;
852 extent_io_tree_init(&root->dirty_log_pages, 860 extent_io_tree_init(&root->dirty_log_pages,
853 fs_info->btree_inode->i_mapping, GFP_NOFS); 861 fs_info->btree_inode->i_mapping, GFP_NOFS);
854 862
@@ -933,15 +941,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
933 return 0; 941 return 0;
934} 942}
935 943
936int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 944static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
937 struct btrfs_fs_info *fs_info) 945 struct btrfs_fs_info *fs_info)
938{ 946{
939 struct btrfs_root *root; 947 struct btrfs_root *root;
940 struct btrfs_root *tree_root = fs_info->tree_root; 948 struct btrfs_root *tree_root = fs_info->tree_root;
949 struct extent_buffer *leaf;
941 950
942 root = kzalloc(sizeof(*root), GFP_NOFS); 951 root = kzalloc(sizeof(*root), GFP_NOFS);
943 if (!root) 952 if (!root)
944 return -ENOMEM; 953 return ERR_PTR(-ENOMEM);
945 954
946 __setup_root(tree_root->nodesize, tree_root->leafsize, 955 __setup_root(tree_root->nodesize, tree_root->leafsize,
947 tree_root->sectorsize, tree_root->stripesize, 956 tree_root->sectorsize, tree_root->stripesize,
@@ -950,12 +959,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
950 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; 959 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
951 root->root_key.type = BTRFS_ROOT_ITEM_KEY; 960 root->root_key.type = BTRFS_ROOT_ITEM_KEY;
952 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; 961 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
962 /*
963 * log trees do not get reference counted because they go away
964 * before a real commit is actually done. They do store pointers
965 * to file data extents, and those reference counts still get
966 * updated (along with back refs to the log tree).
967 */
953 root->ref_cows = 0; 968 root->ref_cows = 0;
954 969
955 root->node = btrfs_alloc_free_block(trans, root, root->leafsize, 970 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
956 0, BTRFS_TREE_LOG_OBJECTID, 971 0, BTRFS_TREE_LOG_OBJECTID,
957 trans->transid, 0, 0, 0); 972 trans->transid, 0, 0, 0);
973 if (IS_ERR(leaf)) {
974 kfree(root);
975 return ERR_CAST(leaf);
976 }
958 977
978 root->node = leaf;
959 btrfs_set_header_nritems(root->node, 0); 979 btrfs_set_header_nritems(root->node, 0);
960 btrfs_set_header_level(root->node, 0); 980 btrfs_set_header_level(root->node, 0);
961 btrfs_set_header_bytenr(root->node, root->node->start); 981 btrfs_set_header_bytenr(root->node, root->node->start);
@@ -967,7 +987,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
967 BTRFS_FSID_SIZE); 987 BTRFS_FSID_SIZE);
968 btrfs_mark_buffer_dirty(root->node); 988 btrfs_mark_buffer_dirty(root->node);
969 btrfs_tree_unlock(root->node); 989 btrfs_tree_unlock(root->node);
970 fs_info->log_root_tree = root; 990 return root;
991}
992
993int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
994 struct btrfs_fs_info *fs_info)
995{
996 struct btrfs_root *log_root;
997
998 log_root = alloc_log_tree(trans, fs_info);
999 if (IS_ERR(log_root))
1000 return PTR_ERR(log_root);
1001 WARN_ON(fs_info->log_root_tree);
1002 fs_info->log_root_tree = log_root;
1003 return 0;
1004}
1005
1006int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1007 struct btrfs_root *root)
1008{
1009 struct btrfs_root *log_root;
1010 struct btrfs_inode_item *inode_item;
1011
1012 log_root = alloc_log_tree(trans, root->fs_info);
1013 if (IS_ERR(log_root))
1014 return PTR_ERR(log_root);
1015
1016 log_root->last_trans = trans->transid;
1017 log_root->root_key.offset = root->root_key.objectid;
1018
1019 inode_item = &log_root->root_item.inode;
1020 inode_item->generation = cpu_to_le64(1);
1021 inode_item->size = cpu_to_le64(3);
1022 inode_item->nlink = cpu_to_le32(1);
1023 inode_item->nbytes = cpu_to_le64(root->leafsize);
1024 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1025
1026 btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
1027 btrfs_set_root_generation(&log_root->root_item, trans->transid);
1028
1029 WARN_ON(root->log_root);
1030 root->log_root = log_root;
1031 root->log_transid = 0;
971 return 0; 1032 return 0;
972} 1033}
973 1034
@@ -1530,10 +1591,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1530 init_waitqueue_head(&fs_info->transaction_throttle); 1591 init_waitqueue_head(&fs_info->transaction_throttle);
1531 init_waitqueue_head(&fs_info->transaction_wait); 1592 init_waitqueue_head(&fs_info->transaction_wait);
1532 init_waitqueue_head(&fs_info->async_submit_wait); 1593 init_waitqueue_head(&fs_info->async_submit_wait);
1533 init_waitqueue_head(&fs_info->tree_log_wait);
1534 atomic_set(&fs_info->tree_log_commit, 0);
1535 atomic_set(&fs_info->tree_log_writers, 0);
1536 fs_info->tree_log_transid = 0;
1537 1594
1538 __setup_root(4096, 4096, 4096, 4096, tree_root, 1595 __setup_root(4096, 4096, 4096, 4096, tree_root,
1539 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1596 fs_info, BTRFS_ROOT_TREE_OBJECTID);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index c0ff404c31b7..494a56eb2986 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -98,5 +98,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
98 struct btrfs_fs_info *fs_info); 98 struct btrfs_fs_info *fs_info);
99int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 99int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
100 struct btrfs_fs_info *fs_info); 100 struct btrfs_fs_info *fs_info);
101int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
102 struct btrfs_root *root);
101int btree_lock_page_hook(struct page *page); 103int btree_lock_page_hook(struct page *page);
102#endif 104#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1d7f043152b0..3b26f0980946 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2698,13 +2698,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2698 /* if metadata always pin */ 2698 /* if metadata always pin */
2699 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 2699 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2700 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 2700 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2701 struct btrfs_block_group_cache *cache; 2701 mutex_lock(&root->fs_info->pinned_mutex);
2702 2702 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2703 /* btrfs_free_reserved_extent */ 2703 mutex_unlock(&root->fs_info->pinned_mutex);
2704 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2705 BUG_ON(!cache);
2706 btrfs_add_free_space(cache, bytenr, num_bytes);
2707 put_block_group(cache);
2708 update_reserved_extents(root, bytenr, num_bytes, 0); 2704 update_reserved_extents(root, bytenr, num_bytes, 0);
2709 return 0; 2705 return 0;
2710 } 2706 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fbcbf43f5114..3e8023efaff7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1214,10 +1214,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1214 } 1214 }
1215 mutex_unlock(&root->fs_info->trans_mutex); 1215 mutex_unlock(&root->fs_info->trans_mutex);
1216 1216
1217 root->fs_info->tree_log_batch++; 1217 root->log_batch++;
1218 filemap_fdatawrite(inode->i_mapping); 1218 filemap_fdatawrite(inode->i_mapping);
1219 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1219 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1220 root->fs_info->tree_log_batch++; 1220 root->log_batch++;
1221 1221
1222 /* 1222 /*
1223 * ok we haven't committed the transaction yet, lets do a commit 1223 * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d81cda2e077c..4f26f3ed0c87 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -78,104 +78,6 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
78 */ 78 */
79 79
80/* 80/*
81 * btrfs_add_log_tree adds a new per-subvolume log tree into the
82 * tree of log tree roots. This must be called with a tree log transaction
83 * running (see start_log_trans).
84 */
85static int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
86 struct btrfs_root *root)
87{
88 struct btrfs_key key;
89 struct btrfs_root_item root_item;
90 struct btrfs_inode_item *inode_item;
91 struct extent_buffer *leaf;
92 struct btrfs_root *new_root = root;
93 int ret;
94 u64 objectid = root->root_key.objectid;
95
96 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
97 BTRFS_TREE_LOG_OBJECTID,
98 trans->transid, 0, 0, 0);
99 if (IS_ERR(leaf)) {
100 ret = PTR_ERR(leaf);
101 return ret;
102 }
103
104 btrfs_set_header_nritems(leaf, 0);
105 btrfs_set_header_level(leaf, 0);
106 btrfs_set_header_bytenr(leaf, leaf->start);
107 btrfs_set_header_generation(leaf, trans->transid);
108 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
109
110 write_extent_buffer(leaf, root->fs_info->fsid,
111 (unsigned long)btrfs_header_fsid(leaf),
112 BTRFS_FSID_SIZE);
113 btrfs_mark_buffer_dirty(leaf);
114
115 inode_item = &root_item.inode;
116 memset(inode_item, 0, sizeof(*inode_item));
117 inode_item->generation = cpu_to_le64(1);
118 inode_item->size = cpu_to_le64(3);
119 inode_item->nlink = cpu_to_le32(1);
120 inode_item->nbytes = cpu_to_le64(root->leafsize);
121 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
122
123 btrfs_set_root_bytenr(&root_item, leaf->start);
124 btrfs_set_root_generation(&root_item, trans->transid);
125 btrfs_set_root_level(&root_item, 0);
126 btrfs_set_root_refs(&root_item, 0);
127 btrfs_set_root_used(&root_item, 0);
128
129 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
130 root_item.drop_level = 0;
131
132 btrfs_tree_unlock(leaf);
133 free_extent_buffer(leaf);
134 leaf = NULL;
135
136 btrfs_set_root_dirid(&root_item, 0);
137
138 key.objectid = BTRFS_TREE_LOG_OBJECTID;
139 key.offset = objectid;
140 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
141 ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key,
142 &root_item);
143 if (ret)
144 goto fail;
145
146 new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree,
147 &key);
148 BUG_ON(!new_root);
149
150 WARN_ON(root->log_root);
151 root->log_root = new_root;
152
153 /*
154 * log trees do not get reference counted because they go away
155 * before a real commit is actually done. They do store pointers
156 * to file data extents, and those reference counts still get
157 * updated (along with back refs to the log tree).
158 */
159 new_root->ref_cows = 0;
160 new_root->last_trans = trans->transid;
161
162 /*
163 * we need to make sure the root block for this new tree
164 * is marked as dirty in the dirty_log_pages tree. This
165 * is how it gets flushed down to disk at tree log commit time.
166 *
167 * the tree logging mutex keeps others from coming in and changing
168 * the new_root->node, so we can safely access it here
169 */
170 set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start,
171 new_root->node->start + new_root->node->len - 1,
172 GFP_NOFS);
173
174fail:
175 return ret;
176}
177
178/*
179 * start a sub transaction and setup the log tree 81 * start a sub transaction and setup the log tree
180 * this increments the log tree writer count to make the people 82 * this increments the log tree writer count to make the people
181 * syncing the tree wait for us to finish 83 * syncing the tree wait for us to finish
@@ -184,6 +86,14 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
184 struct btrfs_root *root) 86 struct btrfs_root *root)
185{ 87{
186 int ret; 88 int ret;
89
90 mutex_lock(&root->log_mutex);
91 if (root->log_root) {
92 root->log_batch++;
93 atomic_inc(&root->log_writers);
94 mutex_unlock(&root->log_mutex);
95 return 0;
96 }
187 mutex_lock(&root->fs_info->tree_log_mutex); 97 mutex_lock(&root->fs_info->tree_log_mutex);
188 if (!root->fs_info->log_root_tree) { 98 if (!root->fs_info->log_root_tree) {
189 ret = btrfs_init_log_root_tree(trans, root->fs_info); 99 ret = btrfs_init_log_root_tree(trans, root->fs_info);
@@ -193,9 +103,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
193 ret = btrfs_add_log_tree(trans, root); 103 ret = btrfs_add_log_tree(trans, root);
194 BUG_ON(ret); 104 BUG_ON(ret);
195 } 105 }
196 atomic_inc(&root->fs_info->tree_log_writers);
197 root->fs_info->tree_log_batch++;
198 mutex_unlock(&root->fs_info->tree_log_mutex); 106 mutex_unlock(&root->fs_info->tree_log_mutex);
107 root->log_batch++;
108 atomic_inc(&root->log_writers);
109 mutex_unlock(&root->log_mutex);
199 return 0; 110 return 0;
200} 111}
201 112
@@ -212,13 +123,12 @@ static int join_running_log_trans(struct btrfs_root *root)
212 if (!root->log_root) 123 if (!root->log_root)
213 return -ENOENT; 124 return -ENOENT;
214 125
215 mutex_lock(&root->fs_info->tree_log_mutex); 126 mutex_lock(&root->log_mutex);
216 if (root->log_root) { 127 if (root->log_root) {
217 ret = 0; 128 ret = 0;
218 atomic_inc(&root->fs_info->tree_log_writers); 129 atomic_inc(&root->log_writers);
219 root->fs_info->tree_log_batch++;
220 } 130 }
221 mutex_unlock(&root->fs_info->tree_log_mutex); 131 mutex_unlock(&root->log_mutex);
222 return ret; 132 return ret;
223} 133}
224 134
@@ -228,10 +138,11 @@ static int join_running_log_trans(struct btrfs_root *root)
228 */ 138 */
229static int end_log_trans(struct btrfs_root *root) 139static int end_log_trans(struct btrfs_root *root)
230{ 140{
231 atomic_dec(&root->fs_info->tree_log_writers); 141 if (atomic_dec_and_test(&root->log_writers)) {
232 smp_mb(); 142 smp_mb();
233 if (waitqueue_active(&root->fs_info->tree_log_wait)) 143 if (waitqueue_active(&root->log_writer_wait))
234 wake_up(&root->fs_info->tree_log_wait); 144 wake_up(&root->log_writer_wait);
145 }
235 return 0; 146 return 0;
236} 147}
237 148
@@ -1902,26 +1813,65 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1902 } 1813 }
1903 } 1814 }
1904 btrfs_free_path(path); 1815 btrfs_free_path(path);
1905 if (wc->free)
1906 free_extent_buffer(log->node);
1907 return ret; 1816 return ret;
1908} 1817}
1909 1818
1910static int wait_log_commit(struct btrfs_root *log) 1819/*
1820 * helper function to update the item for a given subvolumes log root
1821 * in the tree of log roots
1822 */
1823static int update_log_root(struct btrfs_trans_handle *trans,
1824 struct btrfs_root *log)
1825{
1826 int ret;
1827
1828 if (log->log_transid == 1) {
1829 /* insert root item on the first sync */
1830 ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
1831 &log->root_key, &log->root_item);
1832 } else {
1833 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
1834 &log->root_key, &log->root_item);
1835 }
1836 return ret;
1837}
1838
1839static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
1911{ 1840{
1912 DEFINE_WAIT(wait); 1841 DEFINE_WAIT(wait);
1913 u64 transid = log->fs_info->tree_log_transid; 1842 int index = transid % 2;
1914 1843
1844 /*
1845 * we only allow two pending log transactions at a time,
1846 * so we know that if ours is more than 2 older than the
1847 * current transaction, we're done
1848 */
1915 do { 1849 do {
1916 prepare_to_wait(&log->fs_info->tree_log_wait, &wait, 1850 prepare_to_wait(&root->log_commit_wait[index],
1917 TASK_UNINTERRUPTIBLE); 1851 &wait, TASK_UNINTERRUPTIBLE);
1918 mutex_unlock(&log->fs_info->tree_log_mutex); 1852 mutex_unlock(&root->log_mutex);
1919 if (atomic_read(&log->fs_info->tree_log_commit)) 1853 if (root->log_transid < transid + 2 &&
1854 atomic_read(&root->log_commit[index]))
1920 schedule(); 1855 schedule();
1921 finish_wait(&log->fs_info->tree_log_wait, &wait); 1856 finish_wait(&root->log_commit_wait[index], &wait);
1922 mutex_lock(&log->fs_info->tree_log_mutex); 1857 mutex_lock(&root->log_mutex);
1923 } while (transid == log->fs_info->tree_log_transid && 1858 } while (root->log_transid < transid + 2 &&
1924 atomic_read(&log->fs_info->tree_log_commit)); 1859 atomic_read(&root->log_commit[index]));
1860 return 0;
1861}
1862
1863static int wait_for_writer(struct btrfs_root *root)
1864{
1865 DEFINE_WAIT(wait);
1866 while (atomic_read(&root->log_writers)) {
1867 prepare_to_wait(&root->log_writer_wait,
1868 &wait, TASK_UNINTERRUPTIBLE);
1869 mutex_unlock(&root->log_mutex);
1870 if (atomic_read(&root->log_writers))
1871 schedule();
1872 mutex_lock(&root->log_mutex);
1873 finish_wait(&root->log_writer_wait, &wait);
1874 }
1925 return 0; 1875 return 0;
1926} 1876}
1927 1877
@@ -1933,57 +1883,114 @@ static int wait_log_commit(struct btrfs_root *log)
1933int btrfs_sync_log(struct btrfs_trans_handle *trans, 1883int btrfs_sync_log(struct btrfs_trans_handle *trans,
1934 struct btrfs_root *root) 1884 struct btrfs_root *root)
1935{ 1885{
1886 int index1;
1887 int index2;
1936 int ret; 1888 int ret;
1937 unsigned long batch;
1938 struct btrfs_root *log = root->log_root; 1889 struct btrfs_root *log = root->log_root;
1890 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1939 1891
1940 mutex_lock(&log->fs_info->tree_log_mutex); 1892 mutex_lock(&root->log_mutex);
1941 if (atomic_read(&log->fs_info->tree_log_commit)) { 1893 index1 = root->log_transid % 2;
1942 wait_log_commit(log); 1894 if (atomic_read(&root->log_commit[index1])) {
1943 goto out; 1895 wait_log_commit(root, root->log_transid);
1896 mutex_unlock(&root->log_mutex);
1897 return 0;
1944 } 1898 }
1945 atomic_set(&log->fs_info->tree_log_commit, 1); 1899 atomic_set(&root->log_commit[index1], 1);
1900
1901 /* wait for previous tree log sync to complete */
1902 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
1903 wait_log_commit(root, root->log_transid - 1);
1946 1904
1947 while (1) { 1905 while (1) {
1948 batch = log->fs_info->tree_log_batch; 1906 unsigned long batch = root->log_batch;
1949 mutex_unlock(&log->fs_info->tree_log_mutex); 1907 mutex_unlock(&root->log_mutex);
1950 schedule_timeout_uninterruptible(1); 1908 schedule_timeout_uninterruptible(1);
1951 mutex_lock(&log->fs_info->tree_log_mutex); 1909 mutex_lock(&root->log_mutex);
1952 1910 wait_for_writer(root);
1953 while (atomic_read(&log->fs_info->tree_log_writers)) { 1911 if (batch == root->log_batch)
1954 DEFINE_WAIT(wait);
1955 prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
1956 TASK_UNINTERRUPTIBLE);
1957 mutex_unlock(&log->fs_info->tree_log_mutex);
1958 if (atomic_read(&log->fs_info->tree_log_writers))
1959 schedule();
1960 mutex_lock(&log->fs_info->tree_log_mutex);
1961 finish_wait(&log->fs_info->tree_log_wait, &wait);
1962 }
1963 if (batch == log->fs_info->tree_log_batch)
1964 break; 1912 break;
1965 } 1913 }
1966 1914
1967 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 1915 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
1968 BUG_ON(ret); 1916 BUG_ON(ret);
1969 ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, 1917
1970 &root->fs_info->log_root_tree->dirty_log_pages); 1918 btrfs_set_root_bytenr(&log->root_item, log->node->start);
1919 btrfs_set_root_generation(&log->root_item, trans->transid);
1920 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
1921
1922 root->log_batch = 0;
1923 root->log_transid++;
1924 log->log_transid = root->log_transid;
1925 smp_mb();
1926 /*
1927 * log tree has been flushed to disk, new modifications of
1928 * the log will be written to new positions. so it's safe to
1929 * allow log writers to go in.
1930 */
1931 mutex_unlock(&root->log_mutex);
1932
1933 mutex_lock(&log_root_tree->log_mutex);
1934 log_root_tree->log_batch++;
1935 atomic_inc(&log_root_tree->log_writers);
1936 mutex_unlock(&log_root_tree->log_mutex);
1937
1938 ret = update_log_root(trans, log);
1939 BUG_ON(ret);
1940
1941 mutex_lock(&log_root_tree->log_mutex);
1942 if (atomic_dec_and_test(&log_root_tree->log_writers)) {
1943 smp_mb();
1944 if (waitqueue_active(&log_root_tree->log_writer_wait))
1945 wake_up(&log_root_tree->log_writer_wait);
1946 }
1947
1948 index2 = log_root_tree->log_transid % 2;
1949 if (atomic_read(&log_root_tree->log_commit[index2])) {
1950 wait_log_commit(log_root_tree, log_root_tree->log_transid);
1951 mutex_unlock(&log_root_tree->log_mutex);
1952 goto out;
1953 }
1954 atomic_set(&log_root_tree->log_commit[index2], 1);
1955
1956 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2]))
1957 wait_log_commit(log_root_tree, log_root_tree->log_transid - 1);
1958
1959 wait_for_writer(log_root_tree);
1960
1961 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
1962 &log_root_tree->dirty_log_pages);
1971 BUG_ON(ret); 1963 BUG_ON(ret);
1972 1964
1973 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 1965 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
1974 log->fs_info->log_root_tree->node->start); 1966 log_root_tree->node->start);
1975 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, 1967 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
1976 btrfs_header_level(log->fs_info->log_root_tree->node)); 1968 btrfs_header_level(log_root_tree->node));
1969
1970 log_root_tree->log_batch = 0;
1971 log_root_tree->log_transid++;
1972 smp_mb();
1973
1974 mutex_unlock(&log_root_tree->log_mutex);
1975
1976 /*
1977 * nobody else is going to jump in and write the the ctree
1978 * super here because the log_commit atomic below is protecting
1979 * us. We must be called with a transaction handle pinning
1980 * the running transaction open, so a full commit can't hop
1981 * in and cause problems either.
1982 */
1983 write_ctree_super(trans, root->fs_info->tree_root, 2);
1977 1984
1978 write_ctree_super(trans, log->fs_info->tree_root, 2); 1985 atomic_set(&log_root_tree->log_commit[index2], 0);
1979 log->fs_info->tree_log_transid++;
1980 log->fs_info->tree_log_batch = 0;
1981 atomic_set(&log->fs_info->tree_log_commit, 0);
1982 smp_mb(); 1986 smp_mb();
1983 if (waitqueue_active(&log->fs_info->tree_log_wait)) 1987 if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
1984 wake_up(&log->fs_info->tree_log_wait); 1988 wake_up(&log_root_tree->log_commit_wait[index2]);
1985out: 1989out:
1986 mutex_unlock(&log->fs_info->tree_log_mutex); 1990 atomic_set(&root->log_commit[index1], 0);
1991 smp_mb();
1992 if (waitqueue_active(&root->log_commit_wait[index1]))
1993 wake_up(&root->log_commit_wait[index1]);
1987 return 0; 1994 return 0;
1988} 1995}
1989 1996
@@ -2019,38 +2026,18 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2019 start, end, GFP_NOFS); 2026 start, end, GFP_NOFS);
2020 } 2027 }
2021 2028
2022 log = root->log_root; 2029 if (log->log_transid > 0) {
2023 ret = btrfs_del_root(trans, root->fs_info->log_root_tree, 2030 ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
2024 &log->root_key); 2031 &log->root_key);
2025 BUG_ON(ret); 2032 BUG_ON(ret);
2033 }
2026 root->log_root = NULL; 2034 root->log_root = NULL;
2027 kfree(root->log_root); 2035 free_extent_buffer(log->node);
2036 kfree(log);
2028 return 0; 2037 return 0;
2029} 2038}
2030 2039
2031/* 2040/*
2032 * helper function to update the item for a given subvolumes log root
2033 * in the tree of log roots
2034 */
2035static int update_log_root(struct btrfs_trans_handle *trans,
2036 struct btrfs_root *log)
2037{
2038 u64 bytenr = btrfs_root_bytenr(&log->root_item);
2039 int ret;
2040
2041 if (log->node->start == bytenr)
2042 return 0;
2043
2044 btrfs_set_root_bytenr(&log->root_item, log->node->start);
2045 btrfs_set_root_generation(&log->root_item, trans->transid);
2046 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
2047 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
2048 &log->root_key, &log->root_item);
2049 BUG_ON(ret);
2050 return ret;
2051}
2052
2053/*
2054 * If both a file and directory are logged, and unlinks or renames are 2041 * If both a file and directory are logged, and unlinks or renames are
2055 * mixed in, we have a few interesting corners: 2042 * mixed in, we have a few interesting corners:
2056 * 2043 *
@@ -2711,11 +2698,6 @@ next_slot:
2711 2698
2712 btrfs_free_path(path); 2699 btrfs_free_path(path);
2713 btrfs_free_path(dst_path); 2700 btrfs_free_path(dst_path);
2714
2715 mutex_lock(&root->fs_info->tree_log_mutex);
2716 ret = update_log_root(trans, log);
2717 BUG_ON(ret);
2718 mutex_unlock(&root->fs_info->tree_log_mutex);
2719out: 2701out:
2720 return 0; 2702 return 0;
2721} 2703}