aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
author: Josef Bacik <josef@redhat.com> 2011-04-11 17:25:13 -0400
committer: Josef Bacik <josef@redhat.com> 2011-05-23 13:00:57 -0400
commit: a4abeea41adfa3c143c289045f4625dfaeba2212 (patch)
tree: 792e2a398d8ba77447ba3f9f2c4266a1ce2f611c /fs/btrfs/transaction.c
parent: 2a1eb4614d984d5cd4c928784e9afcf5c07f93be (diff)
Btrfs: kill trans_mutex
We use trans_mutex for lots of things; here's a basic list:

1) To serialize trans_handles joining the currently running transaction.
2) To make sure that no new trans handles are started while we are committing.
3) To protect the dead_roots list and the transaction lists.

Serializing trans_handles joining is not really that hard, but with the mutex it can get badly bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following:

1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going.
2) Protect the fs_info->trans_list. This doesn't get used too much; basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most.
3) Protect the dead roots list. This is only ever processed by splicing the list, so this is relatively simple.
4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change.
5) Protect fs_info->trans_no_join. Because we don't hold the trans_lock over the entirety of the commit, we need to have a way to block new people from creating a new transaction while we're doing our work. So we set trans_no_join, and in join_transaction we test to see if it is set; if it is, the join fails with -EBUSY (unless the caller cannot fail) and the caller waits on the current transaction before retrying.
6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references.
7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time.
8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans.

I have tested this with xfstests, but obviously it is a pretty hairy change, so lots of testing is greatly appreciated. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- fs/btrfs/transaction.c 271
1 files changed, 142 insertions, 129 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 46f40564c16..43816f8b23e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -34,6 +34,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(atomic_read(&transaction->use_count) == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 if (atomic_dec_and_test(&transaction->use_count)) { 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 BUG_ON(!list_empty(&transaction->list));
37 memset(transaction, 0, sizeof(*transaction)); 38 memset(transaction, 0, sizeof(*transaction));
38 kmem_cache_free(btrfs_transaction_cachep, transaction); 39 kmem_cache_free(btrfs_transaction_cachep, transaction);
39 } 40 }
@@ -48,47 +49,73 @@ static noinline void switch_commit_root(struct btrfs_root *root)
48/* 49/*
49 * either allocate a new transaction or hop into the existing one 50 * either allocate a new transaction or hop into the existing one
50 */ 51 */
51static noinline int join_transaction(struct btrfs_root *root) 52static noinline int join_transaction(struct btrfs_root *root, int nofail)
52{ 53{
53 struct btrfs_transaction *cur_trans; 54 struct btrfs_transaction *cur_trans;
55
56 spin_lock(&root->fs_info->trans_lock);
57 if (root->fs_info->trans_no_join) {
58 if (!nofail) {
59 spin_unlock(&root->fs_info->trans_lock);
60 return -EBUSY;
61 }
62 }
63
54 cur_trans = root->fs_info->running_transaction; 64 cur_trans = root->fs_info->running_transaction;
55 if (!cur_trans) { 65 if (cur_trans) {
56 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 66 atomic_inc(&cur_trans->use_count);
57 GFP_NOFS); 67 atomic_inc(&cur_trans->num_writers);
58 if (!cur_trans) 68 cur_trans->num_joined++;
59 return -ENOMEM; 69 spin_unlock(&root->fs_info->trans_lock);
60 root->fs_info->generation++; 70 return 0;
61 atomic_set(&cur_trans->num_writers, 1); 71 }
62 cur_trans->num_joined = 0; 72 spin_unlock(&root->fs_info->trans_lock);
63 cur_trans->transid = root->fs_info->generation; 73
64 init_waitqueue_head(&cur_trans->writer_wait); 74 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
65 init_waitqueue_head(&cur_trans->commit_wait); 75 if (!cur_trans)
66 cur_trans->in_commit = 0; 76 return -ENOMEM;
67 cur_trans->blocked = 0; 77 spin_lock(&root->fs_info->trans_lock);
68 atomic_set(&cur_trans->use_count, 1); 78 if (root->fs_info->running_transaction) {
69 cur_trans->commit_done = 0; 79 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
70 cur_trans->start_time = get_seconds(); 80 cur_trans = root->fs_info->running_transaction;
71 81 atomic_inc(&cur_trans->use_count);
72 cur_trans->delayed_refs.root = RB_ROOT;
73 cur_trans->delayed_refs.num_entries = 0;
74 cur_trans->delayed_refs.num_heads_ready = 0;
75 cur_trans->delayed_refs.num_heads = 0;
76 cur_trans->delayed_refs.flushing = 0;
77 cur_trans->delayed_refs.run_delayed_start = 0;
78 spin_lock_init(&cur_trans->delayed_refs.lock);
79
80 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
82 extent_io_tree_init(&cur_trans->dirty_pages,
83 root->fs_info->btree_inode->i_mapping,
84 GFP_NOFS);
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
88 } else {
89 atomic_inc(&cur_trans->num_writers); 82 atomic_inc(&cur_trans->num_writers);
90 cur_trans->num_joined++; 83 cur_trans->num_joined++;
84 spin_unlock(&root->fs_info->trans_lock);
85 return 0;
91 } 86 }
87 atomic_set(&cur_trans->num_writers, 1);
88 cur_trans->num_joined = 0;
89 init_waitqueue_head(&cur_trans->writer_wait);
90 init_waitqueue_head(&cur_trans->commit_wait);
91 cur_trans->in_commit = 0;
92 cur_trans->blocked = 0;
93 /*
94 * One for this trans handle, one so it will live on until we
95 * commit the transaction.
96 */
97 atomic_set(&cur_trans->use_count, 2);
98 cur_trans->commit_done = 0;
99 cur_trans->start_time = get_seconds();
100
101 cur_trans->delayed_refs.root = RB_ROOT;
102 cur_trans->delayed_refs.num_entries = 0;
103 cur_trans->delayed_refs.num_heads_ready = 0;
104 cur_trans->delayed_refs.num_heads = 0;
105 cur_trans->delayed_refs.flushing = 0;
106 cur_trans->delayed_refs.run_delayed_start = 0;
107 spin_lock_init(&cur_trans->commit_lock);
108 spin_lock_init(&cur_trans->delayed_refs.lock);
109
110 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
111 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
112 extent_io_tree_init(&cur_trans->dirty_pages,
113 root->fs_info->btree_inode->i_mapping,
114 GFP_NOFS);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
92 119
93 return 0; 120 return 0;
94} 121}
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)
99 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits 127 * when the transaction commits
101 */ 128 */
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root) 130 struct btrfs_root *root)
104{ 131{
105 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
108 135
136 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0;
140 }
141 root->last_trans = trans->transid;
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 142 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid, 143 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG); 144 BTRFS_ROOT_TRANS_TAG);
112 root->last_trans = trans->transid; 145 spin_unlock(&root->fs_info->fs_roots_radix_lock);
113 btrfs_init_reloc_root(trans, root); 146 btrfs_init_reloc_root(trans, root);
114 } 147 }
115 return 0; 148 return 0;
116} 149}
117 150
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root)
120{
121 if (!root->ref_cows)
122 return 0;
123
124 mutex_lock(&root->fs_info->trans_mutex);
125 if (root->last_trans == trans->transid) {
126 mutex_unlock(&root->fs_info->trans_mutex);
127 return 0;
128 }
129
130 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex);
132 return 0;
133}
134
135/* wait for commit against the current transaction to become unblocked 151/* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current 152 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk. 153 * transaction might not be fully on disk.
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)
140{ 156{
141 struct btrfs_transaction *cur_trans; 157 struct btrfs_transaction *cur_trans;
142 158
159 spin_lock(&root->fs_info->trans_lock);
143 cur_trans = root->fs_info->running_transaction; 160 cur_trans = root->fs_info->running_transaction;
144 if (cur_trans && cur_trans->blocked) { 161 if (cur_trans && cur_trans->blocked) {
145 DEFINE_WAIT(wait); 162 DEFINE_WAIT(wait);
146 atomic_inc(&cur_trans->use_count); 163 atomic_inc(&cur_trans->use_count);
164 spin_unlock(&root->fs_info->trans_lock);
147 while (1) { 165 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 166 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 167 TASK_UNINTERRUPTIBLE);
150 if (!cur_trans->blocked) 168 if (!cur_trans->blocked)
151 break; 169 break;
152 mutex_unlock(&root->fs_info->trans_mutex);
153 schedule(); 170 schedule();
154 mutex_lock(&root->fs_info->trans_mutex);
155 } 171 }
156 finish_wait(&root->fs_info->transaction_wait, &wait); 172 finish_wait(&root->fs_info->transaction_wait, &wait);
157 put_transaction(cur_trans); 173 put_transaction(cur_trans);
174 } else {
175 spin_unlock(&root->fs_info->trans_lock);
158 } 176 }
159} 177}
160 178
@@ -167,10 +185,16 @@ enum btrfs_trans_type {
167 185
168static int may_wait_transaction(struct btrfs_root *root, int type) 186static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 187{
170 if (!root->fs_info->log_root_recovering && 188 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 189 return 0;
172 type == TRANS_USERSPACE)) 190
191 if (type == TRANS_USERSPACE)
192 return 1;
193
194 if (type == TRANS_START &&
195 !atomic_read(&root->fs_info->open_ioctl_trans))
173 return 1; 196 return 1;
197
174 return 0; 198 return 0;
175} 199}
176 200
@@ -198,23 +222,21 @@ again:
198 if (!h) 222 if (!h)
199 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
200 224
201 if (type != TRANS_JOIN_NOLOCK)
202 mutex_lock(&root->fs_info->trans_mutex);
203 if (may_wait_transaction(root, type)) 225 if (may_wait_transaction(root, type))
204 wait_current_trans(root); 226 wait_current_trans(root);
205 227
206 ret = join_transaction(root); 228 do {
229 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
230 if (ret == -EBUSY)
231 wait_current_trans(root);
232 } while (ret == -EBUSY);
233
207 if (ret < 0) { 234 if (ret < 0) {
208 kmem_cache_free(btrfs_trans_handle_cachep, h); 235 kmem_cache_free(btrfs_trans_handle_cachep, h);
209 if (type != TRANS_JOIN_NOLOCK)
210 mutex_unlock(&root->fs_info->trans_mutex);
211 return ERR_PTR(ret); 236 return ERR_PTR(ret);
212 } 237 }
213 238
214 cur_trans = root->fs_info->running_transaction; 239 cur_trans = root->fs_info->running_transaction;
215 atomic_inc(&cur_trans->use_count);
216 if (type != TRANS_JOIN_NOLOCK)
217 mutex_unlock(&root->fs_info->trans_mutex);
218 240
219 h->transid = cur_trans->transid; 241 h->transid = cur_trans->transid;
220 h->transaction = cur_trans; 242 h->transaction = cur_trans;
@@ -253,11 +275,7 @@ again:
253 } 275 }
254 276
255got_it: 277got_it:
256 if (type != TRANS_JOIN_NOLOCK) 278 btrfs_record_root_in_trans(h, root);
257 mutex_lock(&root->fs_info->trans_mutex);
258 record_root_in_trans(h, root);
259 if (type != TRANS_JOIN_NOLOCK)
260 mutex_unlock(&root->fs_info->trans_mutex);
261 279
262 if (!current->journal_info && type != TRANS_USERSPACE) 280 if (!current->journal_info && type != TRANS_USERSPACE)
263 current->journal_info = h; 281 current->journal_info = h;
@@ -289,17 +307,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
289 struct btrfs_transaction *commit) 307 struct btrfs_transaction *commit)
290{ 308{
291 DEFINE_WAIT(wait); 309 DEFINE_WAIT(wait);
292 mutex_lock(&root->fs_info->trans_mutex);
293 while (!commit->commit_done) { 310 while (!commit->commit_done) {
294 prepare_to_wait(&commit->commit_wait, &wait, 311 prepare_to_wait(&commit->commit_wait, &wait,
295 TASK_UNINTERRUPTIBLE); 312 TASK_UNINTERRUPTIBLE);
296 if (commit->commit_done) 313 if (commit->commit_done)
297 break; 314 break;
298 mutex_unlock(&root->fs_info->trans_mutex);
299 schedule(); 315 schedule();
300 mutex_lock(&root->fs_info->trans_mutex);
301 } 316 }
302 mutex_unlock(&root->fs_info->trans_mutex);
303 finish_wait(&commit->commit_wait, &wait); 317 finish_wait(&commit->commit_wait, &wait);
304 return 0; 318 return 0;
305} 319}
@@ -309,50 +323,49 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
309 struct btrfs_transaction *cur_trans = NULL, *t; 323 struct btrfs_transaction *cur_trans = NULL, *t;
310 int ret; 324 int ret;
311 325
312 mutex_lock(&root->fs_info->trans_mutex);
313
314 ret = 0; 326 ret = 0;
315 if (transid) { 327 if (transid) {
316 if (transid <= root->fs_info->last_trans_committed) 328 if (transid <= root->fs_info->last_trans_committed)
317 goto out_unlock; 329 goto out;
318 330
319 /* find specified transaction */ 331 /* find specified transaction */
332 spin_lock(&root->fs_info->trans_lock);
320 list_for_each_entry(t, &root->fs_info->trans_list, list) { 333 list_for_each_entry(t, &root->fs_info->trans_list, list) {
321 if (t->transid == transid) { 334 if (t->transid == transid) {
322 cur_trans = t; 335 cur_trans = t;
336 atomic_inc(&cur_trans->use_count);
323 break; 337 break;
324 } 338 }
325 if (t->transid > transid) 339 if (t->transid > transid)
326 break; 340 break;
327 } 341 }
342 spin_unlock(&root->fs_info->trans_lock);
328 ret = -EINVAL; 343 ret = -EINVAL;
329 if (!cur_trans) 344 if (!cur_trans)
330 goto out_unlock; /* bad transid */ 345 goto out; /* bad transid */
331 } else { 346 } else {
332 /* find newest transaction that is committing | committed */ 347 /* find newest transaction that is committing | committed */
348 spin_lock(&root->fs_info->trans_lock);
333 list_for_each_entry_reverse(t, &root->fs_info->trans_list, 349 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
334 list) { 350 list) {
335 if (t->in_commit) { 351 if (t->in_commit) {
336 if (t->commit_done) 352 if (t->commit_done)
337 goto out_unlock; 353 goto out;
338 cur_trans = t; 354 cur_trans = t;
355 atomic_inc(&cur_trans->use_count);
339 break; 356 break;
340 } 357 }
341 } 358 }
359 spin_unlock(&root->fs_info->trans_lock);
342 if (!cur_trans) 360 if (!cur_trans)
343 goto out_unlock; /* nothing committing|committed */ 361 goto out; /* nothing committing|committed */
344 } 362 }
345 363
346 atomic_inc(&cur_trans->use_count);
347 mutex_unlock(&root->fs_info->trans_mutex);
348
349 wait_for_commit(root, cur_trans); 364 wait_for_commit(root, cur_trans);
350 365
351 mutex_lock(&root->fs_info->trans_mutex);
352 put_transaction(cur_trans); 366 put_transaction(cur_trans);
353 ret = 0; 367 ret = 0;
354out_unlock: 368out:
355 mutex_unlock(&root->fs_info->trans_mutex);
356 return ret; 369 return ret;
357} 370}
358 371
@@ -401,10 +414,8 @@ harder:
401 414
402void btrfs_throttle(struct btrfs_root *root) 415void btrfs_throttle(struct btrfs_root *root)
403{ 416{
404 mutex_lock(&root->fs_info->trans_mutex); 417 if (!atomic_read(&root->fs_info->open_ioctl_trans))
405 if (!root->fs_info->open_ioctl_trans)
406 wait_current_trans(root); 418 wait_current_trans(root);
407 mutex_unlock(&root->fs_info->trans_mutex);
408} 419}
409 420
410static int should_end_transaction(struct btrfs_trans_handle *trans, 421static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -422,6 +433,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
422 struct btrfs_transaction *cur_trans = trans->transaction; 433 struct btrfs_transaction *cur_trans = trans->transaction;
423 int updates; 434 int updates;
424 435
436 smp_mb();
425 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 437 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
426 return 1; 438 return 1;
427 439
@@ -467,9 +479,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
467 479
468 btrfs_trans_release_metadata(trans, root); 480 btrfs_trans_release_metadata(trans, root);
469 481
470 if (lock && !root->fs_info->open_ioctl_trans && 482 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
471 should_end_transaction(trans, root)) 483 should_end_transaction(trans, root)) {
472 trans->transaction->blocked = 1; 484 trans->transaction->blocked = 1;
485 smp_wmb();
486 }
473 487
474 if (lock && cur_trans->blocked && !cur_trans->in_commit) { 488 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
475 if (throttle) 489 if (throttle)
@@ -739,9 +753,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
739 */ 753 */
740int btrfs_add_dead_root(struct btrfs_root *root) 754int btrfs_add_dead_root(struct btrfs_root *root)
741{ 755{
742 mutex_lock(&root->fs_info->trans_mutex); 756 spin_lock(&root->fs_info->trans_lock);
743 list_add(&root->root_list, &root->fs_info->dead_roots); 757 list_add(&root->root_list, &root->fs_info->dead_roots);
744 mutex_unlock(&root->fs_info->trans_mutex); 758 spin_unlock(&root->fs_info->trans_lock);
745 return 0; 759 return 0;
746} 760}
747 761
@@ -757,6 +771,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
757 int ret; 771 int ret;
758 int err = 0; 772 int err = 0;
759 773
774 spin_lock(&fs_info->fs_roots_radix_lock);
760 while (1) { 775 while (1) {
761 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 776 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
762 (void **)gang, 0, 777 (void **)gang, 0,
@@ -769,6 +784,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
769 radix_tree_tag_clear(&fs_info->fs_roots_radix, 784 radix_tree_tag_clear(&fs_info->fs_roots_radix,
770 (unsigned long)root->root_key.objectid, 785 (unsigned long)root->root_key.objectid,
771 BTRFS_ROOT_TRANS_TAG); 786 BTRFS_ROOT_TRANS_TAG);
787 spin_unlock(&fs_info->fs_roots_radix_lock);
772 788
773 btrfs_free_log(trans, root); 789 btrfs_free_log(trans, root);
774 btrfs_update_reloc_root(trans, root); 790 btrfs_update_reloc_root(trans, root);
@@ -783,10 +799,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
783 err = btrfs_update_root(trans, fs_info->tree_root, 799 err = btrfs_update_root(trans, fs_info->tree_root,
784 &root->root_key, 800 &root->root_key,
785 &root->root_item); 801 &root->root_item);
802 spin_lock(&fs_info->fs_roots_radix_lock);
786 if (err) 803 if (err)
787 break; 804 break;
788 } 805 }
789 } 806 }
807 spin_unlock(&fs_info->fs_roots_radix_lock);
790 return err; 808 return err;
791} 809}
792 810
@@ -972,7 +990,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
972 parent = dget_parent(dentry); 990 parent = dget_parent(dentry);
973 parent_inode = parent->d_inode; 991 parent_inode = parent->d_inode;
974 parent_root = BTRFS_I(parent_inode)->root; 992 parent_root = BTRFS_I(parent_inode)->root;
975 record_root_in_trans(trans, parent_root); 993 btrfs_record_root_in_trans(trans, parent_root);
976 994
977 /* 995 /*
978 * insert the directory item 996 * insert the directory item
@@ -990,7 +1008,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
990 ret = btrfs_update_inode(trans, parent_root, parent_inode); 1008 ret = btrfs_update_inode(trans, parent_root, parent_inode);
991 BUG_ON(ret); 1009 BUG_ON(ret);
992 1010
993 record_root_in_trans(trans, root); 1011 btrfs_record_root_in_trans(trans, root);
994 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 1012 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
995 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 1013 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
996 btrfs_check_and_init_root_item(new_root_item); 1014 btrfs_check_and_init_root_item(new_root_item);
@@ -1080,20 +1098,20 @@ static void update_super_roots(struct btrfs_root *root)
1080int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1098int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
1081{ 1099{
1082 int ret = 0; 1100 int ret = 0;
1083 spin_lock(&info->new_trans_lock); 1101 spin_lock(&info->trans_lock);
1084 if (info->running_transaction) 1102 if (info->running_transaction)
1085 ret = info->running_transaction->in_commit; 1103 ret = info->running_transaction->in_commit;
1086 spin_unlock(&info->new_trans_lock); 1104 spin_unlock(&info->trans_lock);
1087 return ret; 1105 return ret;
1088} 1106}
1089 1107
1090int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1108int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1091{ 1109{
1092 int ret = 0; 1110 int ret = 0;
1093 spin_lock(&info->new_trans_lock); 1111 spin_lock(&info->trans_lock);
1094 if (info->running_transaction) 1112 if (info->running_transaction)
1095 ret = info->running_transaction->blocked; 1113 ret = info->running_transaction->blocked;
1096 spin_unlock(&info->new_trans_lock); 1114 spin_unlock(&info->trans_lock);
1097 return ret; 1115 return ret;
1098} 1116}
1099 1117
@@ -1117,9 +1135,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1117 &wait); 1135 &wait);
1118 break; 1136 break;
1119 } 1137 }
1120 mutex_unlock(&root->fs_info->trans_mutex);
1121 schedule(); 1138 schedule();
1122 mutex_lock(&root->fs_info->trans_mutex);
1123 finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1139 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1124 } 1140 }
1125} 1141}
@@ -1145,9 +1161,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1145 &wait); 1161 &wait);
1146 break; 1162 break;
1147 } 1163 }
1148 mutex_unlock(&root->fs_info->trans_mutex);
1149 schedule(); 1164 schedule();
1150 mutex_lock(&root->fs_info->trans_mutex);
1151 finish_wait(&root->fs_info->transaction_wait, 1165 finish_wait(&root->fs_info->transaction_wait,
1152 &wait); 1166 &wait);
1153 } 1167 }
@@ -1193,22 +1207,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1193 } 1207 }
1194 1208
1195 /* take transaction reference */ 1209 /* take transaction reference */
1196 mutex_lock(&root->fs_info->trans_mutex);
1197 cur_trans = trans->transaction; 1210 cur_trans = trans->transaction;
1198 atomic_inc(&cur_trans->use_count); 1211 atomic_inc(&cur_trans->use_count);
1199 mutex_unlock(&root->fs_info->trans_mutex);
1200 1212
1201 btrfs_end_transaction(trans, root); 1213 btrfs_end_transaction(trans, root);
1202 schedule_delayed_work(&ac->work, 0); 1214 schedule_delayed_work(&ac->work, 0);
1203 1215
1204 /* wait for transaction to start and unblock */ 1216 /* wait for transaction to start and unblock */
1205 mutex_lock(&root->fs_info->trans_mutex);
1206 if (wait_for_unblock) 1217 if (wait_for_unblock)
1207 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1218 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1208 else 1219 else
1209 wait_current_trans_commit_start(root, cur_trans); 1220 wait_current_trans_commit_start(root, cur_trans);
1210 put_transaction(cur_trans); 1221 put_transaction(cur_trans);
1211 mutex_unlock(&root->fs_info->trans_mutex);
1212 1222
1213 return 0; 1223 return 0;
1214} 1224}
@@ -1252,38 +1262,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1252 ret = btrfs_run_delayed_refs(trans, root, 0); 1262 ret = btrfs_run_delayed_refs(trans, root, 0);
1253 BUG_ON(ret); 1263 BUG_ON(ret);
1254 1264
1255 mutex_lock(&root->fs_info->trans_mutex); 1265 spin_lock(&cur_trans->commit_lock);
1256 if (cur_trans->in_commit) { 1266 if (cur_trans->in_commit) {
1267 spin_unlock(&cur_trans->commit_lock);
1257 atomic_inc(&cur_trans->use_count); 1268 atomic_inc(&cur_trans->use_count);
1258 mutex_unlock(&root->fs_info->trans_mutex);
1259 btrfs_end_transaction(trans, root); 1269 btrfs_end_transaction(trans, root);
1260 1270
1261 ret = wait_for_commit(root, cur_trans); 1271 ret = wait_for_commit(root, cur_trans);
1262 BUG_ON(ret); 1272 BUG_ON(ret);
1263 1273
1264 mutex_lock(&root->fs_info->trans_mutex);
1265 put_transaction(cur_trans); 1274 put_transaction(cur_trans);
1266 mutex_unlock(&root->fs_info->trans_mutex);
1267 1275
1268 return 0; 1276 return 0;
1269 } 1277 }
1270 1278
1271 trans->transaction->in_commit = 1; 1279 trans->transaction->in_commit = 1;
1272 trans->transaction->blocked = 1; 1280 trans->transaction->blocked = 1;
1281 spin_unlock(&cur_trans->commit_lock);
1273 wake_up(&root->fs_info->transaction_blocked_wait); 1282 wake_up(&root->fs_info->transaction_blocked_wait);
1274 1283
1284 spin_lock(&root->fs_info->trans_lock);
1275 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1285 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1276 prev_trans = list_entry(cur_trans->list.prev, 1286 prev_trans = list_entry(cur_trans->list.prev,
1277 struct btrfs_transaction, list); 1287 struct btrfs_transaction, list);
1278 if (!prev_trans->commit_done) { 1288 if (!prev_trans->commit_done) {
1279 atomic_inc(&prev_trans->use_count); 1289 atomic_inc(&prev_trans->use_count);
1280 mutex_unlock(&root->fs_info->trans_mutex); 1290 spin_unlock(&root->fs_info->trans_lock);
1281 1291
1282 wait_for_commit(root, prev_trans); 1292 wait_for_commit(root, prev_trans);
1283 1293
1284 mutex_lock(&root->fs_info->trans_mutex);
1285 put_transaction(prev_trans); 1294 put_transaction(prev_trans);
1295 } else {
1296 spin_unlock(&root->fs_info->trans_lock);
1286 } 1297 }
1298 } else {
1299 spin_unlock(&root->fs_info->trans_lock);
1287 } 1300 }
1288 1301
1289 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1302 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1291,12 +1304,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1291 1304
1292 do { 1305 do {
1293 int snap_pending = 0; 1306 int snap_pending = 0;
1307
1294 joined = cur_trans->num_joined; 1308 joined = cur_trans->num_joined;
1295 if (!list_empty(&trans->transaction->pending_snapshots)) 1309 if (!list_empty(&trans->transaction->pending_snapshots))
1296 snap_pending = 1; 1310 snap_pending = 1;
1297 1311
1298 WARN_ON(cur_trans != trans->transaction); 1312 WARN_ON(cur_trans != trans->transaction);
1299 mutex_unlock(&root->fs_info->trans_mutex);
1300 1313
1301 if (flush_on_commit || snap_pending) { 1314 if (flush_on_commit || snap_pending) {
1302 btrfs_start_delalloc_inodes(root, 1); 1315 btrfs_start_delalloc_inodes(root, 1);
@@ -1316,14 +1329,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1316 prepare_to_wait(&cur_trans->writer_wait, &wait, 1329 prepare_to_wait(&cur_trans->writer_wait, &wait,
1317 TASK_UNINTERRUPTIBLE); 1330 TASK_UNINTERRUPTIBLE);
1318 1331
1319 smp_mb();
1320 if (atomic_read(&cur_trans->num_writers) > 1) 1332 if (atomic_read(&cur_trans->num_writers) > 1)
1321 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1333 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1322 else if (should_grow) 1334 else if (should_grow)
1323 schedule_timeout(1); 1335 schedule_timeout(1);
1324 1336
1325 mutex_lock(&root->fs_info->trans_mutex);
1326 finish_wait(&cur_trans->writer_wait, &wait); 1337 finish_wait(&cur_trans->writer_wait, &wait);
1338 spin_lock(&root->fs_info->trans_lock);
1339 root->fs_info->trans_no_join = 1;
1340 spin_unlock(&root->fs_info->trans_lock);
1327 } while (atomic_read(&cur_trans->num_writers) > 1 || 1341 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1328 (should_grow && cur_trans->num_joined != joined)); 1342 (should_grow && cur_trans->num_joined != joined));
1329 1343
@@ -1364,9 +1378,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1364 btrfs_prepare_extent_commit(trans, root); 1378 btrfs_prepare_extent_commit(trans, root);
1365 1379
1366 cur_trans = root->fs_info->running_transaction; 1380 cur_trans = root->fs_info->running_transaction;
1367 spin_lock(&root->fs_info->new_trans_lock);
1368 root->fs_info->running_transaction = NULL;
1369 spin_unlock(&root->fs_info->new_trans_lock);
1370 1381
1371 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1382 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1372 root->fs_info->tree_root->node); 1383 root->fs_info->tree_root->node);
@@ -1387,10 +1398,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1387 sizeof(root->fs_info->super_copy)); 1398 sizeof(root->fs_info->super_copy));
1388 1399
1389 trans->transaction->blocked = 0; 1400 trans->transaction->blocked = 0;
1401 spin_lock(&root->fs_info->trans_lock);
1402 root->fs_info->running_transaction = NULL;
1403 root->fs_info->trans_no_join = 0;
1404 spin_unlock(&root->fs_info->trans_lock);
1390 1405
1391 wake_up(&root->fs_info->transaction_wait); 1406 wake_up(&root->fs_info->transaction_wait);
1392 1407
1393 mutex_unlock(&root->fs_info->trans_mutex);
1394 ret = btrfs_write_and_wait_transaction(trans, root); 1408 ret = btrfs_write_and_wait_transaction(trans, root);
1395 BUG_ON(ret); 1409 BUG_ON(ret);
1396 write_ctree_super(trans, root, 0); 1410 write_ctree_super(trans, root, 0);
@@ -1403,22 +1417,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1403 1417
1404 btrfs_finish_extent_commit(trans, root); 1418 btrfs_finish_extent_commit(trans, root);
1405 1419
1406 mutex_lock(&root->fs_info->trans_mutex);
1407
1408 cur_trans->commit_done = 1; 1420 cur_trans->commit_done = 1;
1409 1421
1410 root->fs_info->last_trans_committed = cur_trans->transid; 1422 root->fs_info->last_trans_committed = cur_trans->transid;
1411 1423
1412 wake_up(&cur_trans->commit_wait); 1424 wake_up(&cur_trans->commit_wait);
1413 1425
1426 spin_lock(&root->fs_info->trans_lock);
1414 list_del_init(&cur_trans->list); 1427 list_del_init(&cur_trans->list);
1428 spin_unlock(&root->fs_info->trans_lock);
1429
1415 put_transaction(cur_trans); 1430 put_transaction(cur_trans);
1416 put_transaction(cur_trans); 1431 put_transaction(cur_trans);
1417 1432
1418 trace_btrfs_transaction_commit(root); 1433 trace_btrfs_transaction_commit(root);
1419 1434
1420 mutex_unlock(&root->fs_info->trans_mutex);
1421
1422 if (current->journal_info == trans) 1435 if (current->journal_info == trans)
1423 current->journal_info = NULL; 1436 current->journal_info = NULL;
1424 1437
@@ -1438,9 +1451,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1438 LIST_HEAD(list); 1451 LIST_HEAD(list);
1439 struct btrfs_fs_info *fs_info = root->fs_info; 1452 struct btrfs_fs_info *fs_info = root->fs_info;
1440 1453
1441 mutex_lock(&fs_info->trans_mutex); 1454 spin_lock(&fs_info->trans_lock);
1442 list_splice_init(&fs_info->dead_roots, &list); 1455 list_splice_init(&fs_info->dead_roots, &list);
1443 mutex_unlock(&fs_info->trans_mutex); 1456 spin_unlock(&fs_info->trans_lock);
1444 1457
1445 while (!list_empty(&list)) { 1458 while (!list_empty(&list)) {
1446 root = list_entry(list.next, struct btrfs_root, root_list); 1459 root = list_entry(list.next, struct btrfs_root, root_list);