aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c748
1 files changed, 494 insertions, 254 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -27,15 +27,15 @@
27#include "transaction.h" 27#include "transaction.h"
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h"
30 31
31#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
32 33
33static noinline void put_transaction(struct btrfs_transaction *transaction) 34static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 35{
35 WARN_ON(transaction->use_count == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 37 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) { 38 BUG_ON(!list_empty(&transaction->list));
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 39 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 40 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 41 }
@@ -50,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)
50/* 50/*
51 * either allocate a new transaction or hop into the existing one 51 * either allocate a new transaction or hop into the existing one
52 */ 52 */
53static noinline int join_transaction(struct btrfs_root *root) 53static noinline int join_transaction(struct btrfs_root *root, int nofail)
54{ 54{
55 struct btrfs_transaction *cur_trans; 55 struct btrfs_transaction *cur_trans;
56
57 spin_lock(&root->fs_info->trans_lock);
58 if (root->fs_info->trans_no_join) {
59 if (!nofail) {
60 spin_unlock(&root->fs_info->trans_lock);
61 return -EBUSY;
62 }
63 }
64
56 cur_trans = root->fs_info->running_transaction; 65 cur_trans = root->fs_info->running_transaction;
57 if (!cur_trans) { 66 if (cur_trans) {
58 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 67 atomic_inc(&cur_trans->use_count);
59 GFP_NOFS); 68 atomic_inc(&cur_trans->num_writers);
60 BUG_ON(!cur_trans);
61 root->fs_info->generation++;
62 cur_trans->num_writers = 1;
63 cur_trans->num_joined = 0;
64 cur_trans->transid = root->fs_info->generation;
65 init_waitqueue_head(&cur_trans->writer_wait);
66 init_waitqueue_head(&cur_trans->commit_wait);
67 cur_trans->in_commit = 0;
68 cur_trans->blocked = 0;
69 cur_trans->use_count = 1;
70 cur_trans->commit_done = 0;
71 cur_trans->start_time = get_seconds();
72
73 cur_trans->delayed_refs.root = RB_ROOT;
74 cur_trans->delayed_refs.num_entries = 0;
75 cur_trans->delayed_refs.num_heads_ready = 0;
76 cur_trans->delayed_refs.num_heads = 0;
77 cur_trans->delayed_refs.flushing = 0;
78 cur_trans->delayed_refs.run_delayed_start = 0;
79 spin_lock_init(&cur_trans->delayed_refs.lock);
80
81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
83 extent_io_tree_init(&cur_trans->dirty_pages,
84 root->fs_info->btree_inode->i_mapping,
85 GFP_NOFS);
86 spin_lock(&root->fs_info->new_trans_lock);
87 root->fs_info->running_transaction = cur_trans;
88 spin_unlock(&root->fs_info->new_trans_lock);
89 } else {
90 cur_trans->num_writers++;
91 cur_trans->num_joined++; 69 cur_trans->num_joined++;
70 spin_unlock(&root->fs_info->trans_lock);
71 return 0;
92 } 72 }
73 spin_unlock(&root->fs_info->trans_lock);
74
75 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
76 if (!cur_trans)
77 return -ENOMEM;
78 spin_lock(&root->fs_info->trans_lock);
79 if (root->fs_info->running_transaction) {
80 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
81 cur_trans = root->fs_info->running_transaction;
82 atomic_inc(&cur_trans->use_count);
83 atomic_inc(&cur_trans->num_writers);
84 cur_trans->num_joined++;
85 spin_unlock(&root->fs_info->trans_lock);
86 return 0;
87 }
88 atomic_set(&cur_trans->num_writers, 1);
89 cur_trans->num_joined = 0;
90 init_waitqueue_head(&cur_trans->writer_wait);
91 init_waitqueue_head(&cur_trans->commit_wait);
92 cur_trans->in_commit = 0;
93 cur_trans->blocked = 0;
94 /*
95 * One for this trans handle, one so it will live on until we
96 * commit the transaction.
97 */
98 atomic_set(&cur_trans->use_count, 2);
99 cur_trans->commit_done = 0;
100 cur_trans->start_time = get_seconds();
101
102 cur_trans->delayed_refs.root = RB_ROOT;
103 cur_trans->delayed_refs.num_entries = 0;
104 cur_trans->delayed_refs.num_heads_ready = 0;
105 cur_trans->delayed_refs.num_heads = 0;
106 cur_trans->delayed_refs.flushing = 0;
107 cur_trans->delayed_refs.run_delayed_start = 0;
108 spin_lock_init(&cur_trans->commit_lock);
109 spin_lock_init(&cur_trans->delayed_refs.lock);
110
111 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
112 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
113 extent_io_tree_init(&cur_trans->dirty_pages,
114 root->fs_info->btree_inode->i_mapping);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
93 119
94 return 0; 120 return 0;
95} 121}
@@ -100,36 +126,82 @@ static noinline int join_transaction(struct btrfs_root *root)
100 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
101 * when the transaction commits 127 * when the transaction commits
102 */ 128 */
103static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129static int record_root_in_trans(struct btrfs_trans_handle *trans,
104 struct btrfs_root *root) 130 struct btrfs_root *root)
105{ 131{
106 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
107 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
108 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
109 135
136 /*
137 * see below for in_trans_setup usage rules
138 * we have the reloc mutex held now, so there
139 * is only one writer in this function
140 */
141 root->in_trans_setup = 1;
142
143 /* make sure readers find in_trans_setup before
144 * they find our root->last_trans update
145 */
146 smp_wmb();
147
148 spin_lock(&root->fs_info->fs_roots_radix_lock);
149 if (root->last_trans == trans->transid) {
150 spin_unlock(&root->fs_info->fs_roots_radix_lock);
151 return 0;
152 }
110 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 153 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
111 (unsigned long)root->root_key.objectid, 154 (unsigned long)root->root_key.objectid,
112 BTRFS_ROOT_TRANS_TAG); 155 BTRFS_ROOT_TRANS_TAG);
156 spin_unlock(&root->fs_info->fs_roots_radix_lock);
113 root->last_trans = trans->transid; 157 root->last_trans = trans->transid;
158
159 /* this is pretty tricky. We don't want to
160 * take the relocation lock in btrfs_record_root_in_trans
161 * unless we're really doing the first setup for this root in
162 * this transaction.
163 *
164 * Normally we'd use root->last_trans as a flag to decide
165 * if we want to take the expensive mutex.
166 *
167 * But, we have to set root->last_trans before we
168 * init the relocation root, otherwise, we trip over warnings
169 * in ctree.c. The solution used here is to flag ourselves
170 * with root->in_trans_setup. When this is 1, we're still
171 * fixing up the reloc trees and everyone must wait.
172 *
173 * When this is zero, they can trust root->last_trans and fly
174 * through btrfs_record_root_in_trans without having to take the
175 * lock. smp_wmb() makes sure that all the writes above are
176 * done before we pop in the zero below
177 */
114 btrfs_init_reloc_root(trans, root); 178 btrfs_init_reloc_root(trans, root);
179 smp_wmb();
180 root->in_trans_setup = 0;
115 } 181 }
116 return 0; 182 return 0;
117} 183}
118 184
185
119int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 186int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
120 struct btrfs_root *root) 187 struct btrfs_root *root)
121{ 188{
122 if (!root->ref_cows) 189 if (!root->ref_cows)
123 return 0; 190 return 0;
124 191
125 mutex_lock(&root->fs_info->trans_mutex); 192 /*
126 if (root->last_trans == trans->transid) { 193 * see record_root_in_trans for comments about in_trans_setup usage
127 mutex_unlock(&root->fs_info->trans_mutex); 194 * and barriers
195 */
196 smp_rmb();
197 if (root->last_trans == trans->transid &&
198 !root->in_trans_setup)
128 return 0; 199 return 0;
129 }
130 200
201 mutex_lock(&root->fs_info->reloc_mutex);
131 record_root_in_trans(trans, root); 202 record_root_in_trans(trans, root);
132 mutex_unlock(&root->fs_info->trans_mutex); 203 mutex_unlock(&root->fs_info->reloc_mutex);
204
133 return 0; 205 return 0;
134} 206}
135 207
@@ -141,21 +213,23 @@ static void wait_current_trans(struct btrfs_root *root)
141{ 213{
142 struct btrfs_transaction *cur_trans; 214 struct btrfs_transaction *cur_trans;
143 215
216 spin_lock(&root->fs_info->trans_lock);
144 cur_trans = root->fs_info->running_transaction; 217 cur_trans = root->fs_info->running_transaction;
145 if (cur_trans && cur_trans->blocked) { 218 if (cur_trans && cur_trans->blocked) {
146 DEFINE_WAIT(wait); 219 DEFINE_WAIT(wait);
147 cur_trans->use_count++; 220 atomic_inc(&cur_trans->use_count);
221 spin_unlock(&root->fs_info->trans_lock);
148 while (1) { 222 while (1) {
149 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 223 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
150 TASK_UNINTERRUPTIBLE); 224 TASK_UNINTERRUPTIBLE);
151 if (!cur_trans->blocked) 225 if (!cur_trans->blocked)
152 break; 226 break;
153 mutex_unlock(&root->fs_info->trans_mutex);
154 schedule(); 227 schedule();
155 mutex_lock(&root->fs_info->trans_mutex);
156 } 228 }
157 finish_wait(&root->fs_info->transaction_wait, &wait); 229 finish_wait(&root->fs_info->transaction_wait, &wait);
158 put_transaction(cur_trans); 230 put_transaction(cur_trans);
231 } else {
232 spin_unlock(&root->fs_info->trans_lock);
159 } 233 }
160} 234}
161 235
@@ -163,14 +237,21 @@ enum btrfs_trans_type {
163 TRANS_START, 237 TRANS_START,
164 TRANS_JOIN, 238 TRANS_JOIN,
165 TRANS_USERSPACE, 239 TRANS_USERSPACE,
240 TRANS_JOIN_NOLOCK,
166}; 241};
167 242
168static int may_wait_transaction(struct btrfs_root *root, int type) 243static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 244{
170 if (!root->fs_info->log_root_recovering && 245 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 246 return 0;
172 type == TRANS_USERSPACE)) 247
248 if (type == TRANS_USERSPACE)
173 return 1; 249 return 1;
250
251 if (type == TRANS_START &&
252 !atomic_read(&root->fs_info->open_ioctl_trans))
253 return 1;
254
174 return 0; 255 return 0;
175} 256}
176 257
@@ -181,29 +262,47 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181 struct btrfs_transaction *cur_trans; 262 struct btrfs_transaction *cur_trans;
182 int retries = 0; 263 int retries = 0;
183 int ret; 264 int ret;
265
266 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
267 return ERR_PTR(-EROFS);
268
269 if (current->journal_info) {
270 WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
271 h = current->journal_info;
272 h->use_count++;
273 h->orig_rsv = h->block_rsv;
274 h->block_rsv = NULL;
275 goto got_it;
276 }
184again: 277again:
185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 278 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
186 if (!h) 279 if (!h)
187 return ERR_PTR(-ENOMEM); 280 return ERR_PTR(-ENOMEM);
188 281
189 mutex_lock(&root->fs_info->trans_mutex);
190 if (may_wait_transaction(root, type)) 282 if (may_wait_transaction(root, type))
191 wait_current_trans(root); 283 wait_current_trans(root);
192 284
193 ret = join_transaction(root); 285 do {
194 BUG_ON(ret); 286 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
287 if (ret == -EBUSY)
288 wait_current_trans(root);
289 } while (ret == -EBUSY);
290
291 if (ret < 0) {
292 kmem_cache_free(btrfs_trans_handle_cachep, h);
293 return ERR_PTR(ret);
294 }
195 295
196 cur_trans = root->fs_info->running_transaction; 296 cur_trans = root->fs_info->running_transaction;
197 cur_trans->use_count++;
198 mutex_unlock(&root->fs_info->trans_mutex);
199 297
200 h->transid = cur_trans->transid; 298 h->transid = cur_trans->transid;
201 h->transaction = cur_trans; 299 h->transaction = cur_trans;
202 h->blocks_used = 0; 300 h->blocks_used = 0;
203 h->block_group = 0;
204 h->bytes_reserved = 0; 301 h->bytes_reserved = 0;
205 h->delayed_ref_updates = 0; 302 h->delayed_ref_updates = 0;
303 h->use_count = 1;
206 h->block_rsv = NULL; 304 h->block_rsv = NULL;
305 h->orig_rsv = NULL;
207 306
208 smp_mb(); 307 smp_mb();
209 if (cur_trans->blocked && may_wait_transaction(root, type)) { 308 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -212,21 +311,27 @@ again:
212 } 311 }
213 312
214 if (num_items > 0) { 313 if (num_items > 0) {
215 ret = btrfs_trans_reserve_metadata(h, root, num_items, 314 ret = btrfs_trans_reserve_metadata(h, root, num_items);
216 &retries); 315 if (ret == -EAGAIN && !retries) {
217 if (ret == -EAGAIN) { 316 retries++;
218 btrfs_commit_transaction(h, root); 317 btrfs_commit_transaction(h, root);
219 goto again; 318 goto again;
319 } else if (ret == -EAGAIN) {
320 /*
321 * We have already retried and got EAGAIN, so really we
322 * don't have space, so set ret to -ENOSPC.
323 */
324 ret = -ENOSPC;
220 } 325 }
326
221 if (ret < 0) { 327 if (ret < 0) {
222 btrfs_end_transaction(h, root); 328 btrfs_end_transaction(h, root);
223 return ERR_PTR(ret); 329 return ERR_PTR(ret);
224 } 330 }
225 } 331 }
226 332
227 mutex_lock(&root->fs_info->trans_mutex); 333got_it:
228 record_root_in_trans(h, root); 334 btrfs_record_root_in_trans(h, root);
229 mutex_unlock(&root->fs_info->trans_mutex);
230 335
231 if (!current->journal_info && type != TRANS_USERSPACE) 336 if (!current->journal_info && type != TRANS_USERSPACE)
232 current->journal_info = h; 337 current->journal_info = h;
@@ -238,16 +343,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
238{ 343{
239 return start_transaction(root, num_items, TRANS_START); 344 return start_transaction(root, num_items, TRANS_START);
240} 345}
241struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 346struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
242 int num_blocks)
243{ 347{
244 return start_transaction(root, 0, TRANS_JOIN); 348 return start_transaction(root, 0, TRANS_JOIN);
245} 349}
246 350
247struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 351struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
248 int num_blocks) 352{
353 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
354}
355
356struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
249{ 357{
250 return start_transaction(r, 0, TRANS_USERSPACE); 358 return start_transaction(root, 0, TRANS_USERSPACE);
251} 359}
252 360
253/* wait for a transaction commit to be fully complete */ 361/* wait for a transaction commit to be fully complete */
@@ -255,70 +363,72 @@ static noinline int wait_for_commit(struct btrfs_root *root,
255 struct btrfs_transaction *commit) 363 struct btrfs_transaction *commit)
256{ 364{
257 DEFINE_WAIT(wait); 365 DEFINE_WAIT(wait);
258 mutex_lock(&root->fs_info->trans_mutex);
259 while (!commit->commit_done) { 366 while (!commit->commit_done) {
260 prepare_to_wait(&commit->commit_wait, &wait, 367 prepare_to_wait(&commit->commit_wait, &wait,
261 TASK_UNINTERRUPTIBLE); 368 TASK_UNINTERRUPTIBLE);
262 if (commit->commit_done) 369 if (commit->commit_done)
263 break; 370 break;
264 mutex_unlock(&root->fs_info->trans_mutex);
265 schedule(); 371 schedule();
266 mutex_lock(&root->fs_info->trans_mutex);
267 } 372 }
268 mutex_unlock(&root->fs_info->trans_mutex);
269 finish_wait(&commit->commit_wait, &wait); 373 finish_wait(&commit->commit_wait, &wait);
270 return 0; 374 return 0;
271} 375}
272 376
273#if 0 377int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
274/*
275 * rate limit against the drop_snapshot code. This helps to slow down new
276 * operations if the drop_snapshot code isn't able to keep up.
277 */
278static void throttle_on_drops(struct btrfs_root *root)
279{ 378{
280 struct btrfs_fs_info *info = root->fs_info; 379 struct btrfs_transaction *cur_trans = NULL, *t;
281 int harder_count = 0; 380 int ret;
282 381
283harder: 382 ret = 0;
284 if (atomic_read(&info->throttles)) { 383 if (transid) {
285 DEFINE_WAIT(wait); 384 if (transid <= root->fs_info->last_trans_committed)
286 int thr; 385 goto out;
287 thr = atomic_read(&info->throttle_gen); 386
288 387 /* find specified transaction */
289 do { 388 spin_lock(&root->fs_info->trans_lock);
290 prepare_to_wait(&info->transaction_throttle, 389 list_for_each_entry(t, &root->fs_info->trans_list, list) {
291 &wait, TASK_UNINTERRUPTIBLE); 390 if (t->transid == transid) {
292 if (!atomic_read(&info->throttles)) { 391 cur_trans = t;
293 finish_wait(&info->transaction_throttle, &wait); 392 atomic_inc(&cur_trans->use_count);
294 break; 393 break;
295 } 394 }
296 schedule(); 395 if (t->transid > transid)
297 finish_wait(&info->transaction_throttle, &wait); 396 break;
298 } while (thr == atomic_read(&info->throttle_gen)); 397 }
299 harder_count++; 398 spin_unlock(&root->fs_info->trans_lock);
300 399 ret = -EINVAL;
301 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && 400 if (!cur_trans)
302 harder_count < 2) 401 goto out; /* bad transid */
303 goto harder; 402 } else {
403 /* find newest transaction that is committing | committed */
404 spin_lock(&root->fs_info->trans_lock);
405 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
406 list) {
407 if (t->in_commit) {
408 if (t->commit_done)
409 break;
410 cur_trans = t;
411 atomic_inc(&cur_trans->use_count);
412 break;
413 }
414 }
415 spin_unlock(&root->fs_info->trans_lock);
416 if (!cur_trans)
417 goto out; /* nothing committing|committed */
418 }
304 419
305 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && 420 wait_for_commit(root, cur_trans);
306 harder_count < 10)
307 goto harder;
308 421
309 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && 422 put_transaction(cur_trans);
310 harder_count < 20) 423 ret = 0;
311 goto harder; 424out:
312 } 425 return ret;
313} 426}
314#endif
315 427
316void btrfs_throttle(struct btrfs_root *root) 428void btrfs_throttle(struct btrfs_root *root)
317{ 429{
318 mutex_lock(&root->fs_info->trans_mutex); 430 if (!atomic_read(&root->fs_info->open_ioctl_trans))
319 if (!root->fs_info->open_ioctl_trans)
320 wait_current_trans(root); 431 wait_current_trans(root);
321 mutex_unlock(&root->fs_info->trans_mutex);
322} 432}
323 433
324static int should_end_transaction(struct btrfs_trans_handle *trans, 434static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -336,6 +446,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
336 struct btrfs_transaction *cur_trans = trans->transaction; 446 struct btrfs_transaction *cur_trans = trans->transaction;
337 int updates; 447 int updates;
338 448
449 smp_mb();
339 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 450 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
340 return 1; 451 return 1;
341 452
@@ -348,12 +459,17 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
348} 459}
349 460
350static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 461static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
351 struct btrfs_root *root, int throttle) 462 struct btrfs_root *root, int throttle, int lock)
352{ 463{
353 struct btrfs_transaction *cur_trans = trans->transaction; 464 struct btrfs_transaction *cur_trans = trans->transaction;
354 struct btrfs_fs_info *info = root->fs_info; 465 struct btrfs_fs_info *info = root->fs_info;
355 int count = 0; 466 int count = 0;
356 467
468 if (--trans->use_count) {
469 trans->block_rsv = trans->orig_rsv;
470 return 0;
471 }
472
357 while (count < 4) { 473 while (count < 4) {
358 unsigned long cur = trans->delayed_ref_updates; 474 unsigned long cur = trans->delayed_ref_updates;
359 trans->delayed_ref_updates = 0; 475 trans->delayed_ref_updates = 0;
@@ -376,26 +492,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
376 492
377 btrfs_trans_release_metadata(trans, root); 493 btrfs_trans_release_metadata(trans, root);
378 494
379 if (!root->fs_info->open_ioctl_trans && 495 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
380 should_end_transaction(trans, root)) 496 should_end_transaction(trans, root)) {
381 trans->transaction->blocked = 1; 497 trans->transaction->blocked = 1;
498 smp_wmb();
499 }
382 500
383 if (cur_trans->blocked && !cur_trans->in_commit) { 501 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
384 if (throttle) 502 if (throttle)
385 return btrfs_commit_transaction(trans, root); 503 return btrfs_commit_transaction(trans, root);
386 else 504 else
387 wake_up_process(info->transaction_kthread); 505 wake_up_process(info->transaction_kthread);
388 } 506 }
389 507
390 mutex_lock(&info->trans_mutex);
391 WARN_ON(cur_trans != info->running_transaction); 508 WARN_ON(cur_trans != info->running_transaction);
392 WARN_ON(cur_trans->num_writers < 1); 509 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
393 cur_trans->num_writers--; 510 atomic_dec(&cur_trans->num_writers);
394 511
512 smp_mb();
395 if (waitqueue_active(&cur_trans->writer_wait)) 513 if (waitqueue_active(&cur_trans->writer_wait))
396 wake_up(&cur_trans->writer_wait); 514 wake_up(&cur_trans->writer_wait);
397 put_transaction(cur_trans); 515 put_transaction(cur_trans);
398 mutex_unlock(&info->trans_mutex);
399 516
400 if (current->journal_info == trans) 517 if (current->journal_info == trans)
401 current->journal_info = NULL; 518 current->journal_info = NULL;
@@ -411,13 +528,40 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
411int btrfs_end_transaction(struct btrfs_trans_handle *trans, 528int btrfs_end_transaction(struct btrfs_trans_handle *trans,
412 struct btrfs_root *root) 529 struct btrfs_root *root)
413{ 530{
414 return __btrfs_end_transaction(trans, root, 0); 531 int ret;
532
533 ret = __btrfs_end_transaction(trans, root, 0, 1);
534 if (ret)
535 return ret;
536 return 0;
415} 537}
416 538
417int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 539int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root) 540 struct btrfs_root *root)
419{ 541{
420 return __btrfs_end_transaction(trans, root, 1); 542 int ret;
543
544 ret = __btrfs_end_transaction(trans, root, 1, 1);
545 if (ret)
546 return ret;
547 return 0;
548}
549
550int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
551 struct btrfs_root *root)
552{
553 int ret;
554
555 ret = __btrfs_end_transaction(trans, root, 0, 0);
556 if (ret)
557 return ret;
558 return 0;
559}
560
561int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
562 struct btrfs_root *root)
563{
564 return __btrfs_end_transaction(trans, root, 1, 1);
421} 565}
422 566
423/* 567/*
@@ -643,9 +787,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
643 */ 787 */
644int btrfs_add_dead_root(struct btrfs_root *root) 788int btrfs_add_dead_root(struct btrfs_root *root)
645{ 789{
646 mutex_lock(&root->fs_info->trans_mutex); 790 spin_lock(&root->fs_info->trans_lock);
647 list_add(&root->root_list, &root->fs_info->dead_roots); 791 list_add(&root->root_list, &root->fs_info->dead_roots);
648 mutex_unlock(&root->fs_info->trans_mutex); 792 spin_unlock(&root->fs_info->trans_lock);
649 return 0; 793 return 0;
650} 794}
651 795
@@ -661,6 +805,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
661 int ret; 805 int ret;
662 int err = 0; 806 int err = 0;
663 807
808 spin_lock(&fs_info->fs_roots_radix_lock);
664 while (1) { 809 while (1) {
665 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 810 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
666 (void **)gang, 0, 811 (void **)gang, 0,
@@ -673,13 +818,20 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
673 radix_tree_tag_clear(&fs_info->fs_roots_radix, 818 radix_tree_tag_clear(&fs_info->fs_roots_radix,
674 (unsigned long)root->root_key.objectid, 819 (unsigned long)root->root_key.objectid,
675 BTRFS_ROOT_TRANS_TAG); 820 BTRFS_ROOT_TRANS_TAG);
821 spin_unlock(&fs_info->fs_roots_radix_lock);
676 822
677 btrfs_free_log(trans, root); 823 btrfs_free_log(trans, root);
678 btrfs_update_reloc_root(trans, root); 824 btrfs_update_reloc_root(trans, root);
679 btrfs_orphan_commit_root(trans, root); 825 btrfs_orphan_commit_root(trans, root);
680 826
827 btrfs_save_ino_cache(root, trans);
828
681 if (root->commit_root != root->node) { 829 if (root->commit_root != root->node) {
830 mutex_lock(&root->fs_commit_mutex);
682 switch_commit_root(root); 831 switch_commit_root(root);
832 btrfs_unpin_free_ino(root);
833 mutex_unlock(&root->fs_commit_mutex);
834
683 btrfs_set_root_node(&root->root_item, 835 btrfs_set_root_node(&root->root_item,
684 root->node); 836 root->node);
685 } 837 }
@@ -687,10 +839,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
687 err = btrfs_update_root(trans, fs_info->tree_root, 839 err = btrfs_update_root(trans, fs_info->tree_root,
688 &root->root_key, 840 &root->root_key,
689 &root->root_item); 841 &root->root_item);
842 spin_lock(&fs_info->fs_roots_radix_lock);
690 if (err) 843 if (err)
691 break; 844 break;
692 } 845 }
693 } 846 }
847 spin_unlock(&fs_info->fs_roots_radix_lock);
694 return err; 848 return err;
695} 849}
696 850
@@ -720,104 +874,13 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
720 btrfs_btree_balance_dirty(info->tree_root, nr); 874 btrfs_btree_balance_dirty(info->tree_root, nr);
721 cond_resched(); 875 cond_resched();
722 876
723 if (root->fs_info->closing || ret != -EAGAIN) 877 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
724 break; 878 break;
725 } 879 }
726 root->defrag_running = 0; 880 root->defrag_running = 0;
727 return ret; 881 return ret;
728} 882}
729 883
730#if 0
731/*
732 * when dropping snapshots, we generate a ton of delayed refs, and it makes
733 * sense not to join the transaction while it is trying to flush the current
734 * queue of delayed refs out.
735 *
736 * This is used by the drop snapshot code only
737 */
738static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
739{
740 DEFINE_WAIT(wait);
741
742 mutex_lock(&info->trans_mutex);
743 while (info->running_transaction &&
744 info->running_transaction->delayed_refs.flushing) {
745 prepare_to_wait(&info->transaction_wait, &wait,
746 TASK_UNINTERRUPTIBLE);
747 mutex_unlock(&info->trans_mutex);
748
749 schedule();
750
751 mutex_lock(&info->trans_mutex);
752 finish_wait(&info->transaction_wait, &wait);
753 }
754 mutex_unlock(&info->trans_mutex);
755 return 0;
756}
757
758/*
759 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
760 * all of them
761 */
762int btrfs_drop_dead_root(struct btrfs_root *root)
763{
764 struct btrfs_trans_handle *trans;
765 struct btrfs_root *tree_root = root->fs_info->tree_root;
766 unsigned long nr;
767 int ret;
768
769 while (1) {
770 /*
771 * we don't want to jump in and create a bunch of
772 * delayed refs if the transaction is starting to close
773 */
774 wait_transaction_pre_flush(tree_root->fs_info);
775 trans = btrfs_start_transaction(tree_root, 1);
776
777 /*
778 * we've joined a transaction, make sure it isn't
779 * closing right now
780 */
781 if (trans->transaction->delayed_refs.flushing) {
782 btrfs_end_transaction(trans, tree_root);
783 continue;
784 }
785
786 ret = btrfs_drop_snapshot(trans, root);
787 if (ret != -EAGAIN)
788 break;
789
790 ret = btrfs_update_root(trans, tree_root,
791 &root->root_key,
792 &root->root_item);
793 if (ret)
794 break;
795
796 nr = trans->blocks_used;
797 ret = btrfs_end_transaction(trans, tree_root);
798 BUG_ON(ret);
799
800 btrfs_btree_balance_dirty(tree_root, nr);
801 cond_resched();
802 }
803 BUG_ON(ret);
804
805 ret = btrfs_del_root(trans, tree_root, &root->root_key);
806 BUG_ON(ret);
807
808 nr = trans->blocks_used;
809 ret = btrfs_end_transaction(trans, tree_root);
810 BUG_ON(ret);
811
812 free_extent_buffer(root->node);
813 free_extent_buffer(root->commit_root);
814 kfree(root);
815
816 btrfs_btree_balance_dirty(tree_root, nr);
817 return ret;
818}
819#endif
820
821/* 884/*
822 * new snapshots need to be created at a very specific time in the 885 * new snapshots need to be created at a very specific time in the
823 * transaction commit. This does the actual creation 886 * transaction commit. This does the actual creation
@@ -832,14 +895,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
832 struct btrfs_root *root = pending->root; 895 struct btrfs_root *root = pending->root;
833 struct btrfs_root *parent_root; 896 struct btrfs_root *parent_root;
834 struct inode *parent_inode; 897 struct inode *parent_inode;
898 struct dentry *parent;
835 struct dentry *dentry; 899 struct dentry *dentry;
836 struct extent_buffer *tmp; 900 struct extent_buffer *tmp;
837 struct extent_buffer *old; 901 struct extent_buffer *old;
838 int ret; 902 int ret;
839 int retries = 0;
840 u64 to_reserve = 0; 903 u64 to_reserve = 0;
841 u64 index = 0; 904 u64 index = 0;
842 u64 objectid; 905 u64 objectid;
906 u64 root_flags;
843 907
844 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 908 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
845 if (!new_root_item) { 909 if (!new_root_item) {
@@ -847,7 +911,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
847 goto fail; 911 goto fail;
848 } 912 }
849 913
850 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 914 ret = btrfs_find_free_objectid(tree_root, &objectid);
851 if (ret) { 915 if (ret) {
852 pending->error = ret; 916 pending->error = ret;
853 goto fail; 917 goto fail;
@@ -858,7 +922,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
858 922
859 if (to_reserve > 0) { 923 if (to_reserve > 0) {
860 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, 924 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
861 to_reserve, &retries); 925 to_reserve);
862 if (ret) { 926 if (ret) {
863 pending->error = ret; 927 pending->error = ret;
864 goto fail; 928 goto fail;
@@ -872,7 +936,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
872 trans->block_rsv = &pending->block_rsv; 936 trans->block_rsv = &pending->block_rsv;
873 937
874 dentry = pending->dentry; 938 dentry = pending->dentry;
875 parent_inode = dentry->d_parent->d_inode; 939 parent = dget_parent(dentry);
940 parent_inode = parent->d_inode;
876 parent_root = BTRFS_I(parent_inode)->root; 941 parent_root = BTRFS_I(parent_inode)->root;
877 record_root_in_trans(trans, parent_root); 942 record_root_in_trans(trans, parent_root);
878 943
@@ -883,7 +948,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
883 BUG_ON(ret); 948 BUG_ON(ret);
884 ret = btrfs_insert_dir_item(trans, parent_root, 949 ret = btrfs_insert_dir_item(trans, parent_root,
885 dentry->d_name.name, dentry->d_name.len, 950 dentry->d_name.name, dentry->d_name.len,
886 parent_inode->i_ino, &key, 951 parent_inode, &key,
887 BTRFS_FT_DIR, index); 952 BTRFS_FT_DIR, index);
888 BUG_ON(ret); 953 BUG_ON(ret);
889 954
@@ -892,9 +957,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
892 ret = btrfs_update_inode(trans, parent_root, parent_inode); 957 ret = btrfs_update_inode(trans, parent_root, parent_inode);
893 BUG_ON(ret); 958 BUG_ON(ret);
894 959
960 /*
961 * pull in the delayed directory update
962 * and the delayed inode item
963 * otherwise we corrupt the FS during
964 * snapshot
965 */
966 ret = btrfs_run_delayed_items(trans, root);
967 BUG_ON(ret);
968
895 record_root_in_trans(trans, root); 969 record_root_in_trans(trans, root);
896 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 970 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
897 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 971 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
972 btrfs_check_and_init_root_item(new_root_item);
973
974 root_flags = btrfs_root_flags(new_root_item);
975 if (pending->readonly)
976 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
977 else
978 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
979 btrfs_set_root_flags(new_root_item, root_flags);
898 980
899 old = btrfs_lock_root_node(root); 981 old = btrfs_lock_root_node(root);
900 btrfs_cow_block(trans, root, old, NULL, 0, &old); 982 btrfs_cow_block(trans, root, old, NULL, 0, &old);
@@ -917,9 +999,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
917 */ 999 */
918 ret = btrfs_add_root_ref(trans, tree_root, objectid, 1000 ret = btrfs_add_root_ref(trans, tree_root, objectid,
919 parent_root->root_key.objectid, 1001 parent_root->root_key.objectid,
920 parent_inode->i_ino, index, 1002 btrfs_ino(parent_inode), index,
921 dentry->d_name.name, dentry->d_name.len); 1003 dentry->d_name.name, dentry->d_name.len);
922 BUG_ON(ret); 1004 BUG_ON(ret);
1005 dput(parent);
923 1006
924 key.offset = (u64)-1; 1007 key.offset = (u64)-1;
925 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); 1008 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
@@ -966,33 +1049,152 @@ static void update_super_roots(struct btrfs_root *root)
966 super->root = root_item->bytenr; 1049 super->root = root_item->bytenr;
967 super->generation = root_item->generation; 1050 super->generation = root_item->generation;
968 super->root_level = root_item->level; 1051 super->root_level = root_item->level;
1052 if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
1053 super->cache_generation = root_item->generation;
969} 1054}
970 1055
971int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1056int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
972{ 1057{
973 int ret = 0; 1058 int ret = 0;
974 spin_lock(&info->new_trans_lock); 1059 spin_lock(&info->trans_lock);
975 if (info->running_transaction) 1060 if (info->running_transaction)
976 ret = info->running_transaction->in_commit; 1061 ret = info->running_transaction->in_commit;
977 spin_unlock(&info->new_trans_lock); 1062 spin_unlock(&info->trans_lock);
978 return ret; 1063 return ret;
979} 1064}
980 1065
981int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1066int btrfs_transaction_blocked(struct btrfs_fs_info *info)
982{ 1067{
983 int ret = 0; 1068 int ret = 0;
984 spin_lock(&info->new_trans_lock); 1069 spin_lock(&info->trans_lock);
985 if (info->running_transaction) 1070 if (info->running_transaction)
986 ret = info->running_transaction->blocked; 1071 ret = info->running_transaction->blocked;
987 spin_unlock(&info->new_trans_lock); 1072 spin_unlock(&info->trans_lock);
988 return ret; 1073 return ret;
989} 1074}
990 1075
1076/*
1077 * wait for the current transaction commit to start and block subsequent
1078 * transaction joins
1079 */
1080static void wait_current_trans_commit_start(struct btrfs_root *root,
1081 struct btrfs_transaction *trans)
1082{
1083 DEFINE_WAIT(wait);
1084
1085 if (trans->in_commit)
1086 return;
1087
1088 while (1) {
1089 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1090 TASK_UNINTERRUPTIBLE);
1091 if (trans->in_commit) {
1092 finish_wait(&root->fs_info->transaction_blocked_wait,
1093 &wait);
1094 break;
1095 }
1096 schedule();
1097 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1098 }
1099}
1100
1101/*
1102 * wait for the current transaction to start and then become unblocked.
1103 * caller holds ref.
1104 */
1105static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1106 struct btrfs_transaction *trans)
1107{
1108 DEFINE_WAIT(wait);
1109
1110 if (trans->commit_done || (trans->in_commit && !trans->blocked))
1111 return;
1112
1113 while (1) {
1114 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1115 TASK_UNINTERRUPTIBLE);
1116 if (trans->commit_done ||
1117 (trans->in_commit && !trans->blocked)) {
1118 finish_wait(&root->fs_info->transaction_wait,
1119 &wait);
1120 break;
1121 }
1122 schedule();
1123 finish_wait(&root->fs_info->transaction_wait,
1124 &wait);
1125 }
1126}
1127
1128/*
1129 * commit transactions asynchronously. once btrfs_commit_transaction_async
1130 * returns, any subsequent transaction will not be allowed to join.
1131 */
1132struct btrfs_async_commit {
1133 struct btrfs_trans_handle *newtrans;
1134 struct btrfs_root *root;
1135 struct delayed_work work;
1136};
1137
1138static void do_async_commit(struct work_struct *work)
1139{
1140 struct btrfs_async_commit *ac =
1141 container_of(work, struct btrfs_async_commit, work.work);
1142
1143 btrfs_commit_transaction(ac->newtrans, ac->root);
1144 kfree(ac);
1145}
1146
1147int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1148 struct btrfs_root *root,
1149 int wait_for_unblock)
1150{
1151 struct btrfs_async_commit *ac;
1152 struct btrfs_transaction *cur_trans;
1153
1154 ac = kmalloc(sizeof(*ac), GFP_NOFS);
1155 if (!ac)
1156 return -ENOMEM;
1157
1158 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1159 ac->root = root;
1160 ac->newtrans = btrfs_join_transaction(root);
1161 if (IS_ERR(ac->newtrans)) {
1162 int err = PTR_ERR(ac->newtrans);
1163 kfree(ac);
1164 return err;
1165 }
1166
1167 /* take transaction reference */
1168 cur_trans = trans->transaction;
1169 atomic_inc(&cur_trans->use_count);
1170
1171 btrfs_end_transaction(trans, root);
1172 schedule_delayed_work(&ac->work, 0);
1173
1174 /* wait for transaction to start and unblock */
1175 if (wait_for_unblock)
1176 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1177 else
1178 wait_current_trans_commit_start(root, cur_trans);
1179
1180 if (current->journal_info == trans)
1181 current->journal_info = NULL;
1182
1183 put_transaction(cur_trans);
1184 return 0;
1185}
1186
1187/*
1188 * btrfs_transaction state sequence:
1189 * in_commit = 0, blocked = 0 (initial)
1190 * in_commit = 1, blocked = 1
1191 * blocked = 0
1192 * commit_done = 1
1193 */
991int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1194int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
992 struct btrfs_root *root) 1195 struct btrfs_root *root)
993{ 1196{
994 unsigned long joined = 0; 1197 unsigned long joined = 0;
995 unsigned long timeout = 1;
996 struct btrfs_transaction *cur_trans; 1198 struct btrfs_transaction *cur_trans;
997 struct btrfs_transaction *prev_trans = NULL; 1199 struct btrfs_transaction *prev_trans = NULL;
998 DEFINE_WAIT(wait); 1200 DEFINE_WAIT(wait);
@@ -1021,36 +1223,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1021 ret = btrfs_run_delayed_refs(trans, root, 0); 1223 ret = btrfs_run_delayed_refs(trans, root, 0);
1022 BUG_ON(ret); 1224 BUG_ON(ret);
1023 1225
1024 mutex_lock(&root->fs_info->trans_mutex); 1226 spin_lock(&cur_trans->commit_lock);
1025 if (cur_trans->in_commit) { 1227 if (cur_trans->in_commit) {
1026 cur_trans->use_count++; 1228 spin_unlock(&cur_trans->commit_lock);
1027 mutex_unlock(&root->fs_info->trans_mutex); 1229 atomic_inc(&cur_trans->use_count);
1028 btrfs_end_transaction(trans, root); 1230 btrfs_end_transaction(trans, root);
1029 1231
1030 ret = wait_for_commit(root, cur_trans); 1232 ret = wait_for_commit(root, cur_trans);
1031 BUG_ON(ret); 1233 BUG_ON(ret);
1032 1234
1033 mutex_lock(&root->fs_info->trans_mutex);
1034 put_transaction(cur_trans); 1235 put_transaction(cur_trans);
1035 mutex_unlock(&root->fs_info->trans_mutex);
1036 1236
1037 return 0; 1237 return 0;
1038 } 1238 }
1039 1239
1040 trans->transaction->in_commit = 1; 1240 trans->transaction->in_commit = 1;
1041 trans->transaction->blocked = 1; 1241 trans->transaction->blocked = 1;
1242 spin_unlock(&cur_trans->commit_lock);
1243 wake_up(&root->fs_info->transaction_blocked_wait);
1244
1245 spin_lock(&root->fs_info->trans_lock);
1042 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1246 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1043 prev_trans = list_entry(cur_trans->list.prev, 1247 prev_trans = list_entry(cur_trans->list.prev,
1044 struct btrfs_transaction, list); 1248 struct btrfs_transaction, list);
1045 if (!prev_trans->commit_done) { 1249 if (!prev_trans->commit_done) {
1046 prev_trans->use_count++; 1250 atomic_inc(&prev_trans->use_count);
1047 mutex_unlock(&root->fs_info->trans_mutex); 1251 spin_unlock(&root->fs_info->trans_lock);
1048 1252
1049 wait_for_commit(root, prev_trans); 1253 wait_for_commit(root, prev_trans);
1050 1254
1051 mutex_lock(&root->fs_info->trans_mutex);
1052 put_transaction(prev_trans); 1255 put_transaction(prev_trans);
1256 } else {
1257 spin_unlock(&root->fs_info->trans_lock);
1053 } 1258 }
1259 } else {
1260 spin_unlock(&root->fs_info->trans_lock);
1054 } 1261 }
1055 1262
1056 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1263 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1058,17 +1265,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1058 1265
1059 do { 1266 do {
1060 int snap_pending = 0; 1267 int snap_pending = 0;
1268
1061 joined = cur_trans->num_joined; 1269 joined = cur_trans->num_joined;
1062 if (!list_empty(&trans->transaction->pending_snapshots)) 1270 if (!list_empty(&trans->transaction->pending_snapshots))
1063 snap_pending = 1; 1271 snap_pending = 1;
1064 1272
1065 WARN_ON(cur_trans != trans->transaction); 1273 WARN_ON(cur_trans != trans->transaction);
1066 if (cur_trans->num_writers > 1)
1067 timeout = MAX_SCHEDULE_TIMEOUT;
1068 else if (should_grow)
1069 timeout = 1;
1070
1071 mutex_unlock(&root->fs_info->trans_mutex);
1072 1274
1073 if (flush_on_commit || snap_pending) { 1275 if (flush_on_commit || snap_pending) {
1074 btrfs_start_delalloc_inodes(root, 1); 1276 btrfs_start_delalloc_inodes(root, 1);
@@ -1076,6 +1278,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1076 BUG_ON(ret); 1278 BUG_ON(ret);
1077 } 1279 }
1078 1280
1281 ret = btrfs_run_delayed_items(trans, root);
1282 BUG_ON(ret);
1283
1079 /* 1284 /*
1080 * rename don't use btrfs_join_transaction, so, once we 1285 * rename don't use btrfs_join_transaction, so, once we
1081 * set the transaction to blocked above, we aren't going 1286 * set the transaction to blocked above, we aren't going
@@ -1088,23 +1293,51 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1088 prepare_to_wait(&cur_trans->writer_wait, &wait, 1293 prepare_to_wait(&cur_trans->writer_wait, &wait,
1089 TASK_UNINTERRUPTIBLE); 1294 TASK_UNINTERRUPTIBLE);
1090 1295
1091 smp_mb(); 1296 if (atomic_read(&cur_trans->num_writers) > 1)
1092 if (cur_trans->num_writers > 1 || should_grow) 1297 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1093 schedule_timeout(timeout); 1298 else if (should_grow)
1299 schedule_timeout(1);
1094 1300
1095 mutex_lock(&root->fs_info->trans_mutex);
1096 finish_wait(&cur_trans->writer_wait, &wait); 1301 finish_wait(&cur_trans->writer_wait, &wait);
1097 } while (cur_trans->num_writers > 1 || 1302 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1098 (should_grow && cur_trans->num_joined != joined)); 1303 (should_grow && cur_trans->num_joined != joined));
1099 1304
1305 /*
1306 * Ok now we need to make sure to block out any other joins while we
1307 * commit the transaction. We could have started a join before setting
1308 * no_join so make sure to wait for num_writers to == 1 again.
1309 */
1310 spin_lock(&root->fs_info->trans_lock);
1311 root->fs_info->trans_no_join = 1;
1312 spin_unlock(&root->fs_info->trans_lock);
1313 wait_event(cur_trans->writer_wait,
1314 atomic_read(&cur_trans->num_writers) == 1);
1315
1316 /*
1317 * the reloc mutex makes sure that we stop
1318 * the balancing code from coming in and moving
1319 * extents around in the middle of the commit
1320 */
1321 mutex_lock(&root->fs_info->reloc_mutex);
1322
1323 ret = btrfs_run_delayed_items(trans, root);
1324 BUG_ON(ret);
1325
1100 ret = create_pending_snapshots(trans, root->fs_info); 1326 ret = create_pending_snapshots(trans, root->fs_info);
1101 BUG_ON(ret); 1327 BUG_ON(ret);
1102 1328
1103 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1329 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1104 BUG_ON(ret); 1330 BUG_ON(ret);
1105 1331
1332 /*
1333 * make sure none of the code above managed to slip in a
1334 * delayed item
1335 */
1336 btrfs_assert_delayed_root_empty(root);
1337
1106 WARN_ON(cur_trans != trans->transaction); 1338 WARN_ON(cur_trans != trans->transaction);
1107 1339
1340 btrfs_scrub_pause(root);
1108 /* btrfs_commit_tree_roots is responsible for getting the 1341 /* btrfs_commit_tree_roots is responsible for getting the
1109 * various roots consistent with each other. Every pointer 1342 * various roots consistent with each other. Every pointer
1110 * in the tree of tree roots has to point to the most up to date 1343 * in the tree of tree roots has to point to the most up to date
@@ -1134,9 +1367,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1134 btrfs_prepare_extent_commit(trans, root); 1367 btrfs_prepare_extent_commit(trans, root);
1135 1368
1136 cur_trans = root->fs_info->running_transaction; 1369 cur_trans = root->fs_info->running_transaction;
1137 spin_lock(&root->fs_info->new_trans_lock);
1138 root->fs_info->running_transaction = NULL;
1139 spin_unlock(&root->fs_info->new_trans_lock);
1140 1370
1141 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1371 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1142 root->fs_info->tree_root->node); 1372 root->fs_info->tree_root->node);
@@ -1157,10 +1387,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1157 sizeof(root->fs_info->super_copy)); 1387 sizeof(root->fs_info->super_copy));
1158 1388
1159 trans->transaction->blocked = 0; 1389 trans->transaction->blocked = 0;
1390 spin_lock(&root->fs_info->trans_lock);
1391 root->fs_info->running_transaction = NULL;
1392 root->fs_info->trans_no_join = 0;
1393 spin_unlock(&root->fs_info->trans_lock);
1394 mutex_unlock(&root->fs_info->reloc_mutex);
1160 1395
1161 wake_up(&root->fs_info->transaction_wait); 1396 wake_up(&root->fs_info->transaction_wait);
1162 1397
1163 mutex_unlock(&root->fs_info->trans_mutex);
1164 ret = btrfs_write_and_wait_transaction(trans, root); 1398 ret = btrfs_write_and_wait_transaction(trans, root);
1165 BUG_ON(ret); 1399 BUG_ON(ret);
1166 write_ctree_super(trans, root, 0); 1400 write_ctree_super(trans, root, 0);
@@ -1173,18 +1407,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1173 1407
1174 btrfs_finish_extent_commit(trans, root); 1408 btrfs_finish_extent_commit(trans, root);
1175 1409
1176 mutex_lock(&root->fs_info->trans_mutex);
1177
1178 cur_trans->commit_done = 1; 1410 cur_trans->commit_done = 1;
1179 1411
1180 root->fs_info->last_trans_committed = cur_trans->transid; 1412 root->fs_info->last_trans_committed = cur_trans->transid;
1181 1413
1182 wake_up(&cur_trans->commit_wait); 1414 wake_up(&cur_trans->commit_wait);
1183 1415
1416 spin_lock(&root->fs_info->trans_lock);
1417 list_del_init(&cur_trans->list);
1418 spin_unlock(&root->fs_info->trans_lock);
1419
1184 put_transaction(cur_trans); 1420 put_transaction(cur_trans);
1185 put_transaction(cur_trans); 1421 put_transaction(cur_trans);
1186 1422
1187 mutex_unlock(&root->fs_info->trans_mutex); 1423 trace_btrfs_transaction_commit(root);
1424
1425 btrfs_scrub_continue(root);
1188 1426
1189 if (current->journal_info == trans) 1427 if (current->journal_info == trans)
1190 current->journal_info = NULL; 1428 current->journal_info = NULL;
@@ -1205,14 +1443,16 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1205 LIST_HEAD(list); 1443 LIST_HEAD(list);
1206 struct btrfs_fs_info *fs_info = root->fs_info; 1444 struct btrfs_fs_info *fs_info = root->fs_info;
1207 1445
1208 mutex_lock(&fs_info->trans_mutex); 1446 spin_lock(&fs_info->trans_lock);
1209 list_splice_init(&fs_info->dead_roots, &list); 1447 list_splice_init(&fs_info->dead_roots, &list);
1210 mutex_unlock(&fs_info->trans_mutex); 1448 spin_unlock(&fs_info->trans_lock);
1211 1449
1212 while (!list_empty(&list)) { 1450 while (!list_empty(&list)) {
1213 root = list_entry(list.next, struct btrfs_root, root_list); 1451 root = list_entry(list.next, struct btrfs_root, root_list);
1214 list_del(&root->root_list); 1452 list_del(&root->root_list);
1215 1453
1454 btrfs_kill_all_delayed_nodes(root);
1455
1216 if (btrfs_header_backref_rev(root->node) < 1456 if (btrfs_header_backref_rev(root->node) <
1217 BTRFS_MIXED_BACKREF_REV) 1457 BTRFS_MIXED_BACKREF_REV)
1218 btrfs_drop_snapshot(root, NULL, 0); 1458 btrfs_drop_snapshot(root, NULL, 0);