author		Jan Schmidt <list.btrfs@jan-o-sch.net>	2012-06-21 05:08:04 -0400
committer	Jan Schmidt <list.btrfs@jan-o-sch.net>	2012-07-10 09:14:41 -0400
commit		097b8a7c9e48e2cb50fd0eb9315791921beaf484 (patch)
tree		03588f0e29000e415f7177d31a8f5b4c1689d6ad /fs
parent		cf5388307a2b4faab4b11d732b61c85741be6169 (diff)
Btrfs: join tree mod log code with the code holding back delayed refs
We've got two mechanisms, both required for reliable backref resolving (tree
mod log and holding back delayed refs). You cannot make use of one without
the other. So instead of requiring the user of this mechanism to set up both
correctly, we join them into a single interface.

Additionally, we stop inserting non-blockers into fs_info->tree_mod_seq_list
as we did before, which was of no value.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
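[Editor's illustration, not part of the patch: a minimal sketch of how a backref-resolving caller uses the joined interface after this change. The zero-initialized seq_list follows the contract documented at btrfs_get_tree_mod_seq() below; walk_backrefs() is a hypothetical placeholder for the actual resolving work (e.g. btrfs_find_all_roots), and error handling is elided.]

	/*
	 * Sketch only: one get/put pair now both holds back delayed ref
	 * processing and pins the tree mod log, where the old code needed
	 * btrfs_get_delayed_seq() and btrfs_get_tree_mod_seq() separately.
	 */
	static int resolve_with_blocker(struct btrfs_fs_info *fs_info, u64 bytenr)
	{
		struct seq_list elem = {};	/* elem.seq == 0: registers a new blocker */
		int ret;

		btrfs_get_tree_mod_seq(fs_info, &elem);

		/* hypothetical helper; mirrors how iterate_extent_inodes()
		 * passes tree_mod_seq_elem.seq as time_seq in this patch */
		ret = walk_backrefs(fs_info, bytenr, elem.seq);

		/* drops the blocker and wakes waiting delayed ref processing */
		btrfs_put_tree_mod_seq(fs_info, &elem);
		return ret;
	}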
Diffstat (limited to 'fs')
-rw-r--r--	fs/btrfs/backref.c	|  30
-rw-r--r--	fs/btrfs/backref.h	|   3
-rw-r--r--	fs/btrfs/ctree.c	| 275
-rw-r--r--	fs/btrfs/ctree.h	|  31
-rw-r--r--	fs/btrfs/delayed-ref.c	|  44
-rw-r--r--	fs/btrfs/delayed-ref.h	|  49
-rw-r--r--	fs/btrfs/disk-io.c	|   2
-rw-r--r--	fs/btrfs/extent-tree.c	|  21
-rw-r--r--	fs/btrfs/transaction.c	|   4
9 files changed, 240 insertions(+), 219 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a383c18e74e8..7d80ddd8f544 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
-			     u64 delayed_ref_seq, u64 time_seq,
-			     struct ulist *refs, struct ulist *roots,
-			     const u64 *extent_item_pos)
+			     u64 time_seq, struct ulist *refs,
+			     struct ulist *roots, const u64 *extent_item_pos)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -837,7 +836,7 @@ again:
 			btrfs_put_delayed_ref(&head->node);
 			goto again;
 		}
-		ret = __add_delayed_refs(head, delayed_ref_seq,
+		ret = __add_delayed_refs(head, time_seq,
 					 &prefs_delayed);
 		mutex_unlock(&head->mutex);
 		if (ret) {
@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
  */
 static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 				struct btrfs_fs_info *fs_info, u64 bytenr,
-				u64 delayed_ref_seq, u64 time_seq,
-				struct ulist **leafs,
+				u64 time_seq, struct ulist **leafs,
 				const u64 *extent_item_pos)
 {
 	struct ulist *tmp;
@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 	}
 
-	ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+	ret = find_parent_nodes(trans, fs_info, bytenr,
 				time_seq, *leafs, tmp, extent_item_pos);
 	ulist_free(tmp);
 
@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
  */
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 delayed_ref_seq, u64 time_seq,
-			 struct ulist **roots)
+			 u64 time_seq, struct ulist **roots)
 {
 	struct ulist *tmp;
 	struct ulist_node *node = NULL;
@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
-		ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+		ret = find_parent_nodes(trans, fs_info, bytenr,
 					time_seq, tmp, *roots, NULL);
 		if (ret < 0 && ret != -ENOENT) {
 			ulist_free(tmp);
@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	struct ulist *roots = NULL;
 	struct ulist_node *ref_node = NULL;
 	struct ulist_node *root_node = NULL;
-	struct seq_list seq_elem = {};
 	struct seq_list tree_mod_seq_elem = {};
 	struct ulist_iterator ref_uiter;
 	struct ulist_iterator root_uiter;
-	struct btrfs_delayed_ref_root *delayed_refs = NULL;
 
 	pr_debug("resolving all inodes for extent %llu\n",
 		 extent_item_objectid);
@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 		trans = btrfs_join_transaction(fs_info->extent_root);
 		if (IS_ERR(trans))
 			return PTR_ERR(trans);
-
-		delayed_refs = &trans->transaction->delayed_refs;
-		spin_lock(&delayed_refs->lock);
-		btrfs_get_delayed_seq(delayed_refs, &seq_elem);
-		spin_unlock(&delayed_refs->lock);
 		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 	}
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
-				   seq_elem.seq, tree_mod_seq_elem.seq, &refs,
+				   tree_mod_seq_elem.seq, &refs,
 				   &extent_item_pos);
 	if (ret)
 		goto out;
@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	ULIST_ITER_INIT(&ref_uiter);
 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
 		ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
-					   seq_elem.seq,
-					   tree_mod_seq_elem.seq, &roots);
+					   tree_mod_seq_elem.seq, &roots);
 		if (ret)
 			break;
 		ULIST_ITER_INIT(&root_uiter);
@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 out:
 	if (!search_commit_root) {
 		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
-		btrfs_put_delayed_seq(delayed_refs, &seq_elem);
 		btrfs_end_transaction(trans, fs_info->extent_root);
 	}
 
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index c18d8ac7b795..3a1ad3e2dcb0 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 delayed_ref_seq, u64 time_seq,
-			 struct ulist **roots);
+			 u64 time_seq, struct ulist **roots);
 
 struct btrfs_data_container *init_data_container(u32 total_bytes);
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 67fe46fdee6f..bef68ab32204 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -321,7 +321,7 @@ struct tree_mod_root {
 struct tree_mod_elem {
 	struct rb_node node;
 	u64 index;		/* shifted logical */
-	struct seq_list elem;
+	u64 seq;
 	enum mod_log_op op;
 
 	/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
@@ -341,20 +341,50 @@ struct tree_mod_elem {
 	struct tree_mod_root old_root;
 };
 
-static inline void
-__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
+static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
 {
-	elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
-	list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+	read_lock(&fs_info->tree_mod_log_lock);
 }
 
-void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem)
+static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
+{
+	read_unlock(&fs_info->tree_mod_log_lock);
+}
+
+static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
+{
+	write_lock(&fs_info->tree_mod_log_lock);
+}
+
+static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
 {
-	elem->flags = 1;
+	write_unlock(&fs_info->tree_mod_log_lock);
+}
+
+/*
+ * This adds a new blocker to the tree mod log's blocker list if the @elem
+ * passed does not already have a sequence number set. So when a caller expects
+ * to record tree modifications, it should ensure to set elem->seq to zero
+ * before calling btrfs_get_tree_mod_seq.
+ * Returns a fresh, unused tree log modification sequence number, even if no new
+ * blocker was added.
+ */
+u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			   struct seq_list *elem)
+{
+	u64 seq;
+
+	tree_mod_log_write_lock(fs_info);
 	spin_lock(&fs_info->tree_mod_seq_lock);
-	__get_tree_mod_seq(fs_info, elem);
+	if (!elem->seq) {
+		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+	}
+	seq = btrfs_inc_tree_mod_seq(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
+	tree_mod_log_write_unlock(fs_info);
+
+	return seq;
 }
 
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	if (!seq_putting)
 		return;
 
-	BUG_ON(!(elem->flags & 1));
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	list_del(&elem->list);
+	elem->seq = 0;
 
 	list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
-		if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
+		if (cur_elem->seq < min_seq) {
 			if (seq_putting > cur_elem->seq) {
 				/*
 				 * blocker with lower sequence number exists, we
 				 * cannot remove anything from the log
 				 */
-				goto out;
+				spin_unlock(&fs_info->tree_mod_seq_lock);
+				return;
 			}
 			min_seq = cur_elem->seq;
 		}
 	}
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
+	/*
+	 * we removed the lowest blocker from the blocker list, so there may be
+	 * more processible delayed refs.
+	 */
+	wake_up(&fs_info->tree_mod_seq_wait);
 
 	/*
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
-	write_lock(&fs_info->tree_mod_log_lock);
+	tree_mod_log_write_lock(fs_info);
 	tm_root = &fs_info->tree_mod_log;
 	for (node = rb_first(tm_root); node; node = next) {
 		next = rb_next(node);
 		tm = container_of(node, struct tree_mod_elem, node);
-		if (tm->elem.seq > min_seq)
+		if (tm->seq > min_seq)
 			continue;
 		rb_erase(node, tm_root);
-		list_del(&tm->elem.list);
 		kfree(tm);
 	}
-	write_unlock(&fs_info->tree_mod_log_lock);
-out:
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	tree_mod_log_write_unlock(fs_info);
 }
 
 /*
@@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 	struct rb_node **new;
 	struct rb_node *parent = NULL;
 	struct tree_mod_elem *cur;
-	int ret = 0;
 
-	BUG_ON(!tm || !tm->elem.seq);
+	BUG_ON(!tm || !tm->seq);
 
-	write_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	new = &tm_root->rb_node;
 	while (*new) {
@@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 			new = &((*new)->rb_left);
 		else if (cur->index > tm->index)
 			new = &((*new)->rb_right);
-		else if (cur->elem.seq < tm->elem.seq)
+		else if (cur->seq < tm->seq)
 			new = &((*new)->rb_left);
-		else if (cur->elem.seq > tm->elem.seq)
+		else if (cur->seq > tm->seq)
 			new = &((*new)->rb_right);
 		else {
 			kfree(tm);
-			ret = -EEXIST;
-			goto unlock;
+			return -EEXIST;
 		}
 	}
 
 	rb_link_node(&tm->node, parent, new);
 	rb_insert_color(&tm->node, tm_root);
-unlock:
-	write_unlock(&fs_info->tree_mod_log_lock);
-	return ret;
+	return 0;
 }
 
+/*
+ * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
+ * returns zero with the tree_mod_log_lock acquired. The caller must hold
+ * this until all tree mod log insertions are recorded in the rb tree and then
+ * call tree_mod_log_write_unlock() to release.
+ */
 static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 				    struct extent_buffer *eb) {
 	smp_mb();
 	if (list_empty(&(fs_info)->tree_mod_seq_list))
 		return 1;
-	if (!eb)
-		return 0;
-	if (btrfs_header_level(eb) == 0)
+	if (eb && btrfs_header_level(eb) == 0)
 		return 1;
+
+	tree_mod_log_write_lock(fs_info);
+	if (list_empty(&fs_info->tree_mod_seq_list)) {
+		/*
+		 * someone emptied the list while we were waiting for the lock.
+		 * we must not add to the list when no blocker exists.
+		 */
+		tree_mod_log_write_unlock(fs_info);
+		return 1;
+	}
+
 	return 0;
 }
 
 /*
- * This allocates memory and gets a tree modification sequence number when
- * needed.
+ * This allocates memory and gets a tree modification sequence number.
  *
- * Returns 0 when no sequence number is needed, < 0 on error.
- * Returns 1 when a sequence number was added. In this case,
- * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
- * after inserting into the rb tree.
+ * Returns <0 on error.
+ * Returns >0 (the added sequence number) on success.
  */
 static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
 				 struct tree_mod_elem **tm_ret)
 {
 	struct tree_mod_elem *tm;
-	int seq;
 
-	if (tree_mod_dont_log(fs_info, NULL))
-		return 0;
-
-	tm = *tm_ret = kzalloc(sizeof(*tm), flags);
+	/*
+	 * once we switch from spin locks to something different, we should
+	 * honor the flags parameter here.
+	 */
+	tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
 	if (!tm)
 		return -ENOMEM;
 
-	tm->elem.flags = 0;
-	spin_lock(&fs_info->tree_mod_seq_lock);
-	if (list_empty(&fs_info->tree_mod_seq_list)) {
-		/*
-		 * someone emptied the list while we were waiting for the lock.
-		 * we must not add to the list, because no blocker exists. items
-		 * are removed from the list only when the existing blocker is
-		 * removed from the list.
-		 */
-		kfree(tm);
-		seq = 0;
-		spin_unlock(&fs_info->tree_mod_seq_lock);
-	} else {
-		__get_tree_mod_seq(fs_info, &tm->elem);
-		seq = tm->elem.seq;
-	}
-
-	return seq;
+	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+	return tm->seq;
 }
 
-static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
-			     struct extent_buffer *eb, int slot,
-			     enum mod_log_op op, gfp_t flags)
+static inline int
+__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+			  struct extent_buffer *eb, int slot,
+			  enum mod_log_op op, gfp_t flags)
 {
-	struct tree_mod_elem *tm;
 	int ret;
+	struct tree_mod_elem *tm;
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret <= 0)
+	if (ret < 0)
 		return ret;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
@@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
 	tm->slot = slot;
 	tm->generation = btrfs_node_ptr_generation(eb, slot);
 
-	ret = __tree_mod_log_insert(fs_info, tm);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return __tree_mod_log_insert(fs_info, tm);
+}
+
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+			     struct extent_buffer *eb, int slot,
+			     enum mod_log_op op, gfp_t flags)
+{
+	int ret;
+
+	if (tree_mod_dont_log(fs_info, eb))
+		return 0;
+
+	ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+
+	tree_mod_log_write_unlock(fs_info);
 	return ret;
 }
 
@@ -543,6 +583,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 }
 
 static noinline int
+tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
+			       struct extent_buffer *eb, int slot,
+			       enum mod_log_op op)
+{
+	return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
+}
+
+static noinline int
 tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int dst_slot, int src_slot,
 			 int nr_items, gfp_t flags)
@@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 		return 0;
 
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-		ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+		ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
 				MOD_LOG_KEY_REMOVE_WHILE_MOVING);
 		BUG_ON(ret < 0);
 	}
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret <= 0)
-		return ret;
+	if (ret < 0)
+		goto out;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
 	tm->slot = src_slot;
@@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	tm->op = MOD_LOG_MOVE_KEYS;
 
 	ret = __tree_mod_log_insert(fs_info, tm);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+out:
+	tree_mod_log_write_unlock(fs_info);
 	return ret;
 }
 
+static inline void
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+{
+	int i;
+	u32 nritems;
+	int ret;
+
+	nritems = btrfs_header_nritems(eb);
+	for (i = nritems - 1; i >= 0; i--) {
+		ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
+				MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+		BUG_ON(ret < 0);
+	}
+}
+
 static noinline int
 tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *old_root,
@@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	struct tree_mod_elem *tm;
 	int ret;
 
+	if (tree_mod_dont_log(fs_info, NULL))
+		return 0;
+
+	__tree_mod_log_free_eb(fs_info, old_root);
+
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret <= 0)
-		return ret;
+	if (ret < 0)
+		goto out;
 
 	tm->index = new_root->start >> PAGE_CACHE_SHIFT;
 	tm->old_root.logical = old_root->start;
@@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	tm->op = MOD_LOG_ROOT_REPLACE;
 
 	ret = __tree_mod_log_insert(fs_info, tm);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+out:
+	tree_mod_log_write_unlock(fs_info);
 	return ret;
 }
 
@@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 	struct tree_mod_elem *found = NULL;
 	u64 index = start >> PAGE_CACHE_SHIFT;
 
-	read_lock(&fs_info->tree_mod_log_lock);
+	tree_mod_log_read_lock(fs_info);
 	tm_root = &fs_info->tree_mod_log;
 	node = tm_root->rb_node;
 	while (node) {
@@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			node = node->rb_left;
 		} else if (cur->index > index) {
 			node = node->rb_right;
-		} else if (cur->elem.seq < min_seq) {
+		} else if (cur->seq < min_seq) {
 			node = node->rb_left;
 		} else if (!smallest) {
 			/* we want the node with the highest seq */
 			if (found)
-				BUG_ON(found->elem.seq > cur->elem.seq);
+				BUG_ON(found->seq > cur->seq);
 			found = cur;
 			node = node->rb_left;
-		} else if (cur->elem.seq > min_seq) {
+		} else if (cur->seq > min_seq) {
 			/* we want the node with the smallest seq */
 			if (found)
-				BUG_ON(found->elem.seq < cur->elem.seq);
+				BUG_ON(found->seq < cur->seq);
 			found = cur;
 			node = node->rb_right;
 		} else {
@@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			break;
 		}
 	}
-	read_unlock(&fs_info->tree_mod_log_lock);
+	tree_mod_log_read_unlock(fs_info);
 
 	return found;
 }
@@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 	return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static inline void
+static noinline void
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 		     struct extent_buffer *src, unsigned long dst_offset,
 		     unsigned long src_offset, int nr_items)
@@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	if (tree_mod_dont_log(fs_info, NULL))
 		return;
 
-	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
+		tree_mod_log_write_unlock(fs_info);
 		return;
+	}
 
-	/* speed this up by single seq for all operations? */
 	for (i = 0; i < nr_items; i++) {
-		ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
-					      MOD_LOG_KEY_REMOVE);
+		ret = tree_mod_log_insert_key_locked(fs_info, src,
+						     i + src_offset,
+						     MOD_LOG_KEY_REMOVE);
 		BUG_ON(ret < 0);
-		ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
-					      MOD_LOG_KEY_ADD);
+		ret = tree_mod_log_insert_key_locked(fs_info, dst,
						     i + dst_offset,
+						     MOD_LOG_KEY_ADD);
 		BUG_ON(ret < 0);
 	}
+
+	tree_mod_log_write_unlock(fs_info);
 }
 
 static inline void
@@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	BUG_ON(ret < 0);
 }
 
-static inline void
+static noinline void
 tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 			  struct extent_buffer *eb,
 			  struct btrfs_disk_key *disk_key, int slot, int atomic)
@@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 	BUG_ON(ret < 0);
 }
 
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
-				 struct extent_buffer *eb)
+static noinline void
+tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 {
-	int i;
-	int ret;
-	u32 nritems;
-
 	if (tree_mod_dont_log(fs_info, eb))
 		return;
 
-	nritems = btrfs_header_nritems(eb);
-	for (i = nritems - 1; i >= 0; i--) {
-		ret = tree_mod_log_insert_key(fs_info, eb, i,
-				MOD_LOG_KEY_REMOVE_WHILE_FREEING);
-		BUG_ON(ret < 0);
-	}
+	__tree_mod_log_free_eb(fs_info, eb);
+
+	tree_mod_log_write_unlock(fs_info);
 }
 
-static inline void
+static noinline void
 tree_mod_log_set_root_pointer(struct btrfs_root *root,
 			      struct extent_buffer *new_root_node)
 {
 	int ret;
-	tree_mod_log_free_eb(root->fs_info, root->node);
 	ret = tree_mod_log_insert_root(root->fs_info, root->node,
 				       new_root_node, GFP_NOFS);
 	BUG_ON(ret < 0);
@@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 	unsigned long p_size = sizeof(struct btrfs_key_ptr);
 
 	n = btrfs_header_nritems(eb);
-	while (tm && tm->elem.seq >= time_seq) {
+	while (tm && tm->seq >= time_seq) {
 		/*
 		 * all the operations are recorded with the operator used for
 		 * the modification. as we're going backwards, we do the
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84ac723f58f8..8f8dc46f44e7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1030,6 +1030,13 @@ struct btrfs_block_group_cache {
 	struct list_head cluster_list;
 };
 
+/* delayed seq elem */
+struct seq_list {
+	struct list_head list;
+	u64 seq;
+};
+
+/* fs_info */
 struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
@@ -1144,6 +1151,8 @@ struct btrfs_fs_info {
 	spinlock_t tree_mod_seq_lock;
 	atomic_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
+	struct seq_list tree_mod_seq_elem;
+	wait_queue_head_t tree_mod_seq_wait;
 
 	/* this protects tree_mod_log */
 	rwlock_t tree_mod_log_lock;
@@ -2798,6 +2807,16 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 	kfree(fs_info);
 }
 
+/* tree mod log functions from ctree.c */
+u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			   struct seq_list *elem);
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem);
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
+{
+	return atomic_inc_return(&fs_info->tree_mod_seq);
+}
+
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
 			struct btrfs_path *path,
@@ -3157,18 +3176,6 @@ void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 			 u64 start, int err);
 
-/* delayed seq elem */
-struct seq_list {
-	struct list_head list;
-	u64 seq;
-	u32 flags;
-};
-
-void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem);
-void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem);
-
 static inline int is_fstree(u64 rootid)
 {
 	if (rootid == BTRFS_FS_TREE_OBJECTID ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 13ae7b04790e..21a757717637 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+			    struct btrfs_delayed_ref_root *delayed_refs,
 			    u64 seq)
 {
 	struct seq_list *elem;
-
-	assert_spin_locked(&delayed_refs->lock);
-	if (list_empty(&delayed_refs->seq_head))
-		return 0;
-
-	elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
-	if (seq >= elem->seq) {
-		pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
-			 seq, elem->seq, delayed_refs);
-		return 1;
+	int ret = 0;
+
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (!list_empty(&fs_info->tree_mod_seq_list)) {
+		elem = list_first_entry(&fs_info->tree_mod_seq_list,
+					struct seq_list, list);
+		if (seq >= elem->seq) {
+			pr_debug("holding back delayed_ref %llu, lowest is "
+				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
+			ret = 1;
+		}
 	}
-	return 0;
+
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return ret;
 }
 
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
@@ -526,7 +530,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	ref->in_tree = 1;
 
 	if (is_fstree(ref_root))
-		seq = inc_delayed_seq(delayed_refs);
+		seq = btrfs_inc_tree_mod_seq(fs_info);
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -585,7 +589,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	ref->in_tree = 1;
 
 	if (is_fstree(ref_root))
-		seq = inc_delayed_seq(delayed_refs);
+		seq = btrfs_inc_tree_mod_seq(fs_info);
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -659,8 +663,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 				   num_bytes, parent, ref_root, level, action,
 				   for_cow);
 	if (!is_fstree(ref_root) &&
-	    waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	    waitqueue_active(&fs_info->tree_mod_seq_wait))
+		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 
 	return 0;
@@ -708,8 +712,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 				   num_bytes, parent, ref_root, owner, offset,
 				   action, for_cow);
 	if (!is_fstree(ref_root) &&
-	    waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	    waitqueue_active(&fs_info->tree_mod_seq_wait))
+		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 
 	return 0;
@@ -736,8 +740,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
 				   extent_op->is_data);
 
-	if (waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
+		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 413927fb9957..2b5cb27f9861 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
 	int flushing;
 
 	u64 run_delayed_start;
-
-	/*
-	 * seq number of delayed refs. We need to know if a backref was being
-	 * added before the currently processed ref or afterwards.
-	 */
-	u64 seq;
-
-	/*
-	 * seq_list holds a list of all seq numbers that are currently being
-	 * added to the list. While walking backrefs (btrfs_find_all_roots,
-	 * qgroups), which might take some time, no newer ref must be processed,
-	 * as it might influence the outcome of the walk.
-	 */
-	struct list_head seq_head;
-
-	/*
-	 * when the only refs we have in the list must not be processed, we want
-	 * to wait for more refs to show up or for the end of backref walking.
-	 */
-	wait_queue_head_t seq_wait;
 };
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -195,33 +175,8 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
 			   struct list_head *cluster, u64 search_start);
 
-static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
-{
-	assert_spin_locked(&delayed_refs->lock);
-	++delayed_refs->seq;
-	return delayed_refs->seq;
-}
-
-static inline void
-btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
-		      struct seq_list *elem)
-{
-	assert_spin_locked(&delayed_refs->lock);
-	elem->seq = delayed_refs->seq;
-	list_add_tail(&elem->list, &delayed_refs->seq_head);
-}
-
-static inline void
-btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
-		      struct seq_list *elem)
-{
-	spin_lock(&delayed_refs->lock);
-	list_del(&elem->list);
-	wake_up(&delayed_refs->seq_wait);
-	spin_unlock(&delayed_refs->lock);
-}
-
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+			    struct btrfs_delayed_ref_root *delayed_refs,
 			    u64 seq);
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8cc47103a32e..19a39e10d6f5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1944,6 +1944,8 @@ int open_ctree(struct super_block *sb,
 	fs_info->free_chunk_space = 0;
 	fs_info->tree_mod_log = RB_ROOT;
 
+	init_waitqueue_head(&fs_info->tree_mod_seq_wait);
+
 	/* readahead state */
 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
 	spin_lock_init(&fs_info->reada_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6e1d36702ff7..94ce79f76e5f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2217,6 +2217,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
 	struct btrfs_delayed_extent_op *extent_op;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 	int count = 0;
 	int must_insert_reserved = 0;
@@ -2255,7 +2256,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref = select_delayed_ref(locked_ref);
 
 		if (ref && ref->seq &&
-		    btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
 			/*
 			 * there are still refs with lower seq numbers in the
 			 * process of being added. Don't run this ref yet.
@@ -2337,7 +2338,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		}
 
 next:
-		do_chunk_alloc(trans, root->fs_info->extent_root,
+		do_chunk_alloc(trans, fs_info->extent_root,
 			       2 * 1024 * 1024,
 			       btrfs_get_alloc_profile(root, 0),
 			       CHUNK_ALLOC_NO_FORCE);
@@ -2347,18 +2348,19 @@
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
+			       struct btrfs_delayed_ref_root *delayed_refs,
 			       unsigned long num_refs,
 			       struct list_head *first_seq)
 {
 	spin_unlock(&delayed_refs->lock);
 	pr_debug("waiting for more refs (num %ld, first %p)\n",
 		 num_refs, first_seq);
-	wait_event(delayed_refs->seq_wait,
+	wait_event(fs_info->tree_mod_seq_wait,
 		   num_refs != delayed_refs->num_entries ||
-		   delayed_refs->seq_head.next != first_seq);
+		   fs_info->tree_mod_seq_list.next != first_seq);
 	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, delayed_refs->seq_head.next);
+		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
 	spin_lock(&delayed_refs->lock);
 }
 
@@ -2403,6 +2405,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 again:
 	consider_waiting = 0;
 	spin_lock(&delayed_refs->lock);
+
 	if (count == 0) {
 		count = delayed_refs->num_entries * 2;
 		run_most = 1;
@@ -2437,7 +2440,7 @@ again:
 			num_refs = delayed_refs->num_entries;
 			first_seq = root->fs_info->tree_mod_seq_list.next;
 		} else {
-			wait_for_more_refs(delayed_refs,
+			wait_for_more_refs(root->fs_info, delayed_refs,
 					   num_refs, first_seq);
 			/*
 			 * after waiting, things have changed. we
@@ -5190,8 +5193,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	if (waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
+		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b72b068183ec..621c8dc48fb6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -38,7 +38,6 @@ void put_transaction(struct btrfs_transaction *transaction)
 	if (atomic_dec_and_test(&transaction->use_count)) {
 		BUG_ON(!list_empty(&transaction->list));
 		WARN_ON(transaction->delayed_refs.root.rb_node);
-		WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
 		memset(transaction, 0, sizeof(*transaction));
 		kmem_cache_free(btrfs_transaction_cachep, transaction);
 	}
@@ -126,7 +125,6 @@ loop:
 	cur_trans->delayed_refs.num_heads = 0;
 	cur_trans->delayed_refs.flushing = 0;
 	cur_trans->delayed_refs.run_delayed_start = 0;
-	cur_trans->delayed_refs.seq = 1;
 
 	/*
 	 * although the tree mod log is per file system and not per transaction,
@@ -145,10 +143,8 @@ loop:
 	}
 	atomic_set(&fs_info->tree_mod_seq, 0);
 
-	init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
 	spin_lock_init(&cur_trans->commit_lock);
 	spin_lock_init(&cur_trans->delayed_refs.lock);
-	INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
 
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);