aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
authorYan Zheng <zheng.yan@oracle.com>2009-06-10 10:45:14 -0400
committerChris Mason <chris.mason@oracle.com>2009-06-10 11:29:46 -0400
commit5d4f98a28c7d334091c1b7744f48a1acdd2a4ae0 (patch)
treec611d7d824cbcdb777dd2d8e33e2ed1c5df8a9c6 /fs/btrfs/transaction.c
parent5c939df56c3ea018b58e5aa76181284c2053d699 (diff)
Btrfs: Mixed back reference (FORWARD ROLLING FORMAT CHANGE)
This commit introduces a new kind of back reference for btrfs metadata. Once a filesystem has been mounted with this commit, IT WILL NO LONGER BE MOUNTABLE BY OLDER KERNELS. When a tree block in subvolume tree is cow'd, the reference counts of all extents it points to are increased by one. At transaction commit time, the old root of the subvolume is recorded in a "dead root" data structure, and the btree it points to is later walked, dropping reference counts and freeing any blocks where the reference count goes to 0. The increments done during cow and decrements done after commit cancel out, and the walk is a very expensive way to go about freeing the blocks that are no longer referenced by the new btree root. This commit reduces the transaction overhead by avoiding the need for dead root records. When a non-shared tree block is cow'd, we free the old block at once, and the new block inherits old block's references. When a tree block with reference count > 1 is cow'd, we increase the reference counts of all extents the new block points to by one, and decrease the old block's reference count by one. This dead tree avoidance code removes the need to modify the reference counts of lower level extents when a non-shared tree block is cow'd. But we still need to update back ref for all pointers in the block. This is because the location of the block is recorded in the back ref item. We can solve this by introducing a new type of back ref. The new back ref provides information about pointer's key, level and in which tree the pointer lives. This information allow us to find the pointer by searching the tree. The shortcoming of the new back ref is that it only works for pointers in tree blocks referenced by their owner trees. This is mostly a problem for snapshots, where resolving one of these fuzzy back references would be O(number_of_snapshots) and quite slow. The solution used here is to use the fuzzy back references in the common case where a given tree block is only referenced by one root, and use the full back references when multiple roots have a reference on a given block. This commit adds per subvolume red-black tree to keep trace of cached inodes. The red-black tree helps the balancing code to find cached inodes whose inode numbers within a given range. This commit improves the balancing code by introducing several data structures to keep the state of balancing. The most important one is the back ref cache. It caches how the upper level tree blocks are referenced. This greatly reduce the overhead of checking back ref. The improved balancing code scales significantly better with a large number of snapshots. This is a very large commit and was written in a number of pieces. But, they depend heavily on the disk format change and were squashed together to make sure git bisect didn't end up in a bad state wrt space balancing or the format change. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c410
1 files changed, 139 insertions, 271 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 01b143605ec1..2e177d7f4bb9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,7 +25,6 @@
25#include "disk-io.h" 25#include "disk-io.h"
26#include "transaction.h" 26#include "transaction.h"
27#include "locking.h" 27#include "locking.h"
28#include "ref-cache.h"
29#include "tree-log.h" 28#include "tree-log.h"
30 29
31#define BTRFS_ROOT_TRANS_TAG 0 30#define BTRFS_ROOT_TRANS_TAG 0
@@ -94,45 +93,37 @@ static noinline int join_transaction(struct btrfs_root *root)
94 * to make sure the old root from before we joined the transaction is deleted 93 * to make sure the old root from before we joined the transaction is deleted
95 * when the transaction commits 94 * when the transaction commits
96 */ 95 */
97noinline int btrfs_record_root_in_trans(struct btrfs_root *root) 96static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root)
98{ 98{
99 struct btrfs_dirty_root *dirty; 99 if (root->ref_cows && root->last_trans < trans->transid) {
100 u64 running_trans_id = root->fs_info->running_transaction->transid;
101 if (root->ref_cows && root->last_trans < running_trans_id) {
102 WARN_ON(root == root->fs_info->extent_root); 100 WARN_ON(root == root->fs_info->extent_root);
103 if (root->root_item.refs != 0) { 101 WARN_ON(root->root_item.refs == 0);
104 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 102 WARN_ON(root->commit_root != root->node);
105 (unsigned long)root->root_key.objectid, 103
106 BTRFS_ROOT_TRANS_TAG); 104 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
107 105 (unsigned long)root->root_key.objectid,
108 dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 106 BTRFS_ROOT_TRANS_TAG);
109 BUG_ON(!dirty); 107 root->last_trans = trans->transid;
110 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); 108 btrfs_init_reloc_root(trans, root);
111 BUG_ON(!dirty->root); 109 }
112 dirty->latest_root = root; 110 return 0;
113 INIT_LIST_HEAD(&dirty->list); 111}
114
115 root->commit_root = btrfs_root_node(root);
116
117 memcpy(dirty->root, root, sizeof(*root));
118 spin_lock_init(&dirty->root->node_lock);
119 spin_lock_init(&dirty->root->list_lock);
120 mutex_init(&dirty->root->objectid_mutex);
121 mutex_init(&dirty->root->log_mutex);
122 INIT_LIST_HEAD(&dirty->root->dead_list);
123 dirty->root->node = root->commit_root;
124 dirty->root->commit_root = NULL;
125 112
126 spin_lock(&root->list_lock); 113int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
127 list_add(&dirty->root->dead_list, &root->dead_list); 114 struct btrfs_root *root)
128 spin_unlock(&root->list_lock); 115{
116 if (!root->ref_cows)
117 return 0;
129 118
130 root->dirty_root = dirty; 119 mutex_lock(&root->fs_info->trans_mutex);
131 } else { 120 if (root->last_trans == trans->transid) {
132 WARN_ON(1); 121 mutex_unlock(&root->fs_info->trans_mutex);
133 } 122 return 0;
134 root->last_trans = running_trans_id;
135 } 123 }
124
125 record_root_in_trans(trans, root);
126 mutex_unlock(&root->fs_info->trans_mutex);
136 return 0; 127 return 0;
137} 128}
138 129
@@ -181,7 +172,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181 ret = join_transaction(root); 172 ret = join_transaction(root);
182 BUG_ON(ret); 173 BUG_ON(ret);
183 174
184 btrfs_record_root_in_trans(root);
185 h->transid = root->fs_info->running_transaction->transid; 175 h->transid = root->fs_info->running_transaction->transid;
186 h->transaction = root->fs_info->running_transaction; 176 h->transaction = root->fs_info->running_transaction;
187 h->blocks_reserved = num_blocks; 177 h->blocks_reserved = num_blocks;
@@ -192,6 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
192 h->delayed_ref_updates = 0; 182 h->delayed_ref_updates = 0;
193 183
194 root->fs_info->running_transaction->use_count++; 184 root->fs_info->running_transaction->use_count++;
185 record_root_in_trans(h, root);
195 mutex_unlock(&root->fs_info->trans_mutex); 186 mutex_unlock(&root->fs_info->trans_mutex);
196 return h; 187 return h;
197} 188}
@@ -233,6 +224,7 @@ static noinline int wait_for_commit(struct btrfs_root *root,
233 return 0; 224 return 0;
234} 225}
235 226
227#if 0
236/* 228/*
237 * rate limit against the drop_snapshot code. This helps to slow down new 229 * rate limit against the drop_snapshot code. This helps to slow down new
238 * operations if the drop_snapshot code isn't able to keep up. 230 * operations if the drop_snapshot code isn't able to keep up.
@@ -273,6 +265,7 @@ harder:
273 goto harder; 265 goto harder;
274 } 266 }
275} 267}
268#endif
276 269
277void btrfs_throttle(struct btrfs_root *root) 270void btrfs_throttle(struct btrfs_root *root)
278{ 271{
@@ -280,7 +273,6 @@ void btrfs_throttle(struct btrfs_root *root)
280 if (!root->fs_info->open_ioctl_trans) 273 if (!root->fs_info->open_ioctl_trans)
281 wait_current_trans(root); 274 wait_current_trans(root);
282 mutex_unlock(&root->fs_info->trans_mutex); 275 mutex_unlock(&root->fs_info->trans_mutex);
283 throttle_on_drops(root);
284} 276}
285 277
286static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 278static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -323,9 +315,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
323 memset(trans, 0, sizeof(*trans)); 315 memset(trans, 0, sizeof(*trans));
324 kmem_cache_free(btrfs_trans_handle_cachep, trans); 316 kmem_cache_free(btrfs_trans_handle_cachep, trans);
325 317
326 if (throttle)
327 throttle_on_drops(root);
328
329 return 0; 318 return 0;
330} 319}
331 320
@@ -462,12 +451,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
462 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 451 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
463 if (old_root_bytenr == root->node->start) 452 if (old_root_bytenr == root->node->start)
464 break; 453 break;
465 btrfs_set_root_bytenr(&root->root_item,
466 root->node->start);
467 btrfs_set_root_level(&root->root_item,
468 btrfs_header_level(root->node));
469 btrfs_set_root_generation(&root->root_item, trans->transid);
470 454
455 btrfs_set_root_node(&root->root_item, root->node);
471 ret = btrfs_update_root(trans, tree_root, 456 ret = btrfs_update_root(trans, tree_root,
472 &root->root_key, 457 &root->root_key,
473 &root->root_item); 458 &root->root_item);
@@ -477,14 +462,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
477 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 462 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
478 BUG_ON(ret); 463 BUG_ON(ret);
479 } 464 }
465 free_extent_buffer(root->commit_root);
466 root->commit_root = btrfs_root_node(root);
480 return 0; 467 return 0;
481} 468}
482 469
483/* 470/*
484 * update all the cowonly tree roots on disk 471 * update all the cowonly tree roots on disk
485 */ 472 */
486int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 473static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
487 struct btrfs_root *root) 474 struct btrfs_root *root)
488{ 475{
489 struct btrfs_fs_info *fs_info = root->fs_info; 476 struct btrfs_fs_info *fs_info = root->fs_info;
490 struct list_head *next; 477 struct list_head *next;
@@ -520,118 +507,54 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
520 * a dirty root struct and adds it into the list of dead roots that need to 507 * a dirty root struct and adds it into the list of dead roots that need to
521 * be deleted 508 * be deleted
522 */ 509 */
523int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) 510int btrfs_add_dead_root(struct btrfs_root *root)
524{ 511{
525 struct btrfs_dirty_root *dirty;
526
527 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
528 if (!dirty)
529 return -ENOMEM;
530 dirty->root = root;
531 dirty->latest_root = latest;
532
533 mutex_lock(&root->fs_info->trans_mutex); 512 mutex_lock(&root->fs_info->trans_mutex);
534 list_add(&dirty->list, &latest->fs_info->dead_roots); 513 list_add(&root->root_list, &root->fs_info->dead_roots);
535 mutex_unlock(&root->fs_info->trans_mutex); 514 mutex_unlock(&root->fs_info->trans_mutex);
536 return 0; 515 return 0;
537} 516}
538 517
539/* 518/*
540 * at transaction commit time we need to schedule the old roots for 519 * update all the cowonly tree roots on disk
541 * deletion via btrfs_drop_snapshot. This runs through all the
542 * reference counted roots that were modified in the current
543 * transaction and puts them into the drop list
544 */ 520 */
545static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, 521static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
546 struct radix_tree_root *radix, 522 struct btrfs_root *root)
547 struct list_head *list)
548{ 523{
549 struct btrfs_dirty_root *dirty;
550 struct btrfs_root *gang[8]; 524 struct btrfs_root *gang[8];
551 struct btrfs_root *root; 525 struct btrfs_fs_info *fs_info = root->fs_info;
552 int i; 526 int i;
553 int ret; 527 int ret;
554 int err = 0; 528 int err = 0;
555 u32 refs;
556 529
557 while (1) { 530 while (1) {
558 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, 531 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
532 (void **)gang, 0,
559 ARRAY_SIZE(gang), 533 ARRAY_SIZE(gang),
560 BTRFS_ROOT_TRANS_TAG); 534 BTRFS_ROOT_TRANS_TAG);
561 if (ret == 0) 535 if (ret == 0)
562 break; 536 break;
563 for (i = 0; i < ret; i++) { 537 for (i = 0; i < ret; i++) {
564 root = gang[i]; 538 root = gang[i];
565 radix_tree_tag_clear(radix, 539 radix_tree_tag_clear(&fs_info->fs_roots_radix,
566 (unsigned long)root->root_key.objectid, 540 (unsigned long)root->root_key.objectid,
567 BTRFS_ROOT_TRANS_TAG); 541 BTRFS_ROOT_TRANS_TAG);
568
569 BUG_ON(!root->ref_tree);
570 dirty = root->dirty_root;
571 542
572 btrfs_free_log(trans, root); 543 btrfs_free_log(trans, root);
573 btrfs_free_reloc_root(trans, root); 544 btrfs_update_reloc_root(trans, root);
574
575 if (root->commit_root == root->node) {
576 WARN_ON(root->node->start !=
577 btrfs_root_bytenr(&root->root_item));
578
579 free_extent_buffer(root->commit_root);
580 root->commit_root = NULL;
581 root->dirty_root = NULL;
582
583 spin_lock(&root->list_lock);
584 list_del_init(&dirty->root->dead_list);
585 spin_unlock(&root->list_lock);
586 545
587 kfree(dirty->root); 546 if (root->commit_root == root->node)
588 kfree(dirty);
589
590 /* make sure to update the root on disk
591 * so we get any updates to the block used
592 * counts
593 */
594 err = btrfs_update_root(trans,
595 root->fs_info->tree_root,
596 &root->root_key,
597 &root->root_item);
598 continue; 547 continue;
599 }
600 548
601 memset(&root->root_item.drop_progress, 0, 549 free_extent_buffer(root->commit_root);
602 sizeof(struct btrfs_disk_key)); 550 root->commit_root = btrfs_root_node(root);
603 root->root_item.drop_level = 0; 551
604 root->commit_root = NULL; 552 btrfs_set_root_node(&root->root_item, root->node);
605 root->dirty_root = NULL; 553 err = btrfs_update_root(trans, fs_info->tree_root,
606 root->root_key.offset = root->fs_info->generation;
607 btrfs_set_root_bytenr(&root->root_item,
608 root->node->start);
609 btrfs_set_root_level(&root->root_item,
610 btrfs_header_level(root->node));
611 btrfs_set_root_generation(&root->root_item,
612 root->root_key.offset);
613
614 err = btrfs_insert_root(trans, root->fs_info->tree_root,
615 &root->root_key, 554 &root->root_key,
616 &root->root_item); 555 &root->root_item);
617 if (err) 556 if (err)
618 break; 557 break;
619
620 refs = btrfs_root_refs(&dirty->root->root_item);
621 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
622 err = btrfs_update_root(trans, root->fs_info->tree_root,
623 &dirty->root->root_key,
624 &dirty->root->root_item);
625
626 BUG_ON(err);
627 if (refs == 1) {
628 list_add(&dirty->list, list);
629 } else {
630 WARN_ON(1);
631 free_extent_buffer(dirty->root->node);
632 kfree(dirty->root);
633 kfree(dirty);
634 }
635 } 558 }
636 } 559 }
637 return err; 560 return err;
@@ -688,12 +611,8 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
688 TASK_UNINTERRUPTIBLE); 611 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 612 mutex_unlock(&info->trans_mutex);
690 613
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
694 schedule(); 614 schedule();
695 615
696 atomic_inc(&info->throttles);
697 mutex_lock(&info->trans_mutex); 616 mutex_lock(&info->trans_mutex);
698 finish_wait(&info->transaction_wait, &wait); 617 finish_wait(&info->transaction_wait, &wait);
699 } 618 }
@@ -705,111 +624,61 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
705 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 624 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
706 * all of them 625 * all of them
707 */ 626 */
708static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 627int btrfs_drop_dead_root(struct btrfs_root *root)
709 struct list_head *list)
710{ 628{
711 struct btrfs_dirty_root *dirty;
712 struct btrfs_trans_handle *trans; 629 struct btrfs_trans_handle *trans;
630 struct btrfs_root *tree_root = root->fs_info->tree_root;
713 unsigned long nr; 631 unsigned long nr;
714 u64 num_bytes; 632 int ret;
715 u64 bytes_used;
716 u64 max_useless;
717 int ret = 0;
718 int err;
719
720 while (!list_empty(list)) {
721 struct btrfs_root *root;
722
723 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
724 list_del_init(&dirty->list);
725
726 num_bytes = btrfs_root_used(&dirty->root->root_item);
727 root = dirty->latest_root;
728 atomic_inc(&root->fs_info->throttles);
729
730 while (1) {
731 /*
732 * we don't want to jump in and create a bunch of
733 * delayed refs if the transaction is starting to close
734 */
735 wait_transaction_pre_flush(tree_root->fs_info);
736 trans = btrfs_start_transaction(tree_root, 1);
737
738 /*
739 * we've joined a transaction, make sure it isn't
740 * closing right now
741 */
742 if (trans->transaction->delayed_refs.flushing) {
743 btrfs_end_transaction(trans, tree_root);
744 continue;
745 }
746
747 mutex_lock(&root->fs_info->drop_mutex);
748 ret = btrfs_drop_snapshot(trans, dirty->root);
749 if (ret != -EAGAIN)
750 break;
751 mutex_unlock(&root->fs_info->drop_mutex);
752 633
753 err = btrfs_update_root(trans, 634 while (1) {
754 tree_root, 635 /*
755 &dirty->root->root_key, 636 * we don't want to jump in and create a bunch of
756 &dirty->root->root_item); 637 * delayed refs if the transaction is starting to close
757 if (err) 638 */
758 ret = err; 639 wait_transaction_pre_flush(tree_root->fs_info);
759 nr = trans->blocks_used; 640 trans = btrfs_start_transaction(tree_root, 1);
760 ret = btrfs_end_transaction(trans, tree_root);
761 BUG_ON(ret);
762 641
763 btrfs_btree_balance_dirty(tree_root, nr); 642 /*
764 cond_resched(); 643 * we've joined a transaction, make sure it isn't
644 * closing right now
645 */
646 if (trans->transaction->delayed_refs.flushing) {
647 btrfs_end_transaction(trans, tree_root);
648 continue;
765 } 649 }
766 BUG_ON(ret);
767 atomic_dec(&root->fs_info->throttles);
768 wake_up(&root->fs_info->transaction_throttle);
769 650
770 num_bytes -= btrfs_root_used(&dirty->root->root_item); 651 ret = btrfs_drop_snapshot(trans, root);
771 bytes_used = btrfs_root_used(&root->root_item); 652 if (ret != -EAGAIN)
772 if (num_bytes) { 653 break;
773 mutex_lock(&root->fs_info->trans_mutex);
774 btrfs_record_root_in_trans(root);
775 mutex_unlock(&root->fs_info->trans_mutex);
776 btrfs_set_root_used(&root->root_item,
777 bytes_used - num_bytes);
778 }
779 654
780 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); 655 ret = btrfs_update_root(trans, tree_root,
781 if (ret) { 656 &root->root_key,
782 BUG(); 657 &root->root_item);
658 if (ret)
783 break; 659 break;
784 }
785 mutex_unlock(&root->fs_info->drop_mutex);
786
787 spin_lock(&root->list_lock);
788 list_del_init(&dirty->root->dead_list);
789 if (!list_empty(&root->dead_list)) {
790 struct btrfs_root *oldest;
791 oldest = list_entry(root->dead_list.prev,
792 struct btrfs_root, dead_list);
793 max_useless = oldest->root_key.offset - 1;
794 } else {
795 max_useless = root->root_key.offset - 1;
796 }
797 spin_unlock(&root->list_lock);
798 660
799 nr = trans->blocks_used; 661 nr = trans->blocks_used;
800 ret = btrfs_end_transaction(trans, tree_root); 662 ret = btrfs_end_transaction(trans, tree_root);
801 BUG_ON(ret); 663 BUG_ON(ret);
802 664
803 ret = btrfs_remove_leaf_refs(root, max_useless, 0);
804 BUG_ON(ret);
805
806 free_extent_buffer(dirty->root->node);
807 kfree(dirty->root);
808 kfree(dirty);
809
810 btrfs_btree_balance_dirty(tree_root, nr); 665 btrfs_btree_balance_dirty(tree_root, nr);
811 cond_resched(); 666 cond_resched();
812 } 667 }
668 BUG_ON(ret);
669
670 ret = btrfs_del_root(trans, tree_root, &root->root_key);
671 BUG_ON(ret);
672
673 nr = trans->blocks_used;
674 ret = btrfs_end_transaction(trans, tree_root);
675 BUG_ON(ret);
676
677 free_extent_buffer(root->node);
678 free_extent_buffer(root->commit_root);
679 kfree(root);
680
681 btrfs_btree_balance_dirty(tree_root, nr);
813 return ret; 682 return ret;
814} 683}
815 684
@@ -839,24 +708,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
839 if (ret) 708 if (ret)
840 goto fail; 709 goto fail;
841 710
842 btrfs_record_root_in_trans(root); 711 record_root_in_trans(trans, root);
843 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 712 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
844 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 713 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
845 714
846 key.objectid = objectid; 715 key.objectid = objectid;
847 key.offset = trans->transid; 716 key.offset = 0;
848 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 717 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
849 718
850 old = btrfs_lock_root_node(root); 719 old = btrfs_lock_root_node(root);
851 btrfs_cow_block(trans, root, old, NULL, 0, &old); 720 btrfs_cow_block(trans, root, old, NULL, 0, &old);
721 btrfs_set_lock_blocking(old);
852 722
853 btrfs_copy_root(trans, root, old, &tmp, objectid); 723 btrfs_copy_root(trans, root, old, &tmp, objectid);
854 btrfs_tree_unlock(old); 724 btrfs_tree_unlock(old);
855 free_extent_buffer(old); 725 free_extent_buffer(old);
856 726
857 btrfs_set_root_bytenr(new_root_item, tmp->start); 727 btrfs_set_root_node(new_root_item, tmp);
858 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
859 btrfs_set_root_generation(new_root_item, trans->transid);
860 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 728 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
861 new_root_item); 729 new_root_item);
862 btrfs_tree_unlock(tmp); 730 btrfs_tree_unlock(tmp);
@@ -964,6 +832,24 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
964 return 0; 832 return 0;
965} 833}
966 834
835static void update_super_roots(struct btrfs_root *root)
836{
837 struct btrfs_root_item *root_item;
838 struct btrfs_super_block *super;
839
840 super = &root->fs_info->super_copy;
841
842 root_item = &root->fs_info->chunk_root->root_item;
843 super->chunk_root = root_item->bytenr;
844 super->chunk_root_generation = root_item->generation;
845 super->chunk_root_level = root_item->level;
846
847 root_item = &root->fs_info->tree_root->root_item;
848 super->root = root_item->bytenr;
849 super->generation = root_item->generation;
850 super->root_level = root_item->level;
851}
852
967int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 853int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
968 struct btrfs_root *root) 854 struct btrfs_root *root)
969{ 855{
@@ -971,8 +857,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
971 unsigned long timeout = 1; 857 unsigned long timeout = 1;
972 struct btrfs_transaction *cur_trans; 858 struct btrfs_transaction *cur_trans;
973 struct btrfs_transaction *prev_trans = NULL; 859 struct btrfs_transaction *prev_trans = NULL;
974 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
975 struct list_head dirty_fs_roots;
976 struct extent_io_tree *pinned_copy; 860 struct extent_io_tree *pinned_copy;
977 DEFINE_WAIT(wait); 861 DEFINE_WAIT(wait);
978 int ret; 862 int ret;
@@ -999,7 +883,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
999 BUG_ON(ret); 883 BUG_ON(ret);
1000 884
1001 mutex_lock(&root->fs_info->trans_mutex); 885 mutex_lock(&root->fs_info->trans_mutex);
1002 INIT_LIST_HEAD(&dirty_fs_roots);
1003 if (cur_trans->in_commit) { 886 if (cur_trans->in_commit) {
1004 cur_trans->use_count++; 887 cur_trans->use_count++;
1005 mutex_unlock(&root->fs_info->trans_mutex); 888 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1105,41 +988,36 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1105 * with the tree-log code. 988 * with the tree-log code.
1106 */ 989 */
1107 mutex_lock(&root->fs_info->tree_log_mutex); 990 mutex_lock(&root->fs_info->tree_log_mutex);
1108 /*
1109 * keep tree reloc code from adding new reloc trees
1110 */
1111 mutex_lock(&root->fs_info->tree_reloc_mutex);
1112
1113 991
1114 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 992 ret = commit_fs_roots(trans, root);
1115 &dirty_fs_roots);
1116 BUG_ON(ret); 993 BUG_ON(ret);
1117 994
1118 /* add_dirty_roots gets rid of all the tree log roots, it is now 995 /* commit_fs_roots gets rid of all the tree log roots, it is now
1119 * safe to free the root of tree log roots 996 * safe to free the root of tree log roots
1120 */ 997 */
1121 btrfs_free_log_root_tree(trans, root->fs_info); 998 btrfs_free_log_root_tree(trans, root->fs_info);
1122 999
1123 ret = btrfs_commit_tree_roots(trans, root); 1000 ret = commit_cowonly_roots(trans, root);
1124 BUG_ON(ret); 1001 BUG_ON(ret);
1125 1002
1126 cur_trans = root->fs_info->running_transaction; 1003 cur_trans = root->fs_info->running_transaction;
1127 spin_lock(&root->fs_info->new_trans_lock); 1004 spin_lock(&root->fs_info->new_trans_lock);
1128 root->fs_info->running_transaction = NULL; 1005 root->fs_info->running_transaction = NULL;
1129 spin_unlock(&root->fs_info->new_trans_lock); 1006 spin_unlock(&root->fs_info->new_trans_lock);
1130 btrfs_set_super_generation(&root->fs_info->super_copy, 1007
1131 cur_trans->transid); 1008 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1132 btrfs_set_super_root(&root->fs_info->super_copy, 1009 root->fs_info->tree_root->node);
1133 root->fs_info->tree_root->node->start); 1010 free_extent_buffer(root->fs_info->tree_root->commit_root);
1134 btrfs_set_super_root_level(&root->fs_info->super_copy, 1011 root->fs_info->tree_root->commit_root =
1135 btrfs_header_level(root->fs_info->tree_root->node)); 1012 btrfs_root_node(root->fs_info->tree_root);
1136 1013
1137 btrfs_set_super_chunk_root(&root->fs_info->super_copy, 1014 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1138 chunk_root->node->start); 1015 root->fs_info->chunk_root->node);
1139 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, 1016 free_extent_buffer(root->fs_info->chunk_root->commit_root);
1140 btrfs_header_level(chunk_root->node)); 1017 root->fs_info->chunk_root->commit_root =
1141 btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, 1018 btrfs_root_node(root->fs_info->chunk_root);
1142 btrfs_header_generation(chunk_root->node)); 1019
1020 update_super_roots(root);
1143 1021
1144 if (!root->fs_info->log_root_recovering) { 1022 if (!root->fs_info->log_root_recovering) {
1145 btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1023 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
@@ -1153,7 +1031,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1153 1031
1154 trans->transaction->blocked = 0; 1032 trans->transaction->blocked = 0;
1155 1033
1156 wake_up(&root->fs_info->transaction_throttle);
1157 wake_up(&root->fs_info->transaction_wait); 1034 wake_up(&root->fs_info->transaction_wait);
1158 1035
1159 mutex_unlock(&root->fs_info->trans_mutex); 1036 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1170,9 +1047,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1170 btrfs_finish_extent_commit(trans, root, pinned_copy); 1047 btrfs_finish_extent_commit(trans, root, pinned_copy);
1171 kfree(pinned_copy); 1048 kfree(pinned_copy);
1172 1049
1173 btrfs_drop_dead_reloc_roots(root);
1174 mutex_unlock(&root->fs_info->tree_reloc_mutex);
1175
1176 /* do the directory inserts of any pending snapshot creations */ 1050 /* do the directory inserts of any pending snapshot creations */
1177 finish_pending_snapshots(trans, root->fs_info); 1051 finish_pending_snapshots(trans, root->fs_info);
1178 1052
@@ -1186,16 +1060,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1186 put_transaction(cur_trans); 1060 put_transaction(cur_trans);
1187 put_transaction(cur_trans); 1061 put_transaction(cur_trans);
1188 1062
1189 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
1190 if (root->fs_info->closing)
1191 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
1192
1193 mutex_unlock(&root->fs_info->trans_mutex); 1063 mutex_unlock(&root->fs_info->trans_mutex);
1194 1064
1195 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1065 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1196
1197 if (root->fs_info->closing)
1198 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
1199 return ret; 1066 return ret;
1200} 1067}
1201 1068
@@ -1204,16 +1071,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1204 */ 1071 */
1205int btrfs_clean_old_snapshots(struct btrfs_root *root) 1072int btrfs_clean_old_snapshots(struct btrfs_root *root)
1206{ 1073{
1207 struct list_head dirty_roots; 1074 LIST_HEAD(list);
1208 INIT_LIST_HEAD(&dirty_roots); 1075 struct btrfs_fs_info *fs_info = root->fs_info;
1209again: 1076
1210 mutex_lock(&root->fs_info->trans_mutex); 1077 mutex_lock(&fs_info->trans_mutex);
1211 list_splice_init(&root->fs_info->dead_roots, &dirty_roots); 1078 list_splice_init(&fs_info->dead_roots, &list);
1212 mutex_unlock(&root->fs_info->trans_mutex); 1079 mutex_unlock(&fs_info->trans_mutex);
1213 1080
1214 if (!list_empty(&dirty_roots)) { 1081 while (!list_empty(&list)) {
1215 drop_dirty_roots(root, &dirty_roots); 1082 root = list_entry(list.next, struct btrfs_root, root_list);
1216 goto again; 1083 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root);
1217 } 1085 }
1218 return 0; 1086 return 0;
1219} 1087}