aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-03-13 10:10:06 -0400
committerChris Mason <chris.mason@oracle.com>2009-03-24 16:14:25 -0400
commit56bec294dea971335d4466b30f2d959f28f6e36d (patch)
treefc0b5bbf4bb6ab35582a4c7f58f5ac88f71c38bf /fs/btrfs/extent-tree.c
parent9fa8cfe706f9c20067c042a064999d5825a35330 (diff)
Btrfs: do extent allocation and reference count updates in the background
The extent allocation tree maintains a reference count and full back reference information for every extent allocated in the filesystem. For subvolume and snapshot trees, every time a block goes through COW, the new copy of the block adds a reference on every block it points to. If a btree node points to 150 leaves, then the COW code needs to go and add backrefs on 150 different extents, which might be spread all over the extent allocation tree. These updates currently happen during btrfs_cow_block, and most COWs happen during btrfs_search_slot. btrfs_search_slot has locks held on both the parent and the node we are COWing, and so we really want to avoid IO during the COW if we can. This commit adds an rbtree of pending reference count updates and extent allocations. The tree is ordered by byte number of the extent and byte number of the parent for the back reference. The tree allows us to: 1) Modify back references in something close to disk order, reducing seeks 2) Significantly reduce the number of modifications made as block pointers are balanced around 3) Do all of the extent insertion and back reference modifications outside of the performance critical btrfs_search_slot code. #3 has the added benefit of greatly reducing the btrfs stack footprint. The extent allocation tree modifications are done without the deep (and somewhat recursive) call chains used in the past. These delayed back reference updates must be done before the transaction commits, and so the rbtree is tied to the transaction. Throttling is implemented to help keep the queue of backrefs at a reasonable size. Since there was a similar mechanism in place for the extent tree extents, that is removed and replaced by the delayed reference tree. Yan Zheng <yan.zheng@oracle.com> helped review and fixup this code. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c1496
1 files changed, 389 insertions, 1107 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fefe83ad2059..9b5da2b013e4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -49,10 +49,13 @@ struct pending_extent_op {
49 int del; 49 int del;
50}; 50};
51 51
52static int finish_current_insert(struct btrfs_trans_handle *trans, 52static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
53 struct btrfs_root *extent_root, int all); 53 struct btrfs_root *root, u64 parent,
54static int del_pending_extents(struct btrfs_trans_handle *trans, 54 u64 root_objectid, u64 ref_generation,
55 struct btrfs_root *extent_root, int all); 55 u64 owner, struct btrfs_key *ins,
56 int ref_mod);
57static int update_reserved_extents(struct btrfs_root *root,
58 u64 bytenr, u64 num, int reserve);
56static int pin_down_bytes(struct btrfs_trans_handle *trans, 59static int pin_down_bytes(struct btrfs_trans_handle *trans,
57 struct btrfs_root *root, 60 struct btrfs_root *root,
58 u64 bytenr, u64 num_bytes, int is_data); 61 u64 bytenr, u64 num_bytes, int is_data);
@@ -60,6 +63,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root, 63 struct btrfs_root *root,
61 u64 bytenr, u64 num_bytes, int alloc, 64 u64 bytenr, u64 num_bytes, int alloc,
62 int mark_free); 65 int mark_free);
66static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
67 struct btrfs_root *root,
68 u64 bytenr, u64 num_bytes, u64 parent,
69 u64 root_objectid, u64 ref_generation,
70 u64 owner_objectid, int pin,
71 int ref_to_drop);
63 72
64static int do_chunk_alloc(struct btrfs_trans_handle *trans, 73static int do_chunk_alloc(struct btrfs_trans_handle *trans,
65 struct btrfs_root *extent_root, u64 alloc_bytes, 74 struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -554,262 +563,13 @@ out:
554 return ret; 563 return ret;
555} 564}
556 565
557/*
558 * updates all the backrefs that are pending on update_list for the
559 * extent_root
560 */
561static noinline int update_backrefs(struct btrfs_trans_handle *trans,
562 struct btrfs_root *extent_root,
563 struct btrfs_path *path,
564 struct list_head *update_list)
565{
566 struct btrfs_key key;
567 struct btrfs_extent_ref *ref;
568 struct btrfs_fs_info *info = extent_root->fs_info;
569 struct pending_extent_op *op;
570 struct extent_buffer *leaf;
571 int ret = 0;
572 struct list_head *cur = update_list->next;
573 u64 ref_objectid;
574 u64 ref_root = extent_root->root_key.objectid;
575
576 op = list_entry(cur, struct pending_extent_op, list);
577
578search:
579 key.objectid = op->bytenr;
580 key.type = BTRFS_EXTENT_REF_KEY;
581 key.offset = op->orig_parent;
582
583 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
584 BUG_ON(ret);
585
586 leaf = path->nodes[0];
587
588loop:
589 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
590
591 ref_objectid = btrfs_ref_objectid(leaf, ref);
592
593 if (btrfs_ref_root(leaf, ref) != ref_root ||
594 btrfs_ref_generation(leaf, ref) != op->orig_generation ||
595 (ref_objectid != op->level &&
596 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
597 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
598 "root %llu, owner %u\n",
599 (unsigned long long)op->bytenr,
600 (unsigned long long)op->orig_parent,
601 (unsigned long long)ref_root, op->level);
602 btrfs_print_leaf(extent_root, leaf);
603 BUG();
604 }
605
606 key.objectid = op->bytenr;
607 key.offset = op->parent;
608 key.type = BTRFS_EXTENT_REF_KEY;
609 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
610 BUG_ON(ret);
611 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
612 btrfs_set_ref_generation(leaf, ref, op->generation);
613
614 cur = cur->next;
615
616 list_del_init(&op->list);
617 unlock_extent(&info->extent_ins, op->bytenr,
618 op->bytenr + op->num_bytes - 1, GFP_NOFS);
619 kfree(op);
620
621 if (cur == update_list) {
622 btrfs_mark_buffer_dirty(path->nodes[0]);
623 btrfs_release_path(extent_root, path);
624 goto out;
625 }
626
627 op = list_entry(cur, struct pending_extent_op, list);
628
629 path->slots[0]++;
630 while (path->slots[0] < btrfs_header_nritems(leaf)) {
631 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
632 if (key.objectid == op->bytenr &&
633 key.type == BTRFS_EXTENT_REF_KEY)
634 goto loop;
635 path->slots[0]++;
636 }
637
638 btrfs_mark_buffer_dirty(path->nodes[0]);
639 btrfs_release_path(extent_root, path);
640 goto search;
641
642out:
643 return 0;
644}
645
646static noinline int insert_extents(struct btrfs_trans_handle *trans,
647 struct btrfs_root *extent_root,
648 struct btrfs_path *path,
649 struct list_head *insert_list, int nr)
650{
651 struct btrfs_key *keys;
652 u32 *data_size;
653 struct pending_extent_op *op;
654 struct extent_buffer *leaf;
655 struct list_head *cur = insert_list->next;
656 struct btrfs_fs_info *info = extent_root->fs_info;
657 u64 ref_root = extent_root->root_key.objectid;
658 int i = 0, last = 0, ret;
659 int total = nr * 2;
660
661 if (!nr)
662 return 0;
663
664 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
665 if (!keys)
666 return -ENOMEM;
667
668 data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
669 if (!data_size) {
670 kfree(keys);
671 return -ENOMEM;
672 }
673
674 list_for_each_entry(op, insert_list, list) {
675 keys[i].objectid = op->bytenr;
676 keys[i].offset = op->num_bytes;
677 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
678 data_size[i] = sizeof(struct btrfs_extent_item);
679 i++;
680
681 keys[i].objectid = op->bytenr;
682 keys[i].offset = op->parent;
683 keys[i].type = BTRFS_EXTENT_REF_KEY;
684 data_size[i] = sizeof(struct btrfs_extent_ref);
685 i++;
686 }
687
688 op = list_entry(cur, struct pending_extent_op, list);
689 i = 0;
690 while (i < total) {
691 int c;
692 ret = btrfs_insert_some_items(trans, extent_root, path,
693 keys+i, data_size+i, total-i);
694 BUG_ON(ret < 0);
695
696 if (last && ret > 1)
697 BUG();
698
699 leaf = path->nodes[0];
700 for (c = 0; c < ret; c++) {
701 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
702
703 /*
704 * if the first item we inserted was a backref, then
705 * the EXTENT_ITEM will be the odd c's, else it will
706 * be the even c's
707 */
708 if ((ref_first && (c % 2)) ||
709 (!ref_first && !(c % 2))) {
710 struct btrfs_extent_item *itm;
711
712 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
713 struct btrfs_extent_item);
714 btrfs_set_extent_refs(path->nodes[0], itm, 1);
715 op->del++;
716 } else {
717 struct btrfs_extent_ref *ref;
718
719 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
720 struct btrfs_extent_ref);
721 btrfs_set_ref_root(leaf, ref, ref_root);
722 btrfs_set_ref_generation(leaf, ref,
723 op->generation);
724 btrfs_set_ref_objectid(leaf, ref, op->level);
725 btrfs_set_ref_num_refs(leaf, ref, 1);
726 op->del++;
727 }
728
729 /*
730 * using del to see when its ok to free up the
731 * pending_extent_op. In the case where we insert the
732 * last item on the list in order to help do batching
733 * we need to not free the extent op until we actually
734 * insert the extent_item
735 */
736 if (op->del == 2) {
737 unlock_extent(&info->extent_ins, op->bytenr,
738 op->bytenr + op->num_bytes - 1,
739 GFP_NOFS);
740 cur = cur->next;
741 list_del_init(&op->list);
742 kfree(op);
743 if (cur != insert_list)
744 op = list_entry(cur,
745 struct pending_extent_op,
746 list);
747 }
748 }
749 btrfs_mark_buffer_dirty(leaf);
750 btrfs_release_path(extent_root, path);
751
752 /*
753 * Ok backref's and items usually go right next to eachother,
754 * but if we could only insert 1 item that means that we
755 * inserted on the end of a leaf, and we have no idea what may
756 * be on the next leaf so we just play it safe. In order to
757 * try and help this case we insert the last thing on our
758 * insert list so hopefully it will end up being the last
759 * thing on the leaf and everything else will be before it,
760 * which will let us insert a whole bunch of items at the same
761 * time.
762 */
763 if (ret == 1 && !last && (i + ret < total)) {
764 /*
765 * last: where we will pick up the next time around
766 * i: our current key to insert, will be total - 1
767 * cur: the current op we are screwing with
768 * op: duh
769 */
770 last = i + ret;
771 i = total - 1;
772 cur = insert_list->prev;
773 op = list_entry(cur, struct pending_extent_op, list);
774 } else if (last) {
775 /*
776 * ok we successfully inserted the last item on the
777 * list, lets reset everything
778 *
779 * i: our current key to insert, so where we left off
780 * last time
781 * last: done with this
782 * cur: the op we are messing with
783 * op: duh
784 * total: since we inserted the last key, we need to
785 * decrement total so we dont overflow
786 */
787 i = last;
788 last = 0;
789 total--;
790 if (i < total) {
791 cur = insert_list->next;
792 op = list_entry(cur, struct pending_extent_op,
793 list);
794 }
795 } else {
796 i += ret;
797 }
798
799 cond_resched();
800 }
801 ret = 0;
802 kfree(keys);
803 kfree(data_size);
804 return ret;
805}
806
807static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 566static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
808 struct btrfs_root *root, 567 struct btrfs_root *root,
809 struct btrfs_path *path, 568 struct btrfs_path *path,
810 u64 bytenr, u64 parent, 569 u64 bytenr, u64 parent,
811 u64 ref_root, u64 ref_generation, 570 u64 ref_root, u64 ref_generation,
812 u64 owner_objectid) 571 u64 owner_objectid,
572 int refs_to_add)
813{ 573{
814 struct btrfs_key key; 574 struct btrfs_key key;
815 struct extent_buffer *leaf; 575 struct extent_buffer *leaf;
@@ -829,9 +589,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
829 btrfs_set_ref_root(leaf, ref, ref_root); 589 btrfs_set_ref_root(leaf, ref, ref_root);
830 btrfs_set_ref_generation(leaf, ref, ref_generation); 590 btrfs_set_ref_generation(leaf, ref, ref_generation);
831 btrfs_set_ref_objectid(leaf, ref, owner_objectid); 591 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
832 btrfs_set_ref_num_refs(leaf, ref, 1); 592 btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
833 } else if (ret == -EEXIST) { 593 } else if (ret == -EEXIST) {
834 u64 existing_owner; 594 u64 existing_owner;
595
835 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); 596 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
836 leaf = path->nodes[0]; 597 leaf = path->nodes[0];
837 ref = btrfs_item_ptr(leaf, path->slots[0], 598 ref = btrfs_item_ptr(leaf, path->slots[0],
@@ -845,7 +606,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
845 606
846 num_refs = btrfs_ref_num_refs(leaf, ref); 607 num_refs = btrfs_ref_num_refs(leaf, ref);
847 BUG_ON(num_refs == 0); 608 BUG_ON(num_refs == 0);
848 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); 609 btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
849 610
850 existing_owner = btrfs_ref_objectid(leaf, ref); 611 existing_owner = btrfs_ref_objectid(leaf, ref);
851 if (existing_owner != owner_objectid && 612 if (existing_owner != owner_objectid &&
@@ -865,7 +626,8 @@ out:
865 626
866static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 627static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
867 struct btrfs_root *root, 628 struct btrfs_root *root,
868 struct btrfs_path *path) 629 struct btrfs_path *path,
630 int refs_to_drop)
869{ 631{
870 struct extent_buffer *leaf; 632 struct extent_buffer *leaf;
871 struct btrfs_extent_ref *ref; 633 struct btrfs_extent_ref *ref;
@@ -875,8 +637,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
875 leaf = path->nodes[0]; 637 leaf = path->nodes[0];
876 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 638 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
877 num_refs = btrfs_ref_num_refs(leaf, ref); 639 num_refs = btrfs_ref_num_refs(leaf, ref);
878 BUG_ON(num_refs == 0); 640 BUG_ON(num_refs < refs_to_drop);
879 num_refs -= 1; 641 num_refs -= refs_to_drop;
880 if (num_refs == 0) { 642 if (num_refs == 0) {
881 ret = btrfs_del_item(trans, root, path); 643 ret = btrfs_del_item(trans, root, path);
882 } else { 644 } else {
@@ -927,332 +689,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
927#endif 689#endif
928} 690}
929 691
930static noinline int free_extents(struct btrfs_trans_handle *trans,
931 struct btrfs_root *extent_root,
932 struct list_head *del_list)
933{
934 struct btrfs_fs_info *info = extent_root->fs_info;
935 struct btrfs_path *path;
936 struct btrfs_key key, found_key;
937 struct extent_buffer *leaf;
938 struct list_head *cur;
939 struct pending_extent_op *op;
940 struct btrfs_extent_item *ei;
941 int ret, num_to_del, extent_slot = 0, found_extent = 0;
942 u32 refs;
943 u64 bytes_freed = 0;
944
945 path = btrfs_alloc_path();
946 if (!path)
947 return -ENOMEM;
948 path->reada = 1;
949
950search:
951 /* search for the backref for the current ref we want to delete */
952 cur = del_list->next;
953 op = list_entry(cur, struct pending_extent_op, list);
954 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
955 op->orig_parent,
956 extent_root->root_key.objectid,
957 op->orig_generation, op->level, 1);
958 if (ret) {
959 printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
960 "root %llu gen %llu owner %u\n",
961 (unsigned long long)op->bytenr,
962 (unsigned long long)extent_root->root_key.objectid,
963 (unsigned long long)op->orig_generation, op->level);
964 btrfs_print_leaf(extent_root, path->nodes[0]);
965 WARN_ON(1);
966 goto out;
967 }
968
969 extent_slot = path->slots[0];
970 num_to_del = 1;
971 found_extent = 0;
972
973 /*
974 * if we aren't the first item on the leaf we can move back one and see
975 * if our ref is right next to our extent item
976 */
977 if (likely(extent_slot)) {
978 extent_slot--;
979 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
980 extent_slot);
981 if (found_key.objectid == op->bytenr &&
982 found_key.type == BTRFS_EXTENT_ITEM_KEY &&
983 found_key.offset == op->num_bytes) {
984 num_to_del++;
985 found_extent = 1;
986 }
987 }
988
989 /*
990 * if we didn't find the extent we need to delete the backref and then
991 * search for the extent item key so we can update its ref count
992 */
993 if (!found_extent) {
994 key.objectid = op->bytenr;
995 key.type = BTRFS_EXTENT_ITEM_KEY;
996 key.offset = op->num_bytes;
997
998 ret = remove_extent_backref(trans, extent_root, path);
999 BUG_ON(ret);
1000 btrfs_release_path(extent_root, path);
1001 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1002 BUG_ON(ret);
1003 extent_slot = path->slots[0];
1004 }
1005
1006 /* this is where we update the ref count for the extent */
1007 leaf = path->nodes[0];
1008 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
1009 refs = btrfs_extent_refs(leaf, ei);
1010 BUG_ON(refs == 0);
1011 refs--;
1012 btrfs_set_extent_refs(leaf, ei, refs);
1013
1014 btrfs_mark_buffer_dirty(leaf);
1015
1016 /*
1017 * This extent needs deleting. The reason cur_slot is extent_slot +
1018 * num_to_del is because extent_slot points to the slot where the extent
1019 * is, and if the backref was not right next to the extent we will be
1020 * deleting at least 1 item, and will want to start searching at the
1021 * slot directly next to extent_slot. However if we did find the
1022 * backref next to the extent item them we will be deleting at least 2
1023 * items and will want to start searching directly after the ref slot
1024 */
1025 if (!refs) {
1026 struct list_head *pos, *n, *end;
1027 int cur_slot = extent_slot+num_to_del;
1028 u64 super_used;
1029 u64 root_used;
1030
1031 path->slots[0] = extent_slot;
1032 bytes_freed = op->num_bytes;
1033
1034 mutex_lock(&info->pinned_mutex);
1035 ret = pin_down_bytes(trans, extent_root, op->bytenr,
1036 op->num_bytes, op->level >=
1037 BTRFS_FIRST_FREE_OBJECTID);
1038 mutex_unlock(&info->pinned_mutex);
1039 BUG_ON(ret < 0);
1040 op->del = ret;
1041
1042 /*
1043 * we need to see if we can delete multiple things at once, so
1044 * start looping through the list of extents we are wanting to
1045 * delete and see if their extent/backref's are right next to
1046 * eachother and the extents only have 1 ref
1047 */
1048 for (pos = cur->next; pos != del_list; pos = pos->next) {
1049 struct pending_extent_op *tmp;
1050
1051 tmp = list_entry(pos, struct pending_extent_op, list);
1052
1053 /* we only want to delete extent+ref at this stage */
1054 if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1055 break;
1056
1057 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1058 if (found_key.objectid != tmp->bytenr ||
1059 found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1060 found_key.offset != tmp->num_bytes)
1061 break;
1062
1063 /* check to make sure this extent only has one ref */
1064 ei = btrfs_item_ptr(leaf, cur_slot,
1065 struct btrfs_extent_item);
1066 if (btrfs_extent_refs(leaf, ei) != 1)
1067 break;
1068
1069 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1070 if (found_key.objectid != tmp->bytenr ||
1071 found_key.type != BTRFS_EXTENT_REF_KEY ||
1072 found_key.offset != tmp->orig_parent)
1073 break;
1074
1075 /*
1076 * the ref is right next to the extent, we can set the
1077 * ref count to 0 since we will delete them both now
1078 */
1079 btrfs_set_extent_refs(leaf, ei, 0);
1080
1081 /* pin down the bytes for this extent */
1082 mutex_lock(&info->pinned_mutex);
1083 ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1084 tmp->num_bytes, tmp->level >=
1085 BTRFS_FIRST_FREE_OBJECTID);
1086 mutex_unlock(&info->pinned_mutex);
1087 BUG_ON(ret < 0);
1088
1089 /*
1090 * use the del field to tell if we need to go ahead and
1091 * free up the extent when we delete the item or not.
1092 */
1093 tmp->del = ret;
1094 bytes_freed += tmp->num_bytes;
1095
1096 num_to_del += 2;
1097 cur_slot += 2;
1098 }
1099 end = pos;
1100
1101 /* update the free space counters */
1102 spin_lock(&info->delalloc_lock);
1103 super_used = btrfs_super_bytes_used(&info->super_copy);
1104 btrfs_set_super_bytes_used(&info->super_copy,
1105 super_used - bytes_freed);
1106
1107 root_used = btrfs_root_used(&extent_root->root_item);
1108 btrfs_set_root_used(&extent_root->root_item,
1109 root_used - bytes_freed);
1110 spin_unlock(&info->delalloc_lock);
1111
1112 /* delete the items */
1113 ret = btrfs_del_items(trans, extent_root, path,
1114 path->slots[0], num_to_del);
1115 BUG_ON(ret);
1116
1117 /*
1118 * loop through the extents we deleted and do the cleanup work
1119 * on them
1120 */
1121 for (pos = cur, n = pos->next; pos != end;
1122 pos = n, n = pos->next) {
1123 struct pending_extent_op *tmp;
1124 tmp = list_entry(pos, struct pending_extent_op, list);
1125
1126 /*
1127 * remember tmp->del tells us wether or not we pinned
1128 * down the extent
1129 */
1130 ret = update_block_group(trans, extent_root,
1131 tmp->bytenr, tmp->num_bytes, 0,
1132 tmp->del);
1133 BUG_ON(ret);
1134
1135 list_del_init(&tmp->list);
1136 unlock_extent(&info->extent_ins, tmp->bytenr,
1137 tmp->bytenr + tmp->num_bytes - 1,
1138 GFP_NOFS);
1139 kfree(tmp);
1140 }
1141 } else if (refs && found_extent) {
1142 /*
1143 * the ref and extent were right next to eachother, but the
1144 * extent still has a ref, so just free the backref and keep
1145 * going
1146 */
1147 ret = remove_extent_backref(trans, extent_root, path);
1148 BUG_ON(ret);
1149
1150 list_del_init(&op->list);
1151 unlock_extent(&info->extent_ins, op->bytenr,
1152 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1153 kfree(op);
1154 } else {
1155 /*
1156 * the extent has multiple refs and the backref we were looking
1157 * for was not right next to it, so just unlock and go next,
1158 * we're good to go
1159 */
1160 list_del_init(&op->list);
1161 unlock_extent(&info->extent_ins, op->bytenr,
1162 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1163 kfree(op);
1164 }
1165
1166 btrfs_release_path(extent_root, path);
1167 if (!list_empty(del_list))
1168 goto search;
1169
1170out:
1171 btrfs_free_path(path);
1172 return ret;
1173}
1174
1175static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 692static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1176 struct btrfs_root *root, u64 bytenr, 693 struct btrfs_root *root, u64 bytenr,
694 u64 num_bytes,
1177 u64 orig_parent, u64 parent, 695 u64 orig_parent, u64 parent,
1178 u64 orig_root, u64 ref_root, 696 u64 orig_root, u64 ref_root,
1179 u64 orig_generation, u64 ref_generation, 697 u64 orig_generation, u64 ref_generation,
1180 u64 owner_objectid) 698 u64 owner_objectid)
1181{ 699{
1182 int ret; 700 int ret;
1183 struct btrfs_root *extent_root = root->fs_info->extent_root; 701 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
1184 struct btrfs_path *path;
1185
1186 if (root == root->fs_info->extent_root) {
1187 struct pending_extent_op *extent_op;
1188 u64 num_bytes;
1189
1190 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
1191 num_bytes = btrfs_level_size(root, (int)owner_objectid);
1192 mutex_lock(&root->fs_info->extent_ins_mutex);
1193 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
1194 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
1195 u64 priv;
1196 ret = get_state_private(&root->fs_info->extent_ins,
1197 bytenr, &priv);
1198 BUG_ON(ret);
1199 extent_op = (struct pending_extent_op *)
1200 (unsigned long)priv;
1201 BUG_ON(extent_op->parent != orig_parent);
1202 BUG_ON(extent_op->generation != orig_generation);
1203
1204 extent_op->parent = parent;
1205 extent_op->generation = ref_generation;
1206 } else {
1207 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1208 BUG_ON(!extent_op);
1209
1210 extent_op->type = PENDING_BACKREF_UPDATE;
1211 extent_op->bytenr = bytenr;
1212 extent_op->num_bytes = num_bytes;
1213 extent_op->parent = parent;
1214 extent_op->orig_parent = orig_parent;
1215 extent_op->generation = ref_generation;
1216 extent_op->orig_generation = orig_generation;
1217 extent_op->level = (int)owner_objectid;
1218 INIT_LIST_HEAD(&extent_op->list);
1219 extent_op->del = 0;
1220
1221 set_extent_bits(&root->fs_info->extent_ins,
1222 bytenr, bytenr + num_bytes - 1,
1223 EXTENT_WRITEBACK, GFP_NOFS);
1224 set_state_private(&root->fs_info->extent_ins,
1225 bytenr, (unsigned long)extent_op);
1226 }
1227 mutex_unlock(&root->fs_info->extent_ins_mutex);
1228 return 0;
1229 }
1230 702
1231 path = btrfs_alloc_path(); 703 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
1232 if (!path) 704 orig_parent, parent, orig_root,
1233 return -ENOMEM; 705 ref_root, orig_generation,
1234 ret = lookup_extent_backref(trans, extent_root, path, 706 ref_generation, owner_objectid, pin);
1235 bytenr, orig_parent, orig_root,
1236 orig_generation, owner_objectid, 1);
1237 if (ret)
1238 goto out;
1239 ret = remove_extent_backref(trans, extent_root, path);
1240 if (ret)
1241 goto out;
1242 ret = insert_extent_backref(trans, extent_root, path, bytenr,
1243 parent, ref_root, ref_generation,
1244 owner_objectid);
1245 BUG_ON(ret); 707 BUG_ON(ret);
1246 finish_current_insert(trans, extent_root, 0);
1247 del_pending_extents(trans, extent_root, 0);
1248out:
1249 btrfs_free_path(path);
1250 return ret; 708 return ret;
1251} 709}
1252 710
1253int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 711int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1254 struct btrfs_root *root, u64 bytenr, 712 struct btrfs_root *root, u64 bytenr,
1255 u64 orig_parent, u64 parent, 713 u64 num_bytes, u64 orig_parent, u64 parent,
1256 u64 ref_root, u64 ref_generation, 714 u64 ref_root, u64 ref_generation,
1257 u64 owner_objectid) 715 u64 owner_objectid)
1258{ 716{
@@ -1260,20 +718,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1260 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 718 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1261 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 719 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1262 return 0; 720 return 0;
1263 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 721
1264 parent, ref_root, ref_root, 722 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
1265 ref_generation, ref_generation, 723 orig_parent, parent, ref_root,
1266 owner_objectid); 724 ref_root, ref_generation,
725 ref_generation, owner_objectid);
1267 return ret; 726 return ret;
1268} 727}
1269
1270static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 728static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1271 struct btrfs_root *root, u64 bytenr, 729 struct btrfs_root *root, u64 bytenr,
730 u64 num_bytes,
1272 u64 orig_parent, u64 parent, 731 u64 orig_parent, u64 parent,
1273 u64 orig_root, u64 ref_root, 732 u64 orig_root, u64 ref_root,
1274 u64 orig_generation, u64 ref_generation, 733 u64 orig_generation, u64 ref_generation,
1275 u64 owner_objectid) 734 u64 owner_objectid)
1276{ 735{
736 int ret;
737
738 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
739 ref_generation, owner_objectid,
740 BTRFS_ADD_DELAYED_REF, 0);
741 BUG_ON(ret);
742 return ret;
743}
744
745static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
746 struct btrfs_root *root, u64 bytenr,
747 u64 num_bytes, u64 parent, u64 ref_root,
748 u64 ref_generation, u64 owner_objectid,
749 int refs_to_add)
750{
1277 struct btrfs_path *path; 751 struct btrfs_path *path;
1278 int ret; 752 int ret;
1279 struct btrfs_key key; 753 struct btrfs_key key;
@@ -1288,15 +762,19 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1288 path->reada = 1; 762 path->reada = 1;
1289 key.objectid = bytenr; 763 key.objectid = bytenr;
1290 key.type = BTRFS_EXTENT_ITEM_KEY; 764 key.type = BTRFS_EXTENT_ITEM_KEY;
1291 key.offset = (u64)-1; 765 key.offset = num_bytes;
1292 766
1293 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 767 /* first find the extent item and update its reference count */
1294 0, 1); 768 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
769 path, 0, 1);
1295 if (ret < 0) 770 if (ret < 0)
1296 return ret; 771 return ret;
1297 BUG_ON(ret == 0 || path->slots[0] == 0);
1298 772
1299 path->slots[0]--; 773 if (ret > 0) {
774 WARN_ON(1);
775 btrfs_free_path(path);
776 return -EIO;
777 }
1300 l = path->nodes[0]; 778 l = path->nodes[0];
1301 779
1302 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 780 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
@@ -1310,21 +788,20 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1310 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 788 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1311 789
1312 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 790 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791
1313 refs = btrfs_extent_refs(l, item); 792 refs = btrfs_extent_refs(l, item);
1314 btrfs_set_extent_refs(l, item, refs + 1); 793 btrfs_set_extent_refs(l, item, refs + refs_to_add);
1315 btrfs_mark_buffer_dirty(path->nodes[0]); 794 btrfs_mark_buffer_dirty(path->nodes[0]);
1316 795
1317 btrfs_release_path(root->fs_info->extent_root, path); 796 btrfs_release_path(root->fs_info->extent_root, path);
1318 797
1319 path->reada = 1; 798 path->reada = 1;
799 /* now insert the actual backref */
1320 ret = insert_extent_backref(trans, root->fs_info->extent_root, 800 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1321 path, bytenr, parent, 801 path, bytenr, parent,
1322 ref_root, ref_generation, 802 ref_root, ref_generation,
1323 owner_objectid); 803 owner_objectid, refs_to_add);
1324 BUG_ON(ret); 804 BUG_ON(ret);
1325 finish_current_insert(trans, root->fs_info->extent_root, 0);
1326 del_pending_extents(trans, root->fs_info->extent_root, 0);
1327
1328 btrfs_free_path(path); 805 btrfs_free_path(path);
1329 return 0; 806 return 0;
1330} 807}
@@ -1339,68 +816,245 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1339 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 816 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1340 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 817 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1341 return 0; 818 return 0;
1342 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 819
820 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
1343 0, ref_root, 0, ref_generation, 821 0, ref_root, 0, ref_generation,
1344 owner_objectid); 822 owner_objectid);
1345 return ret; 823 return ret;
1346} 824}
1347 825
1348int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 826static int drop_delayed_ref(struct btrfs_trans_handle *trans,
1349 struct btrfs_root *root) 827 struct btrfs_root *root,
828 struct btrfs_delayed_ref_node *node)
829{
830 int ret = 0;
831 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
832
833 BUG_ON(node->ref_mod == 0);
834 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
835 node->parent, ref->root, ref->generation,
836 ref->owner_objectid, ref->pin, node->ref_mod);
837
838 return ret;
839}
840
841/* helper function to actually process a single delayed ref entry */
842static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
843 struct btrfs_root *root,
844 struct btrfs_delayed_ref_node *node,
845 int insert_reserved)
1350{ 846{
1351 u64 start;
1352 u64 end;
1353 int ret; 847 int ret;
848 struct btrfs_delayed_ref *ref;
1354 849
1355 while(1) { 850 if (node->parent == (u64)-1) {
1356 finish_current_insert(trans, root->fs_info->extent_root, 1); 851 struct btrfs_delayed_ref_head *head;
1357 del_pending_extents(trans, root->fs_info->extent_root, 1); 852 /*
853 * we've hit the end of the chain and we were supposed
854 * to insert this extent into the tree. But, it got
855 * deleted before we ever needed to insert it, so all
856 * we have to do is clean up the accounting
857 */
858 if (insert_reserved) {
859 update_reserved_extents(root, node->bytenr,
860 node->num_bytes, 0);
861 }
862 head = btrfs_delayed_node_to_head(node);
863 mutex_unlock(&head->mutex);
864 return 0;
865 }
1358 866
1359 /* is there more work to do? */ 867 ref = btrfs_delayed_node_to_ref(node);
1360 ret = find_first_extent_bit(&root->fs_info->pending_del, 868 if (ref->action == BTRFS_ADD_DELAYED_REF) {
1361 0, &start, &end, EXTENT_WRITEBACK); 869 if (insert_reserved) {
1362 if (!ret) 870 struct btrfs_key ins;
1363 continue; 871
1364 ret = find_first_extent_bit(&root->fs_info->extent_ins, 872 ins.objectid = node->bytenr;
1365 0, &start, &end, EXTENT_WRITEBACK); 873 ins.offset = node->num_bytes;
1366 if (!ret) 874 ins.type = BTRFS_EXTENT_ITEM_KEY;
1367 continue; 875
1368 break; 876 /* record the full extent allocation */
877 ret = __btrfs_alloc_reserved_extent(trans, root,
878 node->parent, ref->root,
879 ref->generation, ref->owner_objectid,
880 &ins, node->ref_mod);
881 update_reserved_extents(root, node->bytenr,
882 node->num_bytes, 0);
883 } else {
884 /* just add one backref */
885 ret = add_extent_ref(trans, root, node->bytenr,
886 node->num_bytes,
887 node->parent, ref->root, ref->generation,
888 ref->owner_objectid, node->ref_mod);
889 }
890 BUG_ON(ret);
891 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
892 WARN_ON(insert_reserved);
893 ret = drop_delayed_ref(trans, root, node);
1369 } 894 }
1370 return 0; 895 return 0;
1371} 896}
1372 897
1373int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 898static noinline struct btrfs_delayed_ref_node *
1374 struct btrfs_root *root, u64 bytenr, 899select_delayed_ref(struct btrfs_delayed_ref_head *head)
1375 u64 num_bytes, u32 *refs)
1376{ 900{
1377 struct btrfs_path *path; 901 struct rb_node *node;
902 struct btrfs_delayed_ref_node *ref;
903 int action = BTRFS_ADD_DELAYED_REF;
904again:
905 /*
906 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
907 * this prevents ref count from going down to zero when
908 * there still are pending delayed ref.
909 */
910 node = rb_prev(&head->node.rb_node);
911 while (1) {
912 if (!node)
913 break;
914 ref = rb_entry(node, struct btrfs_delayed_ref_node,
915 rb_node);
916 if (ref->bytenr != head->node.bytenr)
917 break;
918 if (btrfs_delayed_node_to_ref(ref)->action == action)
919 return ref;
920 node = rb_prev(node);
921 }
922 if (action == BTRFS_ADD_DELAYED_REF) {
923 action = BTRFS_DROP_DELAYED_REF;
924 goto again;
925 }
926 return NULL;
927}
928
929/*
930 * this starts processing the delayed reference count updates and
931 * extent insertions we have queued up so far. count can be
932 * 0, which means to process everything in the tree at the start
933 * of the run (but not newly added entries), or it can be some target
934 * number you'd like to process.
935 */
936int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
937 struct btrfs_root *root, unsigned long count)
938{
939 struct rb_node *node;
940 struct btrfs_delayed_ref_root *delayed_refs;
941 struct btrfs_delayed_ref_node *ref;
942 struct btrfs_delayed_ref_head *locked_ref = NULL;
1378 int ret; 943 int ret;
1379 struct btrfs_key key; 944 int must_insert_reserved = 0;
1380 struct extent_buffer *l; 945 int run_all = count == (unsigned long)-1;
1381 struct btrfs_extent_item *item;
1382 946
1383 WARN_ON(num_bytes < root->sectorsize); 947 if (root == root->fs_info->extent_root)
1384 path = btrfs_alloc_path(); 948 root = root->fs_info->tree_root;
1385 path->reada = 1; 949
1386 key.objectid = bytenr; 950 delayed_refs = &trans->transaction->delayed_refs;
1387 key.offset = num_bytes; 951again:
1388 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 952 spin_lock(&delayed_refs->lock);
1389 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 953 if (count == 0)
1390 0, 0); 954 count = delayed_refs->num_entries;
1391 if (ret < 0) 955 while (1) {
1392 goto out; 956 if (!locked_ref) {
1393 if (ret != 0) { 957 /*
1394 btrfs_print_leaf(root, path->nodes[0]); 958 * no locked ref, go find something we can
1395 printk(KERN_INFO "btrfs failed to find block number %llu\n", 959 * process in the rbtree. We start at
1396 (unsigned long long)bytenr); 960 * the beginning of the tree, there may be less
1397 BUG(); 961 * lock contention if we do something smarter here.
962 */
963 node = rb_first(&delayed_refs->root);
964 if (!node) {
965 spin_unlock(&delayed_refs->lock);
966 break;
967 }
968
969 ref = rb_entry(node, struct btrfs_delayed_ref_node,
970 rb_node);
971 ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref);
972 if (ret) {
973 spin_unlock(&delayed_refs->lock);
974 break;
975 }
976 }
977
978 /*
979 * record the must insert reserved flag before we
980 * drop the spin lock.
981 */
982 must_insert_reserved = locked_ref->must_insert_reserved;
983 locked_ref->must_insert_reserved = 0;
984
985 /*
986 * locked_ref is the head node, so we have to go one
987 * node back for any delayed ref updates
988 */
989
990 ref = select_delayed_ref(locked_ref);
991 if (!ref) {
992 /* All delayed refs have been processed, Go ahead
993 * and send the head node to run_one_delayed_ref,
994 * so that any accounting fixes can happen
995 */
996 ref = &locked_ref->node;
997 locked_ref = NULL;
998 }
999
1000 ref->in_tree = 0;
1001 rb_erase(&ref->rb_node, &delayed_refs->root);
1002 delayed_refs->num_entries--;
1003 spin_unlock(&delayed_refs->lock);
1004
1005 ret = run_one_delayed_ref(trans, root, ref,
1006 must_insert_reserved);
1007 BUG_ON(ret);
1008 btrfs_put_delayed_ref(ref);
1009
1010 /* once we lock the head ref, we have to process all the
1011 * entries for it. So, we might end up doing more entries
1012 * that count was asking us to do.
1013 */
1014 if (count > 0)
1015 count--;
1016
1017 /*
1018 * we set locked_ref to null above if we're all done
1019 * with this bytenr
1020 */
1021 if (!locked_ref && count == 0)
1022 break;
1023
1024 spin_lock(&delayed_refs->lock);
1025 }
1026 if (run_all) {
1027 spin_lock(&delayed_refs->lock);
1028 node = rb_first(&delayed_refs->root);
1029 if (!node) {
1030 spin_unlock(&delayed_refs->lock);
1031 goto out;
1032 }
1033
1034 while (node) {
1035 ref = rb_entry(node, struct btrfs_delayed_ref_node,
1036 rb_node);
1037 if (btrfs_delayed_ref_is_head(ref)) {
1038 struct btrfs_delayed_ref_head *head;
1039
1040 head = btrfs_delayed_node_to_head(ref);
1041 atomic_inc(&ref->refs);
1042
1043 spin_unlock(&delayed_refs->lock);
1044 mutex_lock(&head->mutex);
1045 mutex_unlock(&head->mutex);
1046
1047 btrfs_put_delayed_ref(ref);
1048 goto again;
1049 }
1050 node = rb_next(node);
1051 }
1052 spin_unlock(&delayed_refs->lock);
1053 count = (unsigned long)-1;
1054 schedule_timeout(1);
1055 goto again;
1398 } 1056 }
1399 l = path->nodes[0];
1400 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1401 *refs = btrfs_extent_refs(l, item);
1402out: 1057out:
1403 btrfs_free_path(path);
1404 return 0; 1058 return 0;
1405} 1059}
1406 1060
@@ -1624,7 +1278,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1624 int refi = 0; 1278 int refi = 0;
1625 int slot; 1279 int slot;
1626 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1280 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1627 u64, u64, u64, u64, u64, u64, u64, u64); 1281 u64, u64, u64, u64, u64, u64, u64, u64, u64);
1628 1282
1629 ref_root = btrfs_header_owner(buf); 1283 ref_root = btrfs_header_owner(buf);
1630 ref_generation = btrfs_header_generation(buf); 1284 ref_generation = btrfs_header_generation(buf);
@@ -1696,12 +1350,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1696 1350
1697 if (level == 0) { 1351 if (level == 0) {
1698 btrfs_item_key_to_cpu(buf, &key, slot); 1352 btrfs_item_key_to_cpu(buf, &key, slot);
1353 fi = btrfs_item_ptr(buf, slot,
1354 struct btrfs_file_extent_item);
1355
1356 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1357 if (bytenr == 0)
1358 continue;
1699 1359
1700 ret = process_func(trans, root, bytenr, 1360 ret = process_func(trans, root, bytenr,
1701 orig_buf->start, buf->start, 1361 btrfs_file_extent_disk_num_bytes(buf, fi),
1702 orig_root, ref_root, 1362 orig_buf->start, buf->start,
1703 orig_generation, ref_generation, 1363 orig_root, ref_root,
1704 key.objectid); 1364 orig_generation, ref_generation,
1365 key.objectid);
1705 1366
1706 if (ret) { 1367 if (ret) {
1707 faili = slot; 1368 faili = slot;
@@ -1709,7 +1370,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1709 goto fail; 1370 goto fail;
1710 } 1371 }
1711 } else { 1372 } else {
1712 ret = process_func(trans, root, bytenr, 1373 ret = process_func(trans, root, bytenr, buf->len,
1713 orig_buf->start, buf->start, 1374 orig_buf->start, buf->start,
1714 orig_root, ref_root, 1375 orig_root, ref_root,
1715 orig_generation, ref_generation, 1376 orig_generation, ref_generation,
@@ -1786,17 +1447,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans,
1786 if (bytenr == 0) 1447 if (bytenr == 0)
1787 continue; 1448 continue;
1788 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1449 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1789 orig_buf->start, buf->start, 1450 btrfs_file_extent_disk_num_bytes(buf, fi),
1790 orig_root, ref_root, 1451 orig_buf->start, buf->start,
1791 orig_generation, ref_generation, 1452 orig_root, ref_root, orig_generation,
1792 key.objectid); 1453 ref_generation, key.objectid);
1793 if (ret) 1454 if (ret)
1794 goto fail; 1455 goto fail;
1795 } else { 1456 } else {
1796 bytenr = btrfs_node_blockptr(buf, slot); 1457 bytenr = btrfs_node_blockptr(buf, slot);
1797 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1458 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1798 orig_buf->start, buf->start, 1459 buf->len, orig_buf->start,
1799 orig_root, ref_root, 1460 buf->start, orig_root, ref_root,
1800 orig_generation, ref_generation, 1461 orig_generation, ref_generation,
1801 level - 1); 1462 level - 1);
1802 if (ret) 1463 if (ret)
@@ -1815,7 +1476,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1815 struct btrfs_block_group_cache *cache) 1476 struct btrfs_block_group_cache *cache)
1816{ 1477{
1817 int ret; 1478 int ret;
1818 int pending_ret;
1819 struct btrfs_root *extent_root = root->fs_info->extent_root; 1479 struct btrfs_root *extent_root = root->fs_info->extent_root;
1820 unsigned long bi; 1480 unsigned long bi;
1821 struct extent_buffer *leaf; 1481 struct extent_buffer *leaf;
@@ -1831,12 +1491,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1831 btrfs_mark_buffer_dirty(leaf); 1491 btrfs_mark_buffer_dirty(leaf);
1832 btrfs_release_path(extent_root, path); 1492 btrfs_release_path(extent_root, path);
1833fail: 1493fail:
1834 finish_current_insert(trans, extent_root, 0);
1835 pending_ret = del_pending_extents(trans, extent_root, 0);
1836 if (ret) 1494 if (ret)
1837 return ret; 1495 return ret;
1838 if (pending_ret)
1839 return pending_ret;
1840 return 0; 1496 return 0;
1841 1497
1842} 1498}
@@ -2474,193 +2130,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2474 return ret; 2130 return ret;
2475} 2131}
2476 2132
2477static int finish_current_insert(struct btrfs_trans_handle *trans,
2478 struct btrfs_root *extent_root, int all)
2479{
2480 u64 start;
2481 u64 end;
2482 u64 priv;
2483 u64 search = 0;
2484 struct btrfs_fs_info *info = extent_root->fs_info;
2485 struct btrfs_path *path;
2486 struct pending_extent_op *extent_op, *tmp;
2487 struct list_head insert_list, update_list;
2488 int ret;
2489 int num_inserts = 0, max_inserts, restart = 0;
2490
2491 path = btrfs_alloc_path();
2492 INIT_LIST_HEAD(&insert_list);
2493 INIT_LIST_HEAD(&update_list);
2494
2495 max_inserts = extent_root->leafsize /
2496 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2497 sizeof(struct btrfs_extent_ref) +
2498 sizeof(struct btrfs_extent_item));
2499again:
2500 mutex_lock(&info->extent_ins_mutex);
2501 while (1) {
2502 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2503 &end, EXTENT_WRITEBACK);
2504 if (ret) {
2505 if (restart && !num_inserts &&
2506 list_empty(&update_list)) {
2507 restart = 0;
2508 search = 0;
2509 continue;
2510 }
2511 break;
2512 }
2513
2514 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2515 if (!ret) {
2516 if (all)
2517 restart = 1;
2518 search = end + 1;
2519 if (need_resched()) {
2520 mutex_unlock(&info->extent_ins_mutex);
2521 cond_resched();
2522 mutex_lock(&info->extent_ins_mutex);
2523 }
2524 continue;
2525 }
2526
2527 ret = get_state_private(&info->extent_ins, start, &priv);
2528 BUG_ON(ret);
2529 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2530
2531 if (extent_op->type == PENDING_EXTENT_INSERT) {
2532 num_inserts++;
2533 list_add_tail(&extent_op->list, &insert_list);
2534 search = end + 1;
2535 if (num_inserts == max_inserts) {
2536 restart = 1;
2537 break;
2538 }
2539 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2540 list_add_tail(&extent_op->list, &update_list);
2541 search = end + 1;
2542 } else {
2543 BUG();
2544 }
2545 }
2546
2547 /*
2548 * process the update list, clear the writeback bit for it, and if
2549 * somebody marked this thing for deletion then just unlock it and be
2550 * done, the free_extents will handle it
2551 */
2552 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2553 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2554 extent_op->bytenr + extent_op->num_bytes - 1,
2555 EXTENT_WRITEBACK, GFP_NOFS);
2556 if (extent_op->del) {
2557 list_del_init(&extent_op->list);
2558 unlock_extent(&info->extent_ins, extent_op->bytenr,
2559 extent_op->bytenr + extent_op->num_bytes
2560 - 1, GFP_NOFS);
2561 kfree(extent_op);
2562 }
2563 }
2564 mutex_unlock(&info->extent_ins_mutex);
2565
2566 /*
2567 * still have things left on the update list, go ahead an update
2568 * everything
2569 */
2570 if (!list_empty(&update_list)) {
2571 ret = update_backrefs(trans, extent_root, path, &update_list);
2572 BUG_ON(ret);
2573
2574 /* we may have COW'ed new blocks, so lets start over */
2575 if (all)
2576 restart = 1;
2577 }
2578
2579 /*
2580 * if no inserts need to be done, but we skipped some extents and we
2581 * need to make sure everything is cleaned then reset everything and
2582 * go back to the beginning
2583 */
2584 if (!num_inserts && restart) {
2585 search = 0;
2586 restart = 0;
2587 INIT_LIST_HEAD(&update_list);
2588 INIT_LIST_HEAD(&insert_list);
2589 goto again;
2590 } else if (!num_inserts) {
2591 goto out;
2592 }
2593
2594 /*
2595 * process the insert extents list. Again if we are deleting this
2596 * extent, then just unlock it, pin down the bytes if need be, and be
2597 * done with it. Saves us from having to actually insert the extent
2598 * into the tree and then subsequently come along and delete it
2599 */
2600 mutex_lock(&info->extent_ins_mutex);
2601 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2602 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2603 extent_op->bytenr + extent_op->num_bytes - 1,
2604 EXTENT_WRITEBACK, GFP_NOFS);
2605 if (extent_op->del) {
2606 u64 used;
2607 list_del_init(&extent_op->list);
2608 unlock_extent(&info->extent_ins, extent_op->bytenr,
2609 extent_op->bytenr + extent_op->num_bytes
2610 - 1, GFP_NOFS);
2611
2612 mutex_lock(&extent_root->fs_info->pinned_mutex);
2613 ret = pin_down_bytes(trans, extent_root,
2614 extent_op->bytenr,
2615 extent_op->num_bytes, 0);
2616 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2617
2618 spin_lock(&info->delalloc_lock);
2619 used = btrfs_super_bytes_used(&info->super_copy);
2620 btrfs_set_super_bytes_used(&info->super_copy,
2621 used - extent_op->num_bytes);
2622 used = btrfs_root_used(&extent_root->root_item);
2623 btrfs_set_root_used(&extent_root->root_item,
2624 used - extent_op->num_bytes);
2625 spin_unlock(&info->delalloc_lock);
2626
2627 ret = update_block_group(trans, extent_root,
2628 extent_op->bytenr,
2629 extent_op->num_bytes,
2630 0, ret > 0);
2631 BUG_ON(ret);
2632 kfree(extent_op);
2633 num_inserts--;
2634 }
2635 }
2636 mutex_unlock(&info->extent_ins_mutex);
2637
2638 ret = insert_extents(trans, extent_root, path, &insert_list,
2639 num_inserts);
2640 BUG_ON(ret);
2641
2642 /*
2643 * if restart is set for whatever reason we need to go back and start
2644 * searching through the pending list again.
2645 *
2646 * We just inserted some extents, which could have resulted in new
2647 * blocks being allocated, which would result in new blocks needing
2648 * updates, so if all is set we _must_ restart to get the updated
2649 * blocks.
2650 */
2651 if (restart || all) {
2652 INIT_LIST_HEAD(&insert_list);
2653 INIT_LIST_HEAD(&update_list);
2654 search = 0;
2655 restart = 0;
2656 num_inserts = 0;
2657 goto again;
2658 }
2659out:
2660 btrfs_free_path(path);
2661 return 0;
2662}
2663
2664static int pin_down_bytes(struct btrfs_trans_handle *trans, 2133static int pin_down_bytes(struct btrfs_trans_handle *trans,
2665 struct btrfs_root *root, 2134 struct btrfs_root *root,
2666 u64 bytenr, u64 num_bytes, int is_data) 2135 u64 bytenr, u64 num_bytes, int is_data)
@@ -2686,6 +2155,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2686 u64 header_transid = btrfs_header_generation(buf); 2155 u64 header_transid = btrfs_header_generation(buf);
2687 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2156 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2688 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2157 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2158 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2689 header_transid == trans->transid && 2159 header_transid == trans->transid &&
2690 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2160 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2691 clean_tree_block(NULL, root, buf); 2161 clean_tree_block(NULL, root, buf);
@@ -2710,7 +2180,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2710 struct btrfs_root *root, 2180 struct btrfs_root *root,
2711 u64 bytenr, u64 num_bytes, u64 parent, 2181 u64 bytenr, u64 num_bytes, u64 parent,
2712 u64 root_objectid, u64 ref_generation, 2182 u64 root_objectid, u64 ref_generation,
2713 u64 owner_objectid, int pin, int mark_free) 2183 u64 owner_objectid, int pin, int mark_free,
2184 int refs_to_drop)
2714{ 2185{
2715 struct btrfs_path *path; 2186 struct btrfs_path *path;
2716 struct btrfs_key key; 2187 struct btrfs_key key;
@@ -2753,7 +2224,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2753 break; 2224 break;
2754 } 2225 }
2755 if (!found_extent) { 2226 if (!found_extent) {
2756 ret = remove_extent_backref(trans, extent_root, path); 2227 ret = remove_extent_backref(trans, extent_root, path,
2228 refs_to_drop);
2757 BUG_ON(ret); 2229 BUG_ON(ret);
2758 btrfs_release_path(extent_root, path); 2230 btrfs_release_path(extent_root, path);
2759 ret = btrfs_search_slot(trans, extent_root, 2231 ret = btrfs_search_slot(trans, extent_root,
@@ -2771,8 +2243,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2771 btrfs_print_leaf(extent_root, path->nodes[0]); 2243 btrfs_print_leaf(extent_root, path->nodes[0]);
2772 WARN_ON(1); 2244 WARN_ON(1);
2773 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2245 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2774 "root %llu gen %llu owner %llu\n", 2246 "parent %llu root %llu gen %llu owner %llu\n",
2775 (unsigned long long)bytenr, 2247 (unsigned long long)bytenr,
2248 (unsigned long long)parent,
2776 (unsigned long long)root_objectid, 2249 (unsigned long long)root_objectid,
2777 (unsigned long long)ref_generation, 2250 (unsigned long long)ref_generation,
2778 (unsigned long long)owner_objectid); 2251 (unsigned long long)owner_objectid);
@@ -2782,17 +2255,23 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2782 ei = btrfs_item_ptr(leaf, extent_slot, 2255 ei = btrfs_item_ptr(leaf, extent_slot,
2783 struct btrfs_extent_item); 2256 struct btrfs_extent_item);
2784 refs = btrfs_extent_refs(leaf, ei); 2257 refs = btrfs_extent_refs(leaf, ei);
2785 BUG_ON(refs == 0);
2786 refs -= 1;
2787 btrfs_set_extent_refs(leaf, ei, refs);
2788 2258
2259 /*
2260 * we're not allowed to delete the extent item if there
2261 * are other delayed ref updates pending
2262 */
2263
2264 BUG_ON(refs < refs_to_drop);
2265 refs -= refs_to_drop;
2266 btrfs_set_extent_refs(leaf, ei, refs);
2789 btrfs_mark_buffer_dirty(leaf); 2267 btrfs_mark_buffer_dirty(leaf);
2790 2268
2791 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2269 if (refs == 0 && found_extent &&
2270 path->slots[0] == extent_slot + 1) {
2792 struct btrfs_extent_ref *ref; 2271 struct btrfs_extent_ref *ref;
2793 ref = btrfs_item_ptr(leaf, path->slots[0], 2272 ref = btrfs_item_ptr(leaf, path->slots[0],
2794 struct btrfs_extent_ref); 2273 struct btrfs_extent_ref);
2795 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2274 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
2796 /* if the back ref and the extent are next to each other 2275 /* if the back ref and the extent are next to each other
2797 * they get deleted below in one shot 2276 * they get deleted below in one shot
2798 */ 2277 */
@@ -2800,7 +2279,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2800 num_to_del = 2; 2279 num_to_del = 2;
2801 } else if (found_extent) { 2280 } else if (found_extent) {
2802 /* otherwise delete the extent back ref */ 2281 /* otherwise delete the extent back ref */
2803 ret = remove_extent_backref(trans, extent_root, path); 2282 ret = remove_extent_backref(trans, extent_root, path,
2283 refs_to_drop);
2804 BUG_ON(ret); 2284 BUG_ON(ret);
2805 /* if refs are 0, we need to setup the path for deletion */ 2285 /* if refs are 0, we need to setup the path for deletion */
2806 if (refs == 0) { 2286 if (refs == 0) {
@@ -2850,218 +2330,35 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2850 BUG_ON(ret); 2330 BUG_ON(ret);
2851 } 2331 }
2852 btrfs_free_path(path); 2332 btrfs_free_path(path);
2853 finish_current_insert(trans, extent_root, 0);
2854 return ret; 2333 return ret;
2855} 2334}
2856 2335
2857/* 2336/*
2858 * find all the blocks marked as pending in the radix tree and remove
2859 * them from the extent map
2860 */
2861static int del_pending_extents(struct btrfs_trans_handle *trans,
2862 struct btrfs_root *extent_root, int all)
2863{
2864 int ret;
2865 int err = 0;
2866 u64 start;
2867 u64 end;
2868 u64 priv;
2869 u64 search = 0;
2870 int nr = 0, skipped = 0;
2871 struct extent_io_tree *pending_del;
2872 struct extent_io_tree *extent_ins;
2873 struct pending_extent_op *extent_op;
2874 struct btrfs_fs_info *info = extent_root->fs_info;
2875 struct list_head delete_list;
2876
2877 INIT_LIST_HEAD(&delete_list);
2878 extent_ins = &extent_root->fs_info->extent_ins;
2879 pending_del = &extent_root->fs_info->pending_del;
2880
2881again:
2882 mutex_lock(&info->extent_ins_mutex);
2883 while (1) {
2884 ret = find_first_extent_bit(pending_del, search, &start, &end,
2885 EXTENT_WRITEBACK);
2886 if (ret) {
2887 if (all && skipped && !nr) {
2888 search = 0;
2889 skipped = 0;
2890 continue;
2891 }
2892 mutex_unlock(&info->extent_ins_mutex);
2893 break;
2894 }
2895
2896 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2897 if (!ret) {
2898 search = end+1;
2899 skipped = 1;
2900
2901 if (need_resched()) {
2902 mutex_unlock(&info->extent_ins_mutex);
2903 cond_resched();
2904 mutex_lock(&info->extent_ins_mutex);
2905 }
2906
2907 continue;
2908 }
2909 BUG_ON(ret < 0);
2910
2911 ret = get_state_private(pending_del, start, &priv);
2912 BUG_ON(ret);
2913 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2914
2915 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2916 GFP_NOFS);
2917 if (!test_range_bit(extent_ins, start, end,
2918 EXTENT_WRITEBACK, 0)) {
2919 list_add_tail(&extent_op->list, &delete_list);
2920 nr++;
2921 } else {
2922 kfree(extent_op);
2923
2924 ret = get_state_private(&info->extent_ins, start,
2925 &priv);
2926 BUG_ON(ret);
2927 extent_op = (struct pending_extent_op *)
2928 (unsigned long)priv;
2929
2930 clear_extent_bits(&info->extent_ins, start, end,
2931 EXTENT_WRITEBACK, GFP_NOFS);
2932
2933 if (extent_op->type == PENDING_BACKREF_UPDATE) {
2934 list_add_tail(&extent_op->list, &delete_list);
2935 search = end + 1;
2936 nr++;
2937 continue;
2938 }
2939
2940 mutex_lock(&extent_root->fs_info->pinned_mutex);
2941 ret = pin_down_bytes(trans, extent_root, start,
2942 end + 1 - start, 0);
2943 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2944
2945 ret = update_block_group(trans, extent_root, start,
2946 end + 1 - start, 0, ret > 0);
2947
2948 unlock_extent(extent_ins, start, end, GFP_NOFS);
2949 BUG_ON(ret);
2950 kfree(extent_op);
2951 }
2952 if (ret)
2953 err = ret;
2954
2955 search = end + 1;
2956
2957 if (need_resched()) {
2958 mutex_unlock(&info->extent_ins_mutex);
2959 cond_resched();
2960 mutex_lock(&info->extent_ins_mutex);
2961 }
2962 }
2963
2964 if (nr) {
2965 ret = free_extents(trans, extent_root, &delete_list);
2966 BUG_ON(ret);
2967 }
2968
2969 if (all && skipped) {
2970 INIT_LIST_HEAD(&delete_list);
2971 search = 0;
2972 nr = 0;
2973 goto again;
2974 }
2975
2976 if (!err)
2977 finish_current_insert(trans, extent_root, 0);
2978 return err;
2979}
2980
2981/*
2982 * remove an extent from the root, returns 0 on success 2337 * remove an extent from the root, returns 0 on success
2983 */ 2338 */
2984static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2339static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2985 struct btrfs_root *root, 2340 struct btrfs_root *root,
2986 u64 bytenr, u64 num_bytes, u64 parent, 2341 u64 bytenr, u64 num_bytes, u64 parent,
2987 u64 root_objectid, u64 ref_generation, 2342 u64 root_objectid, u64 ref_generation,
2988 u64 owner_objectid, int pin) 2343 u64 owner_objectid, int pin,
2344 int refs_to_drop)
2989{ 2345{
2990 struct btrfs_root *extent_root = root->fs_info->extent_root;
2991 int pending_ret;
2992 int ret;
2993
2994 WARN_ON(num_bytes < root->sectorsize); 2346 WARN_ON(num_bytes < root->sectorsize);
2995 if (root == extent_root) {
2996 struct pending_extent_op *extent_op = NULL;
2997
2998 mutex_lock(&root->fs_info->extent_ins_mutex);
2999 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
3000 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
3001 u64 priv;
3002 ret = get_state_private(&root->fs_info->extent_ins,
3003 bytenr, &priv);
3004 BUG_ON(ret);
3005 extent_op = (struct pending_extent_op *)
3006 (unsigned long)priv;
3007
3008 extent_op->del = 1;
3009 if (extent_op->type == PENDING_EXTENT_INSERT) {
3010 mutex_unlock(&root->fs_info->extent_ins_mutex);
3011 return 0;
3012 }
3013 }
3014
3015 if (extent_op) {
3016 ref_generation = extent_op->orig_generation;
3017 parent = extent_op->orig_parent;
3018 }
3019 2347
3020 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2348 /*
3021 BUG_ON(!extent_op); 2349 * if metadata always pin
3022 2350 * if data pin when any transaction has committed this
3023 extent_op->type = PENDING_EXTENT_DELETE; 2351 */
3024 extent_op->bytenr = bytenr; 2352 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
3025 extent_op->num_bytes = num_bytes; 2353 ref_generation != trans->transid)
3026 extent_op->parent = parent;
3027 extent_op->orig_parent = parent;
3028 extent_op->generation = ref_generation;
3029 extent_op->orig_generation = ref_generation;
3030 extent_op->level = (int)owner_objectid;
3031 INIT_LIST_HEAD(&extent_op->list);
3032 extent_op->del = 0;
3033
3034 set_extent_bits(&root->fs_info->pending_del,
3035 bytenr, bytenr + num_bytes - 1,
3036 EXTENT_WRITEBACK, GFP_NOFS);
3037 set_state_private(&root->fs_info->pending_del,
3038 bytenr, (unsigned long)extent_op);
3039 mutex_unlock(&root->fs_info->extent_ins_mutex);
3040 return 0;
3041 }
3042 /* if metadata always pin */
3043 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3044 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3045 mutex_lock(&root->fs_info->pinned_mutex);
3046 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3047 mutex_unlock(&root->fs_info->pinned_mutex);
3048 update_reserved_extents(root, bytenr, num_bytes, 0);
3049 return 0;
3050 }
3051 pin = 1; 2354 pin = 1;
3052 }
3053 2355
3054 /* if data pin when any transaction has committed this */
3055 if (ref_generation != trans->transid) 2356 if (ref_generation != trans->transid)
3056 pin = 1; 2357 pin = 1;
3057 2358
3058 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 2359 return __free_extent(trans, root, bytenr, num_bytes, parent,
3059 root_objectid, ref_generation, 2360 root_objectid, ref_generation,
3060 owner_objectid, pin, pin == 0); 2361 owner_objectid, pin, pin == 0, refs_to_drop);
3061
3062 finish_current_insert(trans, root->fs_info->extent_root, 0);
3063 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
3064 return ret ? ret : pending_ret;
3065} 2362}
3066 2363
3067int btrfs_free_extent(struct btrfs_trans_handle *trans, 2364int btrfs_free_extent(struct btrfs_trans_handle *trans,
@@ -3072,9 +2369,26 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3072{ 2369{
3073 int ret; 2370 int ret;
3074 2371
3075 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 2372 /*
3076 root_objectid, ref_generation, 2373 * tree log blocks never actually go into the extent allocation
3077 owner_objectid, pin); 2374 * tree, just update pinning info and exit early.
2375 *
2376 * data extents referenced by the tree log do need to have
2377 * their reference counts bumped.
2378 */
2379 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
2380 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2381 mutex_lock(&root->fs_info->pinned_mutex);
2382 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2383 mutex_unlock(&root->fs_info->pinned_mutex);
2384 update_reserved_extents(root, bytenr, num_bytes, 0);
2385 ret = 0;
2386 } else {
2387 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
2388 root_objectid, ref_generation,
2389 owner_objectid,
2390 BTRFS_DROP_DELAYED_REF, 1);
2391 }
3078 return ret; 2392 return ret;
3079} 2393}
3080 2394
@@ -3475,10 +2789,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3475static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 2789static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3476 struct btrfs_root *root, u64 parent, 2790 struct btrfs_root *root, u64 parent,
3477 u64 root_objectid, u64 ref_generation, 2791 u64 root_objectid, u64 ref_generation,
3478 u64 owner, struct btrfs_key *ins) 2792 u64 owner, struct btrfs_key *ins,
2793 int ref_mod)
3479{ 2794{
3480 int ret; 2795 int ret;
3481 int pending_ret;
3482 u64 super_used; 2796 u64 super_used;
3483 u64 root_used; 2797 u64 root_used;
3484 u64 num_bytes = ins->offset; 2798 u64 num_bytes = ins->offset;
@@ -3503,33 +2817,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3503 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 2817 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
3504 spin_unlock(&info->delalloc_lock); 2818 spin_unlock(&info->delalloc_lock);
3505 2819
3506 if (root == extent_root) {
3507 struct pending_extent_op *extent_op;
3508
3509 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3510 BUG_ON(!extent_op);
3511
3512 extent_op->type = PENDING_EXTENT_INSERT;
3513 extent_op->bytenr = ins->objectid;
3514 extent_op->num_bytes = ins->offset;
3515 extent_op->parent = parent;
3516 extent_op->orig_parent = 0;
3517 extent_op->generation = ref_generation;
3518 extent_op->orig_generation = 0;
3519 extent_op->level = (int)owner;
3520 INIT_LIST_HEAD(&extent_op->list);
3521 extent_op->del = 0;
3522
3523 mutex_lock(&root->fs_info->extent_ins_mutex);
3524 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
3525 ins->objectid + ins->offset - 1,
3526 EXTENT_WRITEBACK, GFP_NOFS);
3527 set_state_private(&root->fs_info->extent_ins,
3528 ins->objectid, (unsigned long)extent_op);
3529 mutex_unlock(&root->fs_info->extent_ins_mutex);
3530 goto update_block;
3531 }
3532
3533 memcpy(&keys[0], ins, sizeof(*ins)); 2820 memcpy(&keys[0], ins, sizeof(*ins));
3534 keys[1].objectid = ins->objectid; 2821 keys[1].objectid = ins->objectid;
3535 keys[1].type = BTRFS_EXTENT_REF_KEY; 2822 keys[1].type = BTRFS_EXTENT_REF_KEY;
@@ -3546,31 +2833,24 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3546 2833
3547 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2834 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3548 struct btrfs_extent_item); 2835 struct btrfs_extent_item);
3549 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 2836 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
3550 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 2837 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3551 struct btrfs_extent_ref); 2838 struct btrfs_extent_ref);
3552 2839
3553 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 2840 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
3554 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 2841 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
3555 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 2842 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
3556 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 2843 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
3557 2844
3558 btrfs_mark_buffer_dirty(path->nodes[0]); 2845 btrfs_mark_buffer_dirty(path->nodes[0]);
3559 2846
3560 trans->alloc_exclude_start = 0; 2847 trans->alloc_exclude_start = 0;
3561 trans->alloc_exclude_nr = 0; 2848 trans->alloc_exclude_nr = 0;
3562 btrfs_free_path(path); 2849 btrfs_free_path(path);
3563 finish_current_insert(trans, extent_root, 0);
3564 pending_ret = del_pending_extents(trans, extent_root, 0);
3565 2850
3566 if (ret) 2851 if (ret)
3567 goto out; 2852 goto out;
3568 if (pending_ret) {
3569 ret = pending_ret;
3570 goto out;
3571 }
3572 2853
3573update_block:
3574 ret = update_block_group(trans, root, ins->objectid, 2854 ret = update_block_group(trans, root, ins->objectid,
3575 ins->offset, 1, 0); 2855 ins->offset, 1, 0);
3576 if (ret) { 2856 if (ret) {
@@ -3592,9 +2872,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3592 2872
3593 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 2873 if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3594 return 0; 2874 return 0;
3595 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 2875
3596 ref_generation, owner, ins); 2876 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3597 update_reserved_extents(root, ins->objectid, ins->offset, 0); 2877 ins->offset, parent, root_objectid,
2878 ref_generation, owner,
2879 BTRFS_ADD_DELAYED_EXTENT, 0);
2880 BUG_ON(ret);
3598 return ret; 2881 return ret;
3599} 2882}
3600 2883
@@ -3621,7 +2904,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3621 BUG_ON(ret); 2904 BUG_ON(ret);
3622 put_block_group(block_group); 2905 put_block_group(block_group);
3623 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 2906 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3624 ref_generation, owner, ins); 2907 ref_generation, owner, ins, 1);
3625 return ret; 2908 return ret;
3626} 2909}
3627 2910
@@ -3640,20 +2923,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3640 u64 search_end, struct btrfs_key *ins, u64 data) 2923 u64 search_end, struct btrfs_key *ins, u64 data)
3641{ 2924{
3642 int ret; 2925 int ret;
3643
3644 ret = __btrfs_reserve_extent(trans, root, num_bytes, 2926 ret = __btrfs_reserve_extent(trans, root, num_bytes,
3645 min_alloc_size, empty_size, hint_byte, 2927 min_alloc_size, empty_size, hint_byte,
3646 search_end, ins, data); 2928 search_end, ins, data);
3647 BUG_ON(ret); 2929 BUG_ON(ret);
3648 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 2930 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3649 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 2931 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3650 root_objectid, ref_generation, 2932 ins->offset, parent, root_objectid,
3651 owner_objectid, ins); 2933 ref_generation, owner_objectid,
2934 BTRFS_ADD_DELAYED_EXTENT, 0);
3652 BUG_ON(ret); 2935 BUG_ON(ret);
3653
3654 } else {
3655 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3656 } 2936 }
2937 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3657 return ret; 2938 return ret;
3658} 2939}
3659 2940
@@ -3789,7 +3070,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3789 3070
3790 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3071 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3791 3072
3792 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3073 ret = btrfs_free_extent(trans, root, disk_bytenr,
3793 btrfs_file_extent_disk_num_bytes(leaf, fi), 3074 btrfs_file_extent_disk_num_bytes(leaf, fi),
3794 leaf->start, leaf_owner, leaf_generation, 3075 leaf->start, leaf_owner, leaf_generation,
3795 key.objectid, 0); 3076 key.objectid, 0);
@@ -3829,7 +3110,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3829 */ 3110 */
3830 for (i = 0; i < ref->nritems; i++) { 3111 for (i = 0; i < ref->nritems; i++) {
3831 info = ref->extents + sorted[i].slot; 3112 info = ref->extents + sorted[i].slot;
3832 ret = __btrfs_free_extent(trans, root, info->bytenr, 3113 ret = btrfs_free_extent(trans, root, info->bytenr,
3833 info->num_bytes, ref->bytenr, 3114 info->num_bytes, ref->bytenr,
3834 ref->owner, ref->generation, 3115 ref->owner, ref->generation,
3835 info->objectid, 0); 3116 info->objectid, 0);
@@ -3846,12 +3127,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3846 return 0; 3127 return 0;
3847} 3128}
3848 3129
3849static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3130static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3131 struct btrfs_root *root, u64 start,
3850 u64 len, u32 *refs) 3132 u64 len, u32 *refs)
3851{ 3133{
3852 int ret; 3134 int ret;
3853 3135
3854 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3136 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
3855 BUG_ON(ret); 3137 BUG_ON(ret);
3856 3138
3857#if 0 /* some debugging code in case we see problems here */ 3139#if 0 /* some debugging code in case we see problems here */
@@ -3959,7 +3241,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
3959 * we just decrement it below and don't update any 3241 * we just decrement it below and don't update any
3960 * of the refs the leaf points to. 3242 * of the refs the leaf points to.
3961 */ 3243 */
3962 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3244 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3245 blocksize, &refs);
3963 BUG_ON(ret); 3246 BUG_ON(ret);
3964 if (refs != 1) 3247 if (refs != 1)
3965 continue; 3248 continue;
@@ -4010,7 +3293,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4010 */ 3293 */
4011 for (i = 0; i < refi; i++) { 3294 for (i = 0; i < refi; i++) {
4012 bytenr = sorted[i].bytenr; 3295 bytenr = sorted[i].bytenr;
4013 ret = __btrfs_free_extent(trans, root, bytenr, 3296 ret = btrfs_free_extent(trans, root, bytenr,
4014 blocksize, eb->start, 3297 blocksize, eb->start,
4015 root_owner, root_gen, 0, 1); 3298 root_owner, root_gen, 0, 1);
4016 BUG_ON(ret); 3299 BUG_ON(ret);
@@ -4053,7 +3336,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4053 3336
4054 WARN_ON(*level < 0); 3337 WARN_ON(*level < 0);
4055 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3338 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4056 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 3339 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4057 path->nodes[*level]->len, &refs); 3340 path->nodes[*level]->len, &refs);
4058 BUG_ON(ret); 3341 BUG_ON(ret);
4059 if (refs > 1) 3342 if (refs > 1)
@@ -4104,7 +3387,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4104 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3387 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4105 blocksize = btrfs_level_size(root, *level - 1); 3388 blocksize = btrfs_level_size(root, *level - 1);
4106 3389
4107 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3390 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3391 blocksize, &refs);
4108 BUG_ON(ret); 3392 BUG_ON(ret);
4109 3393
4110 /* 3394 /*
@@ -4119,7 +3403,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4119 root_gen = btrfs_header_generation(parent); 3403 root_gen = btrfs_header_generation(parent);
4120 path->slots[*level]++; 3404 path->slots[*level]++;
4121 3405
4122 ret = __btrfs_free_extent(trans, root, bytenr, 3406 ret = btrfs_free_extent(trans, root, bytenr,
4123 blocksize, parent->start, 3407 blocksize, parent->start,
4124 root_owner, root_gen, 3408 root_owner, root_gen,
4125 *level - 1, 1); 3409 *level - 1, 1);
@@ -4165,7 +3449,7 @@ out:
4165 * cleanup and free the reference on the last node 3449 * cleanup and free the reference on the last node
4166 * we processed 3450 * we processed
4167 */ 3451 */
4168 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3452 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4169 parent->start, root_owner, root_gen, 3453 parent->start, root_owner, root_gen,
4170 *level, 1); 3454 *level, 1);
4171 free_extent_buffer(path->nodes[*level]); 3455 free_extent_buffer(path->nodes[*level]);
@@ -5457,6 +4741,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
5457 root->root_key.objectid, 4741 root->root_key.objectid,
5458 trans->transid, key.objectid); 4742 trans->transid, key.objectid);
5459 BUG_ON(ret); 4743 BUG_ON(ret);
4744
5460 ret = btrfs_free_extent(trans, root, 4745 ret = btrfs_free_extent(trans, root,
5461 bytenr, num_bytes, leaf->start, 4746 bytenr, num_bytes, leaf->start,
5462 btrfs_header_owner(leaf), 4747 btrfs_header_owner(leaf),
@@ -5768,9 +5053,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
5768 ref_path, NULL, NULL); 5053 ref_path, NULL, NULL);
5769 BUG_ON(ret); 5054 BUG_ON(ret);
5770 5055
5771 if (root == root->fs_info->extent_root)
5772 btrfs_extent_post_op(trans, root);
5773
5774 return 0; 5056 return 0;
5775} 5057}
5776 5058
@@ -6208,6 +5490,9 @@ again:
6208 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 5490 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
6209 mutex_unlock(&root->fs_info->cleaner_mutex); 5491 mutex_unlock(&root->fs_info->cleaner_mutex);
6210 5492
5493 trans = btrfs_start_transaction(info->tree_root, 1);
5494 btrfs_commit_transaction(trans, info->tree_root);
5495
6211 while (1) { 5496 while (1) {
6212 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5497 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6213 if (ret < 0) 5498 if (ret < 0)
@@ -6500,9 +5785,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6500 sizeof(cache->item)); 5785 sizeof(cache->item));
6501 BUG_ON(ret); 5786 BUG_ON(ret);
6502 5787
6503 finish_current_insert(trans, extent_root, 0);
6504 ret = del_pending_extents(trans, extent_root, 0);
6505 BUG_ON(ret);
6506 set_avail_alloc_bits(extent_root->fs_info, type); 5788 set_avail_alloc_bits(extent_root->fs_info, type);
6507 5789
6508 return 0; 5790 return 0;