aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@redhat.com>2008-11-12 14:19:50 -0500
committerChris Mason <chris.mason@oracle.com>2008-11-12 14:19:50 -0500
commitf3465ca44e2a51fd647c167045768a8ab5a96603 (patch)
tree3d08ed21a29374321469d4f43391fec7f34d8214 /fs/btrfs/extent-tree.c
parentc5c9cd4d1b827fe545ed2a945e91e3a6909f3886 (diff)
Btrfs: batch extent inserts/updates/deletions on the extent root
While profiling the allocator I noticed a good amount of time was being spent in finish_current_insert and del_pending_extents, and as the filesystem filled up more and more time was being spent in those functions. This patch aims to try and reduce that problem. This happens two ways 1) track if we tried to delete an extent that we are going to update or insert. Once we get into finish_current_insert we discard any of the extents that were marked for deletion. This saves us from doing unnecessary work almost every time finish_current_insert runs. 2) Batch insertion/updates/deletions. Instead of doing a btrfs_search_slot for each individual extent and doing the needed operation, we instead keep the leaf around and see if there is anything else we can do on that leaf. On the insert case I introduced a btrfs_insert_some_items, which will take an array of keys with an array of data_sizes and try and squeeze in as many of those keys as possible, and then return how many keys it was able to insert. In the update case we search for an extent ref, update the ref and then loop through the leaf to see if any of the other refs we are looking to update are on that leaf, and then once we are done we release the path and search for the next ref we need to update. And finally for the deletion we try and delete the extent+ref in pairs, so we will try to find extent+ref pairs next to the extent we are trying to free and free them in bulk if possible. This along with the other cluster fix that Chris pushed out a bit ago helps make the allocator perform more uniformly as it fills up the disk. There is still a slight drop as we fill up the disk since we start having to stick new blocks in odd places which results in more COW's than on an empty fs, but the drop is not nearly as severe as it was before. Signed-off-by: Josef Bacik <jbacik@redhat.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c827
1 files changed, 734 insertions, 93 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 22820f91d2b0..e785f0a0632b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -42,6 +42,8 @@ struct pending_extent_op {
42 u64 generation; 42 u64 generation;
43 u64 orig_generation; 43 u64 orig_generation;
44 int level; 44 int level;
45 struct list_head list;
46 int del;
45}; 47};
46 48
47static int finish_current_insert(struct btrfs_trans_handle *trans, struct 49static int finish_current_insert(struct btrfs_trans_handle *trans, struct
@@ -52,6 +54,13 @@ static struct btrfs_block_group_cache *
52__btrfs_find_block_group(struct btrfs_root *root, 54__btrfs_find_block_group(struct btrfs_root *root,
53 struct btrfs_block_group_cache *hint, 55 struct btrfs_block_group_cache *hint,
54 u64 search_start, int data, int owner); 56 u64 search_start, int data, int owner);
57static int pin_down_bytes(struct btrfs_trans_handle *trans,
58 struct btrfs_root *root,
59 u64 bytenr, u64 num_bytes, int is_data);
60static int update_block_group(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root,
62 u64 bytenr, u64 num_bytes, int alloc,
63 int mark_free);
55 64
56static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 65static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
57{ 66{
@@ -559,6 +568,251 @@ out:
559 return ret; 568 return ret;
560} 569}
561 570
571/*
572 * updates all the backrefs that are pending on update_list for the
573 * extent_root
574 */
575static int noinline update_backrefs(struct btrfs_trans_handle *trans,
576 struct btrfs_root *extent_root,
577 struct btrfs_path *path,
578 struct list_head *update_list)
579{
580 struct btrfs_key key;
581 struct btrfs_extent_ref *ref;
582 struct btrfs_fs_info *info = extent_root->fs_info;
583 struct pending_extent_op *op;
584 struct extent_buffer *leaf;
585 int ret = 0;
586 struct list_head *cur = update_list->next;
587 u64 ref_objectid;
588 u64 ref_root = extent_root->root_key.objectid;
589
590 op = list_entry(cur, struct pending_extent_op, list);
591
592search:
593 key.objectid = op->bytenr;
594 key.type = BTRFS_EXTENT_REF_KEY;
595 key.offset = op->orig_parent;
596
597 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
598 BUG_ON(ret);
599
600 leaf = path->nodes[0];
601
602loop:
603 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
604
605 ref_objectid = btrfs_ref_objectid(leaf, ref);
606
607 if (btrfs_ref_root(leaf, ref) != ref_root ||
608 btrfs_ref_generation(leaf, ref) != op->orig_generation ||
609 (ref_objectid != op->level &&
610 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
611 printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, "
612 "owner %u\n", op->bytenr, op->orig_parent,
613 ref_root, op->level);
614 btrfs_print_leaf(extent_root, leaf);
615 BUG();
616 }
617
618 key.objectid = op->bytenr;
619 key.offset = op->parent;
620 key.type = BTRFS_EXTENT_REF_KEY;
621 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
622 BUG_ON(ret);
623 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
624 btrfs_set_ref_generation(leaf, ref, op->generation);
625
626 cur = cur->next;
627
628 list_del_init(&op->list);
629 unlock_extent(&info->extent_ins, op->bytenr,
630 op->bytenr + op->num_bytes - 1, GFP_NOFS);
631 kfree(op);
632
633 if (cur == update_list) {
634 btrfs_mark_buffer_dirty(path->nodes[0]);
635 btrfs_release_path(extent_root, path);
636 goto out;
637 }
638
639 op = list_entry(cur, struct pending_extent_op, list);
640
641 path->slots[0]++;
642 while (path->slots[0] < btrfs_header_nritems(leaf)) {
643 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
644 if (key.objectid == op->bytenr &&
645 key.type == BTRFS_EXTENT_REF_KEY)
646 goto loop;
647 path->slots[0]++;
648 }
649
650 btrfs_mark_buffer_dirty(path->nodes[0]);
651 btrfs_release_path(extent_root, path);
652 goto search;
653
654out:
655 return 0;
656}
657
658static int noinline insert_extents(struct btrfs_trans_handle *trans,
659 struct btrfs_root *extent_root,
660 struct btrfs_path *path,
661 struct list_head *insert_list, int nr)
662{
663 struct btrfs_key *keys;
664 u32 *data_size;
665 struct pending_extent_op *op;
666 struct extent_buffer *leaf;
667 struct list_head *cur = insert_list->next;
668 struct btrfs_fs_info *info = extent_root->fs_info;
669 u64 ref_root = extent_root->root_key.objectid;
670 int i = 0, last = 0, ret;
671 int total = nr * 2;
672
673 if (!nr)
674 return 0;
675
676 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
677 if (!keys)
678 return -ENOMEM;
679
680 data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
681 if (!data_size) {
682 kfree(keys);
683 return -ENOMEM;
684 }
685
686 list_for_each_entry(op, insert_list, list) {
687 keys[i].objectid = op->bytenr;
688 keys[i].offset = op->num_bytes;
689 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
690 data_size[i] = sizeof(struct btrfs_extent_item);
691 i++;
692
693 keys[i].objectid = op->bytenr;
694 keys[i].offset = op->parent;
695 keys[i].type = BTRFS_EXTENT_REF_KEY;
696 data_size[i] = sizeof(struct btrfs_extent_ref);
697 i++;
698 }
699
700 op = list_entry(cur, struct pending_extent_op, list);
701 i = 0;
702 while (i < total) {
703 int c;
704 ret = btrfs_insert_some_items(trans, extent_root, path,
705 keys+i, data_size+i, total-i);
706 BUG_ON(ret < 0);
707
708 if (last && ret > 1)
709 BUG();
710
711 leaf = path->nodes[0];
712 for (c = 0; c < ret; c++) {
713 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
714
715 /*
716 * if the first item we inserted was a backref, then
717 * the EXTENT_ITEM will be the odd c's, else it will
718 * be the even c's
719 */
720 if ((ref_first && (c % 2)) ||
721 (!ref_first && !(c % 2))) {
722 struct btrfs_extent_item *itm;
723
724 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
725 struct btrfs_extent_item);
726 btrfs_set_extent_refs(path->nodes[0], itm, 1);
727 op->del++;
728 } else {
729 struct btrfs_extent_ref *ref;
730
731 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
732 struct btrfs_extent_ref);
733 btrfs_set_ref_root(leaf, ref, ref_root);
734 btrfs_set_ref_generation(leaf, ref,
735 op->generation);
736 btrfs_set_ref_objectid(leaf, ref, op->level);
737 btrfs_set_ref_num_refs(leaf, ref, 1);
738 op->del++;
739 }
740
741 /*
742 * using del to see when its ok to free up the
743 * pending_extent_op. In the case where we insert the
744 * last item on the list in order to help do batching
745 * we need to not free the extent op until we actually
746 * insert the extent_item
747 */
748 if (op->del == 2) {
749 unlock_extent(&info->extent_ins, op->bytenr,
750 op->bytenr + op->num_bytes - 1,
751 GFP_NOFS);
752 cur = cur->next;
753 list_del_init(&op->list);
754 kfree(op);
755 if (cur != insert_list)
756 op = list_entry(cur,
757 struct pending_extent_op,
758 list);
759 }
760 }
761 btrfs_mark_buffer_dirty(leaf);
762 btrfs_release_path(extent_root, path);
763
764 /*
765 * Ok backrefs and items usually go right next to each other,
766 * but if we could only insert 1 item that means that we
767 * inserted on the end of a leaf, and we have no idea what may
768 * be on the next leaf so we just play it safe. In order to
769 * try and help this case we insert the last thing on our
770 * insert list so hopefully it will end up being the last
771 * thing on the leaf and everything else will be before it,
772 * which will let us insert a whole bunch of items at the same
773 * time.
774 */
775 if (ret == 1 && !last && (i + ret < total)) {
776 /*
777 * last: where we will pick up the next time around
778 * i: our current key to insert, will be total - 1
779 * cur: the current op we are screwing with
780 * op: duh
781 */
782 last = i + ret;
783 i = total - 1;
784 cur = insert_list->prev;
785 op = list_entry(cur, struct pending_extent_op, list);
786 } else if (last) {
787 /*
788 * ok we successfully inserted the last item on the
789 * list, lets reset everything
790 *
791 * i: our current key to insert, so where we left off
792 * last time
793 * last: done with this
794 * cur: the op we are messing with
795 * op: duh
796 * total: since we inserted the last key, we need to
797 * decrement total so we dont overflow
798 */
799 i = last;
800 last = 0;
801 cur = insert_list->next;
802 op = list_entry(cur, struct pending_extent_op, list);
803 total--;
804 } else {
805 i += ret;
806 }
807
808 cond_resched();
809 }
810 ret = 0;
811 kfree(keys);
812 kfree(data_size);
813 return ret;
814}
815
562static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, 816static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
563 struct btrfs_root *root, 817 struct btrfs_root *root,
564 struct btrfs_path *path, 818 struct btrfs_path *path,
@@ -642,6 +896,267 @@ static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
642 return ret; 896 return ret;
643} 897}
644 898
899static int noinline free_extents(struct btrfs_trans_handle *trans,
900 struct btrfs_root *extent_root,
901 struct list_head *del_list)
902{
903 struct btrfs_fs_info *info = extent_root->fs_info;
904 struct btrfs_path *path;
905 struct btrfs_key key, found_key;
906 struct extent_buffer *leaf;
907 struct list_head *cur;
908 struct pending_extent_op *op;
909 struct btrfs_extent_item *ei;
910 int ret, num_to_del, extent_slot = 0, found_extent = 0;
911 u32 refs;
912 u64 bytes_freed = 0;
913
914 path = btrfs_alloc_path();
915 if (!path)
916 return -ENOMEM;
917 path->reada = 1;
918
919search:
920 /* search for the backref for the current ref we want to delete */
921 cur = del_list->next;
922 op = list_entry(cur, struct pending_extent_op, list);
923 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
924 op->orig_parent,
925 extent_root->root_key.objectid,
926 op->orig_generation, op->level, 1);
927 if (ret) {
928 printk("Unable to find backref byte nr %Lu root %Lu gen %Lu "
929 "owner %u\n", op->bytenr,
930 extent_root->root_key.objectid, op->orig_generation,
931 op->level);
932 btrfs_print_leaf(extent_root, path->nodes[0]);
933 WARN_ON(1);
934 goto out;
935 }
936
937 extent_slot = path->slots[0];
938 num_to_del = 1;
939 found_extent = 0;
940
941 /*
942 * if we aren't the first item on the leaf we can move back one and see
943 * if our ref is right next to our extent item
944 */
945 if (likely(extent_slot)) {
946 extent_slot--;
947 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
948 extent_slot);
949 if (found_key.objectid == op->bytenr &&
950 found_key.type == BTRFS_EXTENT_ITEM_KEY &&
951 found_key.offset == op->num_bytes) {
952 num_to_del++;
953 found_extent = 1;
954 }
955 }
956
957 /*
958 * if we didn't find the extent we need to delete the backref and then
959 * search for the extent item key so we can update its ref count
960 */
961 if (!found_extent) {
962 key.objectid = op->bytenr;
963 key.type = BTRFS_EXTENT_ITEM_KEY;
964 key.offset = op->num_bytes;
965
966 ret = remove_extent_backref(trans, extent_root, path);
967 BUG_ON(ret);
968 btrfs_release_path(extent_root, path);
969 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
970 BUG_ON(ret);
971 extent_slot = path->slots[0];
972 }
973
974 /* this is where we update the ref count for the extent */
975 leaf = path->nodes[0];
976 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
977 refs = btrfs_extent_refs(leaf, ei);
978 BUG_ON(refs == 0);
979 refs--;
980 btrfs_set_extent_refs(leaf, ei, refs);
981
982 btrfs_mark_buffer_dirty(leaf);
983
984 /*
985 * This extent needs deleting. The reason cur_slot is extent_slot +
986 * num_to_del is because extent_slot points to the slot where the extent
987 * is, and if the backref was not right next to the extent we will be
988 * deleting at least 1 item, and will want to start searching at the
989 * slot directly next to extent_slot. However if we did find the
990 * backref next to the extent item then we will be deleting at least 2
991 * items and will want to start searching directly after the ref slot
992 */
993 if (!refs) {
994 struct list_head *pos, *n, *end;
995 int cur_slot = extent_slot+num_to_del;
996 u64 super_used;
997 u64 root_used;
998
999 path->slots[0] = extent_slot;
1000 bytes_freed = op->num_bytes;
1001
1002 /*
1003 * we need to see if we can delete multiple things at once, so
1004 * start looping through the list of extents we are wanting to
1005 * delete and see if their extent/backref's are right next to
1006 * each other and the extents only have 1 ref
1007 */
1008 for (pos = cur->next; pos != del_list; pos = pos->next) {
1009 struct pending_extent_op *tmp;
1010
1011 tmp = list_entry(pos, struct pending_extent_op, list);
1012
1013 /* we only want to delete extent+ref at this stage */
1014 if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1015 break;
1016
1017 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1018 if (found_key.objectid != tmp->bytenr ||
1019 found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1020 found_key.offset != tmp->num_bytes)
1021 break;
1022
1023 /* check to make sure this extent only has one ref */
1024 ei = btrfs_item_ptr(leaf, cur_slot,
1025 struct btrfs_extent_item);
1026 if (btrfs_extent_refs(leaf, ei) != 1)
1027 break;
1028
1029 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1030 if (found_key.objectid != tmp->bytenr ||
1031 found_key.type != BTRFS_EXTENT_REF_KEY ||
1032 found_key.offset != tmp->orig_parent)
1033 break;
1034
1035 /*
1036 * the ref is right next to the extent, we can set the
1037 * ref count to 0 since we will delete them both now
1038 */
1039 btrfs_set_extent_refs(leaf, ei, 0);
1040
1041 /* pin down the bytes for this extent */
1042 mutex_lock(&info->pinned_mutex);
1043 ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1044 tmp->num_bytes, tmp->level >=
1045 BTRFS_FIRST_FREE_OBJECTID);
1046 mutex_unlock(&info->pinned_mutex);
1047 BUG_ON(ret < 0);
1048
1049 /*
1050 * use the del field to tell if we need to go ahead and
1051 * free up the extent when we delete the item or not.
1052 */
1053 tmp->del = ret;
1054 bytes_freed += tmp->num_bytes;
1055
1056 num_to_del += 2;
1057 cur_slot += 2;
1058 }
1059 end = pos;
1060
1061 /* update the free space counters */
1062 spin_lock_irq(&info->delalloc_lock);
1063 super_used = btrfs_super_bytes_used(&info->super_copy);
1064 btrfs_set_super_bytes_used(&info->super_copy,
1065 super_used - bytes_freed);
1066 spin_unlock_irq(&info->delalloc_lock);
1067
1068 root_used = btrfs_root_used(&extent_root->root_item);
1069 btrfs_set_root_used(&extent_root->root_item,
1070 root_used - bytes_freed);
1071
1072 /* delete the items */
1073 ret = btrfs_del_items(trans, extent_root, path,
1074 path->slots[0], num_to_del);
1075 BUG_ON(ret);
1076
1077 /*
1078 * loop through the extents we deleted and do the cleanup work
1079 * on them
1080 */
1081 for (pos = cur, n = pos->next; pos != end;
1082 pos = n, n = pos->next) {
1083 struct pending_extent_op *tmp;
1084#ifdef BIO_RW_DISCARD
1085 u64 map_length;
1086 struct btrfs_multi_bio *multi = NULL;
1087#endif
1088 tmp = list_entry(pos, struct pending_extent_op, list);
1089
1090 /*
1091 * remember tmp->del tells us whether or not we pinned
1092 * down the extent
1093 */
1094 ret = update_block_group(trans, extent_root,
1095 tmp->bytenr, tmp->num_bytes, 0,
1096 tmp->del);
1097 BUG_ON(ret);
1098
1099#ifdef BIO_RW_DISCARD
1100 ret = btrfs_map_block(&info->mapping_tree, READ,
1101 tmp->bytenr, &map_length, &multi,
1102 0);
1103 if (!ret) {
1104 struct btrfs_bio_stripe *stripe;
1105 int i;
1106
1107 stripe = multi->stripe;
1108
1109 if (map_length > tmp->num_bytes)
1110 map_length = tmp->num_bytes;
1111
1112 for (i = 0; i < multi->num_stripes;
1113 i++, stripe++)
1114 blkdev_issue_discard(stripe->dev->bdev,
1115 stripe->physical >> 9,
1116 map_length >> 9);
1117 kfree(multi);
1118 }
1119#endif
1120 list_del_init(&tmp->list);
1121 unlock_extent(&info->extent_ins, tmp->bytenr,
1122 tmp->bytenr + tmp->num_bytes - 1,
1123 GFP_NOFS);
1124 kfree(tmp);
1125 }
1126 } else if (refs && found_extent) {
1127 /*
1128 * the ref and extent were right next to each other, but the
1129 * extent still has a ref, so just free the backref and keep
1130 * going
1131 */
1132 ret = remove_extent_backref(trans, extent_root, path);
1133 BUG_ON(ret);
1134
1135 list_del_init(&op->list);
1136 unlock_extent(&info->extent_ins, op->bytenr,
1137 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1138 kfree(op);
1139 } else {
1140 /*
1141 * the extent has multiple refs and the backref we were looking
1142 * for was not right next to it, so just unlock and go next,
1143 * we're good to go
1144 */
1145 list_del_init(&op->list);
1146 unlock_extent(&info->extent_ins, op->bytenr,
1147 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1148 kfree(op);
1149 }
1150
1151 btrfs_release_path(extent_root, path);
1152 if (!list_empty(del_list))
1153 goto search;
1154
1155out:
1156 btrfs_free_path(path);
1157 return ret;
1158}
1159
645static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1160static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
646 struct btrfs_root *root, u64 bytenr, 1161 struct btrfs_root *root, u64 bytenr,
647 u64 orig_parent, u64 parent, 1162 u64 orig_parent, u64 parent,
@@ -685,6 +1200,8 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
685 extent_op->generation = ref_generation; 1200 extent_op->generation = ref_generation;
686 extent_op->orig_generation = orig_generation; 1201 extent_op->orig_generation = orig_generation;
687 extent_op->level = (int)owner_objectid; 1202 extent_op->level = (int)owner_objectid;
1203 INIT_LIST_HEAD(&extent_op->list);
1204 extent_op->del = 0;
688 1205
689 set_extent_bits(&root->fs_info->extent_ins, 1206 set_extent_bits(&root->fs_info->extent_ins,
690 bytenr, bytenr + num_bytes - 1, 1207 bytenr, bytenr + num_bytes - 1,
@@ -1426,9 +1943,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
1426 1943
1427 while(total) { 1944 while(total) {
1428 cache = btrfs_lookup_block_group(info, bytenr); 1945 cache = btrfs_lookup_block_group(info, bytenr);
1429 if (!cache) { 1946 if (!cache)
1430 return -1; 1947 return -1;
1431 }
1432 byte_in_group = bytenr - cache->key.objectid; 1948 byte_in_group = bytenr - cache->key.objectid;
1433 WARN_ON(byte_in_group > cache->key.offset); 1949 WARN_ON(byte_in_group > cache->key.offset);
1434 1950
@@ -1605,102 +2121,176 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
1605 u64 end; 2121 u64 end;
1606 u64 priv; 2122 u64 priv;
1607 u64 search = 0; 2123 u64 search = 0;
2124 u64 skipped = 0;
1608 struct btrfs_fs_info *info = extent_root->fs_info; 2125 struct btrfs_fs_info *info = extent_root->fs_info;
1609 struct btrfs_path *path; 2126 struct btrfs_path *path;
1610 struct btrfs_extent_ref *ref; 2127 struct pending_extent_op *extent_op, *tmp;
1611 struct pending_extent_op *extent_op; 2128 struct list_head insert_list, update_list;
1612 struct btrfs_key key;
1613 struct btrfs_extent_item extent_item;
1614 int ret; 2129 int ret;
1615 int err = 0; 2130 int num_inserts = 0, max_inserts;
1616 2131
1617 btrfs_set_stack_extent_refs(&extent_item, 1);
1618 path = btrfs_alloc_path(); 2132 path = btrfs_alloc_path();
2133 INIT_LIST_HEAD(&insert_list);
2134 INIT_LIST_HEAD(&update_list);
1619 2135
1620 while(1) { 2136 max_inserts = extent_root->leafsize /
1621 mutex_lock(&info->extent_ins_mutex); 2137 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2138 sizeof(struct btrfs_extent_ref) +
2139 sizeof(struct btrfs_extent_item));
2140again:
2141 mutex_lock(&info->extent_ins_mutex);
2142 while (1) {
1622 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2143 ret = find_first_extent_bit(&info->extent_ins, search, &start,
1623 &end, EXTENT_WRITEBACK); 2144 &end, EXTENT_WRITEBACK);
1624 if (ret) { 2145 if (ret) {
1625 mutex_unlock(&info->extent_ins_mutex); 2146 if (skipped && all && !num_inserts) {
1626 if (search && all) { 2147 skipped = 0;
1627 search = 0;
1628 continue; 2148 continue;
1629 } 2149 }
2150 mutex_unlock(&info->extent_ins_mutex);
1630 break; 2151 break;
1631 } 2152 }
1632 2153
1633 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); 2154 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
1634 if (!ret) { 2155 if (!ret) {
2156 skipped = 1;
1635 search = end + 1; 2157 search = end + 1;
1636 mutex_unlock(&info->extent_ins_mutex); 2158 if (need_resched()) {
1637 cond_resched(); 2159 mutex_unlock(&info->extent_ins_mutex);
2160 cond_resched();
2161 mutex_lock(&info->extent_ins_mutex);
2162 }
1638 continue; 2163 continue;
1639 } 2164 }
1640 BUG_ON(ret < 0);
1641 2165
1642 ret = get_state_private(&info->extent_ins, start, &priv); 2166 ret = get_state_private(&info->extent_ins, start, &priv);
1643 BUG_ON(ret); 2167 BUG_ON(ret);
1644 extent_op = (struct pending_extent_op *)(unsigned long)priv; 2168 extent_op = (struct pending_extent_op *)(unsigned long) priv;
1645
1646 mutex_unlock(&info->extent_ins_mutex);
1647 2169
1648 if (extent_op->type == PENDING_EXTENT_INSERT) { 2170 if (extent_op->type == PENDING_EXTENT_INSERT) {
1649 key.objectid = start; 2171 num_inserts++;
1650 key.offset = end + 1 - start; 2172 list_add_tail(&extent_op->list, &insert_list);
1651 key.type = BTRFS_EXTENT_ITEM_KEY; 2173 search = end + 1;
1652 err = btrfs_insert_item(trans, extent_root, &key, 2174 if (num_inserts == max_inserts) {
1653 &extent_item, sizeof(extent_item)); 2175 mutex_unlock(&info->extent_ins_mutex);
1654 BUG_ON(err); 2176 break;
2177 }
2178 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2179 list_add_tail(&extent_op->list, &update_list);
2180 search = end + 1;
2181 } else {
2182 BUG();
2183 }
2184 }
1655 2185
1656 mutex_lock(&info->extent_ins_mutex); 2186 /*
1657 clear_extent_bits(&info->extent_ins, start, end, 2187 * process the update list, clear the writeback bit for it, and if
1658 EXTENT_WRITEBACK, GFP_NOFS); 2188 * somebody marked this thing for deletion then just unlock it and be
1659 mutex_unlock(&info->extent_ins_mutex); 2189 * done, the free_extents will handle it
2190 */
2191 mutex_lock(&info->extent_ins_mutex);
2192 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2193 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2194 extent_op->bytenr + extent_op->num_bytes - 1,
2195 EXTENT_WRITEBACK, GFP_NOFS);
2196 if (extent_op->del) {
2197 list_del_init(&extent_op->list);
2198 unlock_extent(&info->extent_ins, extent_op->bytenr,
2199 extent_op->bytenr + extent_op->num_bytes
2200 - 1, GFP_NOFS);
2201 kfree(extent_op);
2202 }
2203 }
2204 mutex_unlock(&info->extent_ins_mutex);
1660 2205
1661 err = insert_extent_backref(trans, extent_root, path, 2206 /*
1662 start, extent_op->parent, 2207 * still have things left on the update list, go ahead an update
1663 extent_root->root_key.objectid, 2208 * everything
1664 extent_op->generation, 2209 */
1665 extent_op->level); 2210 if (!list_empty(&update_list)) {
1666 BUG_ON(err); 2211 ret = update_backrefs(trans, extent_root, path, &update_list);
1667 } else if (extent_op->type == PENDING_BACKREF_UPDATE) { 2212 BUG_ON(ret);
1668 err = lookup_extent_backref(trans, extent_root, path, 2213 }
1669 start, extent_op->orig_parent,
1670 extent_root->root_key.objectid,
1671 extent_op->orig_generation,
1672 extent_op->level, 0);
1673 BUG_ON(err);
1674 2214
1675 mutex_lock(&info->extent_ins_mutex); 2215 /*
1676 clear_extent_bits(&info->extent_ins, start, end, 2216 * if no inserts need to be done, but we skipped some extents and we
1677 EXTENT_WRITEBACK, GFP_NOFS); 2217 * need to make sure everything is cleaned then reset everything and
1678 mutex_unlock(&info->extent_ins_mutex); 2218 * go back to the beginning
2219 */
2220 if (!num_inserts && all && skipped) {
2221 search = 0;
2222 skipped = 0;
2223 INIT_LIST_HEAD(&update_list);
2224 INIT_LIST_HEAD(&insert_list);
2225 goto again;
2226 } else if (!num_inserts) {
2227 goto out;
2228 }
1679 2229
1680 key.objectid = start; 2230 /*
1681 key.offset = extent_op->parent; 2231 * process the insert extents list. Again if we are deleting this
1682 key.type = BTRFS_EXTENT_REF_KEY; 2232 * extent, then just unlock it, pin down the bytes if need be, and be
1683 err = btrfs_set_item_key_safe(trans, extent_root, path, 2233 * done with it. Saves us from having to actually insert the extent
1684 &key); 2234 * into the tree and then subsequently come along and delete it
1685 BUG_ON(err); 2235 */
1686 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 2236 mutex_lock(&info->extent_ins_mutex);
1687 struct btrfs_extent_ref); 2237 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
1688 btrfs_set_ref_generation(path->nodes[0], ref, 2238 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
1689 extent_op->generation); 2239 extent_op->bytenr + extent_op->num_bytes - 1,
1690 btrfs_mark_buffer_dirty(path->nodes[0]); 2240 EXTENT_WRITEBACK, GFP_NOFS);
1691 btrfs_release_path(extent_root, path); 2241 if (extent_op->del) {
1692 } else { 2242 list_del_init(&extent_op->list);
1693 BUG_ON(1); 2243 unlock_extent(&info->extent_ins, extent_op->bytenr,
2244 extent_op->bytenr + extent_op->num_bytes
2245 - 1, GFP_NOFS);
2246
2247 mutex_lock(&extent_root->fs_info->pinned_mutex);
2248 ret = pin_down_bytes(trans, extent_root,
2249 extent_op->bytenr,
2250 extent_op->num_bytes, 0);
2251 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2252
2253 ret = update_block_group(trans, extent_root,
2254 extent_op->bytenr,
2255 extent_op->num_bytes,
2256 0, ret > 0);
2257 BUG_ON(ret);
2258 kfree(extent_op);
2259 num_inserts--;
1694 } 2260 }
1695 kfree(extent_op); 2261 }
1696 unlock_extent(&info->extent_ins, start, end, GFP_NOFS); 2262 mutex_unlock(&info->extent_ins_mutex);
1697 if (all)
1698 search = 0;
1699 else
1700 search = end + 1;
1701 2263
1702 cond_resched(); 2264 ret = insert_extents(trans, extent_root, path, &insert_list,
2265 num_inserts);
2266 BUG_ON(ret);
2267
2268 /*
2269 * if we broke out of the loop in order to insert stuff because we hit
2270 * the maximum number of inserts at a time we can handle, then loop
2271 * back and pick up where we left off
2272 */
2273 if (num_inserts == max_inserts) {
2274 INIT_LIST_HEAD(&insert_list);
2275 INIT_LIST_HEAD(&update_list);
2276 num_inserts = 0;
2277 goto again;
1703 } 2278 }
2279
2280 /*
2281 * again, if we need to make absolutely sure there are no more pending
2282 * extent operations left and we know that we skipped some, go back to
2283 * the beginning and do it all again
2284 */
2285 if (all && skipped) {
2286 INIT_LIST_HEAD(&insert_list);
2287 INIT_LIST_HEAD(&update_list);
2288 search = 0;
2289 skipped = 0;
2290 num_inserts = 0;
2291 goto again;
2292 }
2293out:
1704 btrfs_free_path(path); 2294 btrfs_free_path(path);
1705 return 0; 2295 return 0;
1706} 2296}
@@ -1802,6 +2392,12 @@ static int __free_extent(struct btrfs_trans_handle *trans,
1802 btrfs_release_path(extent_root, path); 2392 btrfs_release_path(extent_root, path);
1803 ret = btrfs_search_slot(trans, extent_root, 2393 ret = btrfs_search_slot(trans, extent_root,
1804 &key, path, -1, 1); 2394 &key, path, -1, 1);
2395 if (ret) {
2396 printk(KERN_ERR "umm, got %d back from search"
2397 ", was looking for %Lu\n", ret,
2398 bytenr);
2399 btrfs_print_leaf(extent_root, path->nodes[0]);
2400 }
1805 BUG_ON(ret); 2401 BUG_ON(ret);
1806 extent_slot = path->slots[0]; 2402 extent_slot = path->slots[0];
1807 } 2403 }
@@ -1921,32 +2517,42 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1921 u64 end; 2517 u64 end;
1922 u64 priv; 2518 u64 priv;
1923 u64 search = 0; 2519 u64 search = 0;
2520 int nr = 0, skipped = 0;
1924 struct extent_io_tree *pending_del; 2521 struct extent_io_tree *pending_del;
1925 struct extent_io_tree *extent_ins; 2522 struct extent_io_tree *extent_ins;
1926 struct pending_extent_op *extent_op; 2523 struct pending_extent_op *extent_op;
1927 struct btrfs_fs_info *info = extent_root->fs_info; 2524 struct btrfs_fs_info *info = extent_root->fs_info;
2525 struct list_head delete_list;
1928 2526
2527 INIT_LIST_HEAD(&delete_list);
1929 extent_ins = &extent_root->fs_info->extent_ins; 2528 extent_ins = &extent_root->fs_info->extent_ins;
1930 pending_del = &extent_root->fs_info->pending_del; 2529 pending_del = &extent_root->fs_info->pending_del;
1931 2530
2531again:
2532 mutex_lock(&info->extent_ins_mutex);
1932 while(1) { 2533 while(1) {
1933 mutex_lock(&info->extent_ins_mutex);
1934 ret = find_first_extent_bit(pending_del, search, &start, &end, 2534 ret = find_first_extent_bit(pending_del, search, &start, &end,
1935 EXTENT_WRITEBACK); 2535 EXTENT_WRITEBACK);
1936 if (ret) { 2536 if (ret) {
1937 mutex_unlock(&info->extent_ins_mutex); 2537 if (all && skipped && !nr) {
1938 if (all && search) {
1939 search = 0; 2538 search = 0;
1940 continue; 2539 continue;
1941 } 2540 }
2541 mutex_unlock(&info->extent_ins_mutex);
1942 break; 2542 break;
1943 } 2543 }
1944 2544
1945 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); 2545 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
1946 if (!ret) { 2546 if (!ret) {
1947 search = end+1; 2547 search = end+1;
1948 mutex_unlock(&info->extent_ins_mutex); 2548 skipped = 1;
1949 cond_resched(); 2549
2550 if (need_resched()) {
2551 mutex_unlock(&info->extent_ins_mutex);
2552 cond_resched();
2553 mutex_lock(&info->extent_ins_mutex);
2554 }
2555
1950 continue; 2556 continue;
1951 } 2557 }
1952 BUG_ON(ret < 0); 2558 BUG_ON(ret < 0);
@@ -1959,15 +2565,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1959 GFP_NOFS); 2565 GFP_NOFS);
1960 if (!test_range_bit(extent_ins, start, end, 2566 if (!test_range_bit(extent_ins, start, end,
1961 EXTENT_WRITEBACK, 0)) { 2567 EXTENT_WRITEBACK, 0)) {
1962 mutex_unlock(&info->extent_ins_mutex); 2568 list_add_tail(&extent_op->list, &delete_list);
1963free_extent: 2569 nr++;
1964 ret = __free_extent(trans, extent_root,
1965 start, end + 1 - start,
1966 extent_op->orig_parent,
1967 extent_root->root_key.objectid,
1968 extent_op->orig_generation,
1969 extent_op->level, 1, 0);
1970 kfree(extent_op);
1971 } else { 2570 } else {
1972 kfree(extent_op); 2571 kfree(extent_op);
1973 2572
@@ -1980,10 +2579,12 @@ free_extent:
1980 clear_extent_bits(&info->extent_ins, start, end, 2579 clear_extent_bits(&info->extent_ins, start, end,
1981 EXTENT_WRITEBACK, GFP_NOFS); 2580 EXTENT_WRITEBACK, GFP_NOFS);
1982 2581
1983 mutex_unlock(&info->extent_ins_mutex); 2582 if (extent_op->type == PENDING_BACKREF_UPDATE) {
1984 2583 list_add_tail(&extent_op->list, &delete_list);
1985 if (extent_op->type == PENDING_BACKREF_UPDATE) 2584 search = end + 1;
1986 goto free_extent; 2585 nr++;
2586 continue;
2587 }
1987 2588
1988 mutex_lock(&extent_root->fs_info->pinned_mutex); 2589 mutex_lock(&extent_root->fs_info->pinned_mutex);
1989 ret = pin_down_bytes(trans, extent_root, start, 2590 ret = pin_down_bytes(trans, extent_root, start,
@@ -1993,19 +2594,34 @@ free_extent:
1993 ret = update_block_group(trans, extent_root, start, 2594 ret = update_block_group(trans, extent_root, start,
1994 end + 1 - start, 0, ret > 0); 2595 end + 1 - start, 0, ret > 0);
1995 2596
2597 unlock_extent(extent_ins, start, end, GFP_NOFS);
1996 BUG_ON(ret); 2598 BUG_ON(ret);
1997 kfree(extent_op); 2599 kfree(extent_op);
1998 } 2600 }
1999 if (ret) 2601 if (ret)
2000 err = ret; 2602 err = ret;
2001 unlock_extent(extent_ins, start, end, GFP_NOFS);
2002 2603
2003 if (all) 2604 search = end + 1;
2004 search = 0; 2605
2005 else 2606 if (need_resched()) {
2006 search = end + 1; 2607 mutex_unlock(&info->extent_ins_mutex);
2007 cond_resched(); 2608 cond_resched();
2609 mutex_lock(&info->extent_ins_mutex);
2610 }
2008 } 2611 }
2612
2613 if (nr) {
2614 ret = free_extents(trans, extent_root, &delete_list);
2615 BUG_ON(ret);
2616 }
2617
2618 if (all && skipped) {
2619 INIT_LIST_HEAD(&delete_list);
2620 search = 0;
2621 nr = 0;
2622 goto again;
2623 }
2624
2009 return err; 2625 return err;
2010} 2626}
2011 2627
@@ -2024,7 +2640,29 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2024 2640
2025 WARN_ON(num_bytes < root->sectorsize); 2641 WARN_ON(num_bytes < root->sectorsize);
2026 if (root == extent_root) { 2642 if (root == extent_root) {
2027 struct pending_extent_op *extent_op; 2643 struct pending_extent_op *extent_op = NULL;
2644
2645 mutex_lock(&root->fs_info->extent_ins_mutex);
2646 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
2647 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
2648 u64 priv;
2649 ret = get_state_private(&root->fs_info->extent_ins,
2650 bytenr, &priv);
2651 BUG_ON(ret);
2652 extent_op = (struct pending_extent_op *)
2653 (unsigned long)priv;
2654
2655 extent_op->del = 1;
2656 if (extent_op->type == PENDING_EXTENT_INSERT) {
2657 mutex_unlock(&root->fs_info->extent_ins_mutex);
2658 return 0;
2659 }
2660 }
2661
2662 if (extent_op) {
2663 ref_generation = extent_op->orig_generation;
2664 parent = extent_op->orig_parent;
2665 }
2028 2666
2029 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2667 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2030 BUG_ON(!extent_op); 2668 BUG_ON(!extent_op);
@@ -2037,8 +2675,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2037 extent_op->generation = ref_generation; 2675 extent_op->generation = ref_generation;
2038 extent_op->orig_generation = ref_generation; 2676 extent_op->orig_generation = ref_generation;
2039 extent_op->level = (int)owner_objectid; 2677 extent_op->level = (int)owner_objectid;
2678 INIT_LIST_HEAD(&extent_op->list);
2679 extent_op->del = 0;
2040 2680
2041 mutex_lock(&root->fs_info->extent_ins_mutex);
2042 set_extent_bits(&root->fs_info->pending_del, 2681 set_extent_bits(&root->fs_info->pending_del,
2043 bytenr, bytenr + num_bytes - 1, 2682 bytenr, bytenr + num_bytes - 1,
2044 EXTENT_WRITEBACK, GFP_NOFS); 2683 EXTENT_WRITEBACK, GFP_NOFS);
@@ -2515,6 +3154,8 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2515 extent_op->generation = ref_generation; 3154 extent_op->generation = ref_generation;
2516 extent_op->orig_generation = 0; 3155 extent_op->orig_generation = 0;
2517 extent_op->level = (int)owner; 3156 extent_op->level = (int)owner;
3157 INIT_LIST_HEAD(&extent_op->list);
3158 extent_op->del = 0;
2518 3159
2519 mutex_lock(&root->fs_info->extent_ins_mutex); 3160 mutex_lock(&root->fs_info->extent_ins_mutex);
2520 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 3161 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,