path: root/fs/btrfs/extent-tree.c
author		Chris Mason <chris.mason@oracle.com>	2009-03-13 10:17:05 -0400
committer	Chris Mason <chris.mason@oracle.com>	2009-03-24 16:14:26 -0400
commit		c3e69d58e86c3917ae4e9e31b4acf490a7cafe60 (patch)
tree		bd4f1e62446a208bdae26f0c36d67e3afbc1cd1d /fs/btrfs/extent-tree.c
parent		1887be66dcc3140a81d1299958a41fc0eedfa64f (diff)
Btrfs: process the delayed reference queue in clusters
The delayed reference queue maintains pending operations that need to be done to the extent allocation tree.  These are processed by searching the rbtree, one record at a time, for entries that are not currently being processed.

This is slow because it spends a lot of time searching through the rbtree, and because it creates lock contention on the extent allocation tree when lots of different procs are running delayed refs at the same time.

This commit changes things to grab a cluster of refs for processing, using a cursor into the rbtree as the starting point of the next search.  This way we walk smoothly through the rbtree.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
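To make the batching idea concrete, here is a small, self-contained C sketch of the same pattern outside the kernel: pending refs live in a sorted singly linked list standing in for the delayed-ref rbtree, and each pass collects a bounded cluster of refs at or after a saved cursor instead of re-searching from the front every time.  The list, the CLUSTER_MAX limit, and the helper names find_ref_cluster()/run_cluster() are illustrative stand-ins only, not btrfs code; in the patch below the real work is done by the new btrfs_find_ref_cluster() and run_clustered_refs() helpers, with delayed_refs->run_delayed_start playing the role of the cursor.

/*
 * Toy model of the clustering idea, not btrfs code.  Pending refs live in a
 * sorted list standing in for the delayed-ref rbtree; each pass gathers up to
 * CLUSTER_MAX refs at or after a saved cursor and processes them as one
 * batch, then the next pass resumes where the previous one stopped.
 */
#include <stdio.h>

struct ref {
	unsigned long bytenr;		/* sort key, like the extent bytenr */
	struct ref *next;
};

#define CLUSTER_MAX 8

/* collect up to CLUSTER_MAX refs at or after *cursor into cluster[] */
static int find_ref_cluster(struct ref *head, unsigned long *cursor,
			    struct ref **cluster)
{
	struct ref *r;
	int n = 0;

	for (r = head; r && n < CLUSTER_MAX; r = r->next) {
		if (r->bytenr < *cursor)
			continue;
		cluster[n++] = r;
	}
	if (n)
		*cursor = cluster[n - 1]->bytenr + 1;	/* resume here next time */
	return n;
}

/* stand-in for run_clustered_refs(): handle every ref in the batch */
static void run_cluster(struct ref **cluster, int n)
{
	int i;

	for (i = 0; i < n; i++)
		printf("processing ref at %lu\n", cluster[i]->bytenr);
}

int main(void)
{
	struct ref refs[20], *head = NULL;
	struct ref *cluster[CLUSTER_MAX];
	unsigned long cursor = 0;
	int i, n;

	/* build a sorted list of 20 fake refs, 4K apart */
	for (i = 19; i >= 0; i--) {
		refs[i].bytenr = (unsigned long)i * 4096;
		refs[i].next = head;
		head = &refs[i];
	}

	/* each pass picks up where the previous one left off */
	while ((n = find_ref_cluster(head, &cursor, cluster)) > 0)
		run_cluster(cluster, n);
	return 0;
}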
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	149
1 files changed, 98 insertions, 51 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8471c79b0877..3b8b6c212701 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -926,52 +926,41 @@ again:
 	return NULL;
 }
 
-/*
- * this starts processing the delayed reference count updates and
- * extent insertions we have queued up so far.  count can be
- * 0, which means to process everything in the tree at the start
- * of the run (but not newly added entries), or it can be some target
- * number you'd like to process.
- */
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, unsigned long count)
+static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root,
+				       struct list_head *cluster)
 {
-	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
 	int ret;
+	int count = 0;
 	int must_insert_reserved = 0;
-	int run_all = count == (unsigned long)-1;
-
-	if (root == root->fs_info->extent_root)
-		root = root->fs_info->tree_root;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-again:
-	spin_lock(&delayed_refs->lock);
-	if (count == 0)
-		count = delayed_refs->num_entries;
 	while (1) {
 		if (!locked_ref) {
-			/*
-			 * no locked ref, go find something we can
-			 * process in the rbtree.  We start at
-			 * the beginning of the tree, there may be less
-			 * lock contention if we do something smarter here.
-			 */
-			node = rb_first(&delayed_refs->root);
-			if (!node) {
-				spin_unlock(&delayed_refs->lock);
+			/* pick a new head ref from the cluster list */
+			if (list_empty(cluster))
 				break;
-			}
 
-			ref = rb_entry(node, struct btrfs_delayed_ref_node,
-				       rb_node);
-			ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
-				break;
+			locked_ref = list_entry(cluster->next,
+				     struct btrfs_delayed_ref_head, cluster);
+
+			/* grab the lock that says we are going to process
+			 * all the refs for this head */
+			ret = btrfs_delayed_ref_lock(trans, locked_ref);
+
+			/*
+			 * we may have dropped the spin lock to get the head
+			 * mutex lock, and that might have given someone else
+			 * time to free the head.  If that's true, it has been
+			 * removed from our list and we can move on.
+			 */
+			if (ret == -EAGAIN) {
+				locked_ref = NULL;
+				count++;
+				continue;
 			}
 		}
 
@@ -986,7 +975,6 @@ again:
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
 		 */
-
 		ref = select_delayed_ref(locked_ref);
 		if (!ref) {
 			/* All delayed refs have been processed, Go ahead
@@ -994,6 +982,7 @@ again:
 			 * so that any accounting fixes can happen
 			 */
 			ref = &locked_ref->node;
+			list_del_init(&locked_ref->cluster);
 			locked_ref = NULL;
 		}
 
@@ -1007,30 +996,72 @@ again:
 		BUG_ON(ret);
 		btrfs_put_delayed_ref(ref);
 
-		/* once we lock the head ref, we have to process all the
-		 * entries for it.  So, we might end up doing more entries
-		 * that count was asking us to do.
-		 */
-		if (count > 0)
-			count--;
+		count++;
+		cond_resched();
+		spin_lock(&delayed_refs->lock);
+	}
+	return count;
+}
+
+/*
+ * this starts processing the delayed reference count updates and
+ * extent insertions we have queued up so far.  count can be
+ * 0, which means to process everything in the tree at the start
+ * of the run (but not newly added entries), or it can be some target
+ * number you'd like to process.
+ */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, unsigned long count)
+{
+	struct rb_node *node;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_node *ref;
+	struct list_head cluster;
+	int ret;
+	int run_all = count == (unsigned long)-1;
+	int run_most = 0;
+
+	if (root == root->fs_info->extent_root)
+		root = root->fs_info->tree_root;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	INIT_LIST_HEAD(&cluster);
+again:
+	spin_lock(&delayed_refs->lock);
+	if (count == 0) {
+		count = delayed_refs->num_entries * 2;
+		run_most = 1;
+	}
+	while (1) {
+		if (!(run_all || run_most) &&
+		    delayed_refs->num_heads_ready < 64)
+			break;
 
 		/*
-		 * we set locked_ref to null above if we're all done
-		 * with this bytenr
+		 * go find something we can process in the rbtree.  We start at
+		 * the beginning of the tree, and then build a cluster
+		 * of refs to process starting at the first one we are able to
+		 * lock
 		 */
-		if (!locked_ref && count == 0)
+		ret = btrfs_find_ref_cluster(trans, &cluster,
+					     delayed_refs->run_delayed_start);
+		if (ret)
 			break;
 
-		cond_resched();
-		spin_lock(&delayed_refs->lock);
+		ret = run_clustered_refs(trans, root, &cluster);
+		BUG_ON(ret < 0);
+
+		count -= min_t(unsigned long, ret, count);
+
+		if (count == 0)
+			break;
 	}
+
 	if (run_all) {
-		spin_lock(&delayed_refs->lock);
 		node = rb_first(&delayed_refs->root);
-		if (!node) {
-			spin_unlock(&delayed_refs->lock);
+		if (!node)
 			goto out;
-		}
+		count = (unsigned long)-1;
 
 		while (node) {
 			ref = rb_entry(node, struct btrfs_delayed_ref_node,
@@ -1052,11 +1083,11 @@ again:
 			node = rb_next(node);
 		}
 		spin_unlock(&delayed_refs->lock);
-		count = (unsigned long)-1;
 		schedule_timeout(1);
 		goto again;
 	}
 out:
+	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
 
@@ -2407,12 +2438,18 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	 */
 	head->node.in_tree = 0;
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
+
 	delayed_refs->num_entries--;
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
 	 * tree, so we just steal the ref the tree was holding.
 	 */
+	delayed_refs->num_heads--;
+	if (list_empty(&head->cluster))
+		delayed_refs->num_heads_ready--;
+
+	list_del_init(&head->cluster);
 	spin_unlock(&delayed_refs->lock);
 
 	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
@@ -3705,6 +3742,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 	struct btrfs_path *path;
 	int i;
 	int orig_level;
+	int update_count;
 	struct btrfs_root_item *root_item = &root->root_item;
 
 	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
@@ -3746,6 +3784,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		}
 	}
 	while (1) {
+		unsigned long update;
 		wret = walk_down_tree(trans, root, path, &level);
 		if (wret > 0)
 			break;
@@ -3764,6 +3803,14 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		}
 		atomic_inc(&root->fs_info->throttle_gen);
 		wake_up(&root->fs_info->transaction_throttle);
+		for (update_count = 0; update_count < 16; update_count++) {
+			update = trans->delayed_ref_updates;
+			trans->delayed_ref_updates = 0;
+			if (update)
+				btrfs_run_delayed_refs(trans, root, update);
+			else
+				break;
+		}
 	}
 	for (i = 0; i <= orig_level; i++) {
 		if (path->nodes[i]) {