aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-03-13 10:17:05 -0400
committerChris Mason <chris.mason@oracle.com>2009-03-24 16:14:26 -0400
commitc3e69d58e86c3917ae4e9e31b4acf490a7cafe60 (patch)
treebd4f1e62446a208bdae26f0c36d67e3afbc1cd1d /fs
parent1887be66dcc3140a81d1299958a41fc0eedfa64f (diff)
Btrfs: process the delayed reference queue in clusters
The delayed reference queue maintains pending operations that need to be done to the extent allocation tree. These are processed by finding records in the tree that are not currently being processed one at a time. This is slow because it uses lots of time searching through the rbtree and because it creates lock contention on the extent allocation tree when lots of different procs are running delayed refs at the same time. This commit changes things to grab a cluster of refs for processing, using a cursor into the rbtree as the starting point of the next search. This way we walk smoothly through the rbtree. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/delayed-ref.c130
-rw-r--r--fs/btrfs/delayed-ref.h17
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent-tree.c149
-rw-r--r--fs/btrfs/transaction.c25
6 files changed, 226 insertions, 113 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ced5fd85dc36..08d9f8d15538 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -720,6 +720,7 @@ struct btrfs_fs_info {
720 struct mutex drop_mutex; 720 struct mutex drop_mutex;
721 struct mutex volume_mutex; 721 struct mutex volume_mutex;
722 struct mutex tree_reloc_mutex; 722 struct mutex tree_reloc_mutex;
723
723 struct list_head trans_list; 724 struct list_head trans_list;
724 struct list_head hashers; 725 struct list_head hashers;
725 struct list_head dead_roots; 726 struct list_head dead_roots;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 3e7eeaf86408..759fa247ced8 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -93,7 +93,8 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
93 * ref if it was able to find one, or NULL if nothing was in that spot 93 * ref if it was able to find one, or NULL if nothing was in that spot
94 */ 94 */
95static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, 95static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
96 u64 bytenr, u64 parent) 96 u64 bytenr, u64 parent,
97 struct btrfs_delayed_ref_node **last)
97{ 98{
98 struct rb_node *n = root->rb_node; 99 struct rb_node *n = root->rb_node;
99 struct btrfs_delayed_ref_node *entry; 100 struct btrfs_delayed_ref_node *entry;
@@ -102,6 +103,8 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
102 while (n) { 103 while (n) {
103 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); 104 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
104 WARN_ON(!entry->in_tree); 105 WARN_ON(!entry->in_tree);
106 if (last)
107 *last = entry;
105 108
106 cmp = comp_entry(entry, bytenr, parent); 109 cmp = comp_entry(entry, bytenr, parent);
107 if (cmp < 0) 110 if (cmp < 0)
@@ -114,45 +117,99 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
114 return NULL; 117 return NULL;
115} 118}
116 119
117/* 120int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
118 * Locking on delayed refs is done by taking a lock on the head node, 121 struct btrfs_delayed_ref_head *head)
119 * which has the (impossible) parent id of (u64)-1. Once a lock is held
120 * on the head node, you're allowed (and required) to process all the
121 * delayed refs for a given byte number in the tree.
122 *
123 * This will walk forward in the rbtree until it finds a head node it
124 * is able to lock. It might not lock the delayed ref you asked for,
125 * and so it will return the one it did lock in next_ret and return 0.
126 *
127 * If no locks are taken, next_ret is set to null and 1 is returned. This
128 * means there are no more unlocked head nodes in the rbtree.
129 */
130int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
131 struct btrfs_delayed_ref_node *ref,
132 struct btrfs_delayed_ref_head **next_ret)
133{ 122{
123 struct btrfs_delayed_ref_root *delayed_refs;
124
125 delayed_refs = &trans->transaction->delayed_refs;
126 assert_spin_locked(&delayed_refs->lock);
127 if (mutex_trylock(&head->mutex))
128 return 0;
129
130 atomic_inc(&head->node.refs);
131 spin_unlock(&delayed_refs->lock);
132
133 mutex_lock(&head->mutex);
134 spin_lock(&delayed_refs->lock);
135 if (!head->node.in_tree) {
136 mutex_unlock(&head->mutex);
137 btrfs_put_delayed_ref(&head->node);
138 return -EAGAIN;
139 }
140 btrfs_put_delayed_ref(&head->node);
141 return 0;
142}
143
144int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
145 struct list_head *cluster, u64 start)
146{
147 int count = 0;
148 struct btrfs_delayed_ref_root *delayed_refs;
134 struct rb_node *node; 149 struct rb_node *node;
150 struct btrfs_delayed_ref_node *ref;
135 struct btrfs_delayed_ref_head *head; 151 struct btrfs_delayed_ref_head *head;
136 int ret = 0;
137 152
138 while (1) { 153 delayed_refs = &trans->transaction->delayed_refs;
154 if (start == 0) {
155 node = rb_first(&delayed_refs->root);
156 } else {
157 ref = NULL;
158 tree_search(&delayed_refs->root, start, (u64)-1, &ref);
159 if (ref) {
160 struct btrfs_delayed_ref_node *tmp;
161
162 node = rb_prev(&ref->rb_node);
163 while (node) {
164 tmp = rb_entry(node,
165 struct btrfs_delayed_ref_node,
166 rb_node);
167 if (tmp->bytenr < start)
168 break;
169 ref = tmp;
170 node = rb_prev(&ref->rb_node);
171 }
172 node = &ref->rb_node;
173 } else
174 node = rb_first(&delayed_refs->root);
175 }
176again:
177 while (node && count < 32) {
178 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
139 if (btrfs_delayed_ref_is_head(ref)) { 179 if (btrfs_delayed_ref_is_head(ref)) {
140 head = btrfs_delayed_node_to_head(ref); 180 head = btrfs_delayed_node_to_head(ref);
141 if (mutex_trylock(&head->mutex)) { 181 if (list_empty(&head->cluster)) {
142 *next_ret = head; 182 list_add_tail(&head->cluster, cluster);
143 ret = 0; 183 delayed_refs->run_delayed_start =
184 head->node.bytenr;
185 count++;
186
187 WARN_ON(delayed_refs->num_heads_ready == 0);
188 delayed_refs->num_heads_ready--;
189 } else if (count) {
190 /* the goal of the clustering is to find extents
191 * that are likely to end up in the same extent
192 * leaf on disk. So, we don't want them spread
193 * all over the tree. Stop now if we've hit
194 * a head that was already in use
195 */
144 break; 196 break;
145 } 197 }
146 } 198 }
147 node = rb_next(&ref->rb_node); 199 node = rb_next(node);
148 if (!node) {
149 ret = 1;
150 *next_ret = NULL;
151 break;
152 }
153 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
154 } 200 }
155 return ret; 201 if (count) {
202 return 0;
203 } else if (start) {
204 /*
205 * we've gone to the end of the rbtree without finding any
206 * clusters. start from the beginning and try again
207 */
208 start = 0;
209 node = rb_first(&delayed_refs->root);
210 goto again;
211 }
212 return 1;
156} 213}
157 214
158/* 215/*
@@ -178,7 +235,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
178 delayed_refs = &trans->transaction->delayed_refs; 235 delayed_refs = &trans->transaction->delayed_refs;
179 spin_lock(&delayed_refs->lock); 236 spin_lock(&delayed_refs->lock);
180 237
181 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); 238 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
182 if (ref) { 239 if (ref) {
183 prev_node = rb_prev(&ref->rb_node); 240 prev_node = rb_prev(&ref->rb_node);
184 if (!prev_node) 241 if (!prev_node)
@@ -240,7 +297,7 @@ again:
240 } 297 }
241 298
242 spin_lock(&delayed_refs->lock); 299 spin_lock(&delayed_refs->lock);
243 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); 300 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
244 if (ref) { 301 if (ref) {
245 head = btrfs_delayed_node_to_head(ref); 302 head = btrfs_delayed_node_to_head(ref);
246 if (mutex_trylock(&head->mutex)) { 303 if (mutex_trylock(&head->mutex)) {
@@ -384,7 +441,7 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
384{ 441{
385 struct btrfs_delayed_ref_node *existing; 442 struct btrfs_delayed_ref_node *existing;
386 struct btrfs_delayed_ref *full_ref; 443 struct btrfs_delayed_ref *full_ref;
387 struct btrfs_delayed_ref_head *head_ref; 444 struct btrfs_delayed_ref_head *head_ref = NULL;
388 struct btrfs_delayed_ref_root *delayed_refs; 445 struct btrfs_delayed_ref_root *delayed_refs;
389 int count_mod = 1; 446 int count_mod = 1;
390 int must_insert_reserved = 0; 447 int must_insert_reserved = 0;
@@ -428,6 +485,7 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
428 if (btrfs_delayed_ref_is_head(ref)) { 485 if (btrfs_delayed_ref_is_head(ref)) {
429 head_ref = btrfs_delayed_node_to_head(ref); 486 head_ref = btrfs_delayed_node_to_head(ref);
430 head_ref->must_insert_reserved = must_insert_reserved; 487 head_ref->must_insert_reserved = must_insert_reserved;
488 INIT_LIST_HEAD(&head_ref->cluster);
431 mutex_init(&head_ref->mutex); 489 mutex_init(&head_ref->mutex);
432 } else { 490 } else {
433 full_ref = btrfs_delayed_node_to_ref(ref); 491 full_ref = btrfs_delayed_node_to_ref(ref);
@@ -453,6 +511,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
453 */ 511 */
454 kfree(ref); 512 kfree(ref);
455 } else { 513 } else {
514 if (btrfs_delayed_ref_is_head(ref)) {
515 delayed_refs->num_heads++;
516 delayed_refs->num_heads_ready++;
517 }
456 delayed_refs->num_entries++; 518 delayed_refs->num_entries++;
457 trans->delayed_ref_updates++; 519 trans->delayed_ref_updates++;
458 } 520 }
@@ -522,7 +584,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
522 struct btrfs_delayed_ref_root *delayed_refs; 584 struct btrfs_delayed_ref_root *delayed_refs;
523 585
524 delayed_refs = &trans->transaction->delayed_refs; 586 delayed_refs = &trans->transaction->delayed_refs;
525 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); 587 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
526 if (ref) 588 if (ref)
527 return btrfs_delayed_node_to_head(ref); 589 return btrfs_delayed_node_to_head(ref);
528 return NULL; 590 return NULL;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c345fee9f96b..57153fcc347b 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -68,6 +68,8 @@ struct btrfs_delayed_ref_head {
68 */ 68 */
69 struct mutex mutex; 69 struct mutex mutex;
70 70
71 struct list_head cluster;
72
71 /* 73 /*
72 * when a new extent is allocated, it is just reserved in memory 74 * when a new extent is allocated, it is just reserved in memory
73 * The actual extent isn't inserted into the extent allocation tree 75 * The actual extent isn't inserted into the extent allocation tree
@@ -115,12 +117,20 @@ struct btrfs_delayed_ref_root {
115 */ 117 */
116 unsigned long num_entries; 118 unsigned long num_entries;
117 119
120 /* total number of head nodes in tree */
121 unsigned long num_heads;
122
123 /* total number of head nodes ready for processing */
124 unsigned long num_heads_ready;
125
118 /* 126 /*
119 * set when the tree is flushing before a transaction commit, 127 * set when the tree is flushing before a transaction commit,
120 * used by the throttling code to decide if new updates need 128 * used by the throttling code to decide if new updates need
121 * to be run right away 129 * to be run right away
122 */ 130 */
123 int flushing; 131 int flushing;
132
133 u64 run_delayed_start;
124}; 134};
125 135
126static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) 136static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -140,9 +150,6 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
140struct btrfs_delayed_ref_head * 150struct btrfs_delayed_ref_head *
141btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 151btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
142int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); 152int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
143int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
144 struct btrfs_delayed_ref_node *ref,
145 struct btrfs_delayed_ref_head **next_ret);
146int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 153int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
147 struct btrfs_root *root, u64 bytenr, 154 struct btrfs_root *root, u64 bytenr,
148 u64 num_bytes, u32 *refs); 155 u64 num_bytes, u32 *refs);
@@ -151,6 +158,10 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
151 u64 parent, u64 orig_ref_root, u64 ref_root, 158 u64 parent, u64 orig_ref_root, u64 ref_root,
152 u64 orig_ref_generation, u64 ref_generation, 159 u64 orig_ref_generation, u64 ref_generation,
153 u64 owner_objectid, int pin); 160 u64 owner_objectid, int pin);
161int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
162 struct btrfs_delayed_ref_head *head);
163int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
164 struct list_head *cluster, u64 search_start);
154/* 165/*
155 * a node might live in a head or a regular ref, this lets you 166 * a node might live in a head or a regular ref, this lets you
156 * test for the proper type to use. 167 * test for the proper type to use.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4f43e227a297..1f1d89b18818 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1458,7 +1458,6 @@ static int transaction_kthread(void *arg)
1458 struct btrfs_root *root = arg; 1458 struct btrfs_root *root = arg;
1459 struct btrfs_trans_handle *trans; 1459 struct btrfs_trans_handle *trans;
1460 struct btrfs_transaction *cur; 1460 struct btrfs_transaction *cur;
1461 struct btrfs_fs_info *info = root->fs_info;
1462 unsigned long now; 1461 unsigned long now;
1463 unsigned long delay; 1462 unsigned long delay;
1464 int ret; 1463 int ret;
@@ -1481,24 +1480,8 @@ static int transaction_kthread(void *arg)
1481 1480
1482 now = get_seconds(); 1481 now = get_seconds();
1483 if (now < cur->start_time || now - cur->start_time < 30) { 1482 if (now < cur->start_time || now - cur->start_time < 30) {
1484 unsigned long num_delayed;
1485 num_delayed = cur->delayed_refs.num_entries;
1486 mutex_unlock(&root->fs_info->trans_mutex); 1483 mutex_unlock(&root->fs_info->trans_mutex);
1487 delay = HZ * 5; 1484 delay = HZ * 5;
1488
1489 /*
1490 * we may have been woken up early to start
1491 * processing the delayed extent ref updates
1492 * If so, run some of them and then loop around again
1493 * to see if we need to force a commit
1494 */
1495 if (num_delayed > 64) {
1496 mutex_unlock(&info->transaction_kthread_mutex);
1497 trans = btrfs_start_transaction(root, 1);
1498 btrfs_run_delayed_refs(trans, root, 256);
1499 btrfs_end_transaction(trans, root);
1500 continue;
1501 }
1502 goto sleep; 1485 goto sleep;
1503 } 1486 }
1504 mutex_unlock(&root->fs_info->trans_mutex); 1487 mutex_unlock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8471c79b0877..3b8b6c212701 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -926,52 +926,41 @@ again:
926 return NULL; 926 return NULL;
927} 927}
928 928
929/* 929static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
930 * this starts processing the delayed reference count updates and 930 struct btrfs_root *root,
931 * extent insertions we have queued up so far. count can be 931 struct list_head *cluster)
932 * 0, which means to process everything in the tree at the start
933 * of the run (but not newly added entries), or it can be some target
934 * number you'd like to process.
935 */
936int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
937 struct btrfs_root *root, unsigned long count)
938{ 932{
939 struct rb_node *node;
940 struct btrfs_delayed_ref_root *delayed_refs; 933 struct btrfs_delayed_ref_root *delayed_refs;
941 struct btrfs_delayed_ref_node *ref; 934 struct btrfs_delayed_ref_node *ref;
942 struct btrfs_delayed_ref_head *locked_ref = NULL; 935 struct btrfs_delayed_ref_head *locked_ref = NULL;
943 int ret; 936 int ret;
937 int count = 0;
944 int must_insert_reserved = 0; 938 int must_insert_reserved = 0;
945 int run_all = count == (unsigned long)-1;
946
947 if (root == root->fs_info->extent_root)
948 root = root->fs_info->tree_root;
949 939
950 delayed_refs = &trans->transaction->delayed_refs; 940 delayed_refs = &trans->transaction->delayed_refs;
951again:
952 spin_lock(&delayed_refs->lock);
953 if (count == 0)
954 count = delayed_refs->num_entries;
955 while (1) { 941 while (1) {
956 if (!locked_ref) { 942 if (!locked_ref) {
957 /* 943 /* pick a new head ref from the cluster list */
958 * no locked ref, go find something we can 944 if (list_empty(cluster))
959 * process in the rbtree. We start at
960 * the beginning of the tree, there may be less
961 * lock contention if we do something smarter here.
962 */
963 node = rb_first(&delayed_refs->root);
964 if (!node) {
965 spin_unlock(&delayed_refs->lock);
966 break; 945 break;
967 }
968 946
969 ref = rb_entry(node, struct btrfs_delayed_ref_node, 947 locked_ref = list_entry(cluster->next,
970 rb_node); 948 struct btrfs_delayed_ref_head, cluster);
971 ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref); 949
972 if (ret) { 950 /* grab the lock that says we are going to process
973 spin_unlock(&delayed_refs->lock); 951 * all the refs for this head */
974 break; 952 ret = btrfs_delayed_ref_lock(trans, locked_ref);
953
954 /*
955 * we may have dropped the spin lock to get the head
956 * mutex lock, and that might have given someone else
957 * time to free the head. If that's true, it has been
958 * removed from our list and we can move on.
959 */
960 if (ret == -EAGAIN) {
961 locked_ref = NULL;
962 count++;
963 continue;
975 } 964 }
976 } 965 }
977 966
@@ -986,7 +975,6 @@ again:
986 * locked_ref is the head node, so we have to go one 975 * locked_ref is the head node, so we have to go one
987 * node back for any delayed ref updates 976 * node back for any delayed ref updates
988 */ 977 */
989
990 ref = select_delayed_ref(locked_ref); 978 ref = select_delayed_ref(locked_ref);
991 if (!ref) { 979 if (!ref) {
992 /* All delayed refs have been processed, Go ahead 980 /* All delayed refs have been processed, Go ahead
@@ -994,6 +982,7 @@ again:
994 * so that any accounting fixes can happen 982 * so that any accounting fixes can happen
995 */ 983 */
996 ref = &locked_ref->node; 984 ref = &locked_ref->node;
985 list_del_init(&locked_ref->cluster);
997 locked_ref = NULL; 986 locked_ref = NULL;
998 } 987 }
999 988
@@ -1007,30 +996,72 @@ again:
1007 BUG_ON(ret); 996 BUG_ON(ret);
1008 btrfs_put_delayed_ref(ref); 997 btrfs_put_delayed_ref(ref);
1009 998
1010 /* once we lock the head ref, we have to process all the 999 count++;
1011 * entries for it. So, we might end up doing more entries 1000 cond_resched();
1012 * that count was asking us to do. 1001 spin_lock(&delayed_refs->lock);
1013 */ 1002 }
1014 if (count > 0) 1003 return count;
1015 count--; 1004}
1005
1006/*
1007 * this starts processing the delayed reference count updates and
1008 * extent insertions we have queued up so far. count can be
1009 * 0, which means to process everything in the tree at the start
1010 * of the run (but not newly added entries), or it can be some target
1011 * number you'd like to process.
1012 */
1013int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1014 struct btrfs_root *root, unsigned long count)
1015{
1016 struct rb_node *node;
1017 struct btrfs_delayed_ref_root *delayed_refs;
1018 struct btrfs_delayed_ref_node *ref;
1019 struct list_head cluster;
1020 int ret;
1021 int run_all = count == (unsigned long)-1;
1022 int run_most = 0;
1023
1024 if (root == root->fs_info->extent_root)
1025 root = root->fs_info->tree_root;
1026
1027 delayed_refs = &trans->transaction->delayed_refs;
1028 INIT_LIST_HEAD(&cluster);
1029again:
1030 spin_lock(&delayed_refs->lock);
1031 if (count == 0) {
1032 count = delayed_refs->num_entries * 2;
1033 run_most = 1;
1034 }
1035 while (1) {
1036 if (!(run_all || run_most) &&
1037 delayed_refs->num_heads_ready < 64)
1038 break;
1016 1039
1017 /* 1040 /*
1018 * we set locked_ref to null above if we're all done 1041 * go find something we can process in the rbtree. We start at
1019 * with this bytenr 1042 * the beginning of the tree, and then build a cluster
1043 * of refs to process starting at the first one we are able to
1044 * lock
1020 */ 1045 */
1021 if (!locked_ref && count == 0) 1046 ret = btrfs_find_ref_cluster(trans, &cluster,
1047 delayed_refs->run_delayed_start);
1048 if (ret)
1022 break; 1049 break;
1023 1050
1024 cond_resched(); 1051 ret = run_clustered_refs(trans, root, &cluster);
1025 spin_lock(&delayed_refs->lock); 1052 BUG_ON(ret < 0);
1053
1054 count -= min_t(unsigned long, ret, count);
1055
1056 if (count == 0)
1057 break;
1026 } 1058 }
1059
1027 if (run_all) { 1060 if (run_all) {
1028 spin_lock(&delayed_refs->lock);
1029 node = rb_first(&delayed_refs->root); 1061 node = rb_first(&delayed_refs->root);
1030 if (!node) { 1062 if (!node)
1031 spin_unlock(&delayed_refs->lock);
1032 goto out; 1063 goto out;
1033 } 1064 count = (unsigned long)-1;
1034 1065
1035 while (node) { 1066 while (node) {
1036 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1067 ref = rb_entry(node, struct btrfs_delayed_ref_node,
@@ -1052,11 +1083,11 @@ again:
1052 node = rb_next(node); 1083 node = rb_next(node);
1053 } 1084 }
1054 spin_unlock(&delayed_refs->lock); 1085 spin_unlock(&delayed_refs->lock);
1055 count = (unsigned long)-1;
1056 schedule_timeout(1); 1086 schedule_timeout(1);
1057 goto again; 1087 goto again;
1058 } 1088 }
1059out: 1089out:
1090 spin_unlock(&delayed_refs->lock);
1060 return 0; 1091 return 0;
1061} 1092}
1062 1093
@@ -2407,12 +2438,18 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2407 */ 2438 */
2408 head->node.in_tree = 0; 2439 head->node.in_tree = 0;
2409 rb_erase(&head->node.rb_node, &delayed_refs->root); 2440 rb_erase(&head->node.rb_node, &delayed_refs->root);
2441
2410 delayed_refs->num_entries--; 2442 delayed_refs->num_entries--;
2411 2443
2412 /* 2444 /*
2413 * we don't take a ref on the node because we're removing it from the 2445 * we don't take a ref on the node because we're removing it from the
2414 * tree, so we just steal the ref the tree was holding. 2446 * tree, so we just steal the ref the tree was holding.
2415 */ 2447 */
2448 delayed_refs->num_heads--;
2449 if (list_empty(&head->cluster))
2450 delayed_refs->num_heads_ready--;
2451
2452 list_del_init(&head->cluster);
2416 spin_unlock(&delayed_refs->lock); 2453 spin_unlock(&delayed_refs->lock);
2417 2454
2418 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 2455 ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
@@ -3705,6 +3742,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3705 struct btrfs_path *path; 3742 struct btrfs_path *path;
3706 int i; 3743 int i;
3707 int orig_level; 3744 int orig_level;
3745 int update_count;
3708 struct btrfs_root_item *root_item = &root->root_item; 3746 struct btrfs_root_item *root_item = &root->root_item;
3709 3747
3710 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 3748 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
@@ -3746,6 +3784,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3746 } 3784 }
3747 } 3785 }
3748 while (1) { 3786 while (1) {
3787 unsigned long update;
3749 wret = walk_down_tree(trans, root, path, &level); 3788 wret = walk_down_tree(trans, root, path, &level);
3750 if (wret > 0) 3789 if (wret > 0)
3751 break; 3790 break;
@@ -3764,6 +3803,14 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3764 } 3803 }
3765 atomic_inc(&root->fs_info->throttle_gen); 3804 atomic_inc(&root->fs_info->throttle_gen);
3766 wake_up(&root->fs_info->transaction_throttle); 3805 wake_up(&root->fs_info->transaction_throttle);
3806 for (update_count = 0; update_count < 16; update_count++) {
3807 update = trans->delayed_ref_updates;
3808 trans->delayed_ref_updates = 0;
3809 if (update)
3810 btrfs_run_delayed_refs(trans, root, update);
3811 else
3812 break;
3813 }
3767 } 3814 }
3768 for (i = 0; i <= orig_level; i++) { 3815 for (i = 0; i <= orig_level; i++) {
3769 if (path->nodes[i]) { 3816 if (path->nodes[i]) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f94c2ad8996c..903edab3659a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -68,7 +68,10 @@ static noinline int join_transaction(struct btrfs_root *root)
68 68
69 cur_trans->delayed_refs.root.rb_node = NULL; 69 cur_trans->delayed_refs.root.rb_node = NULL;
70 cur_trans->delayed_refs.num_entries = 0; 70 cur_trans->delayed_refs.num_entries = 0;
71 cur_trans->delayed_refs.num_heads_ready = 0;
72 cur_trans->delayed_refs.num_heads = 0;
71 cur_trans->delayed_refs.flushing = 0; 73 cur_trans->delayed_refs.flushing = 0;
74 cur_trans->delayed_refs.run_delayed_start = 0;
72 spin_lock_init(&cur_trans->delayed_refs.lock); 75 spin_lock_init(&cur_trans->delayed_refs.lock);
73 76
74 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 77 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
@@ -287,13 +290,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
287{ 290{
288 struct btrfs_transaction *cur_trans; 291 struct btrfs_transaction *cur_trans;
289 struct btrfs_fs_info *info = root->fs_info; 292 struct btrfs_fs_info *info = root->fs_info;
290 293 int count = 0;
291 if (trans->delayed_ref_updates && 294
292 (trans->transaction->delayed_refs.flushing || 295 while (count < 4) {
293 trans->transaction->delayed_refs.num_entries > 16384)) { 296 unsigned long cur = trans->delayed_ref_updates;
294 btrfs_run_delayed_refs(trans, root, trans->delayed_ref_updates); 297 trans->delayed_ref_updates = 0;
295 } else if (trans->transaction->delayed_refs.num_entries > 64) { 298 if (cur &&
296 wake_up_process(root->fs_info->transaction_kthread); 299 trans->transaction->delayed_refs.num_heads_ready > 64) {
300 trans->delayed_ref_updates = 0;
301 btrfs_run_delayed_refs(trans, root, cur);
302 } else {
303 break;
304 }
305 count++;
297 } 306 }
298 307
299 mutex_lock(&info->trans_mutex); 308 mutex_lock(&info->trans_mutex);
@@ -929,7 +938,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
929 */ 938 */
930 trans->transaction->delayed_refs.flushing = 1; 939 trans->transaction->delayed_refs.flushing = 1;
931 940
932 ret = btrfs_run_delayed_refs(trans, root, (u64)-1); 941 ret = btrfs_run_delayed_refs(trans, root, 0);
933 BUG_ON(ret); 942 BUG_ON(ret);
934 943
935 INIT_LIST_HEAD(&dirty_fs_roots); 944 INIT_LIST_HEAD(&dirty_fs_roots);