-rw-r--r--	fs/btrfs/ctree.h	1
-rw-r--r--	fs/btrfs/delayed-ref.c	130
-rw-r--r--	fs/btrfs/delayed-ref.h	17
-rw-r--r--	fs/btrfs/disk-io.c	17
-rw-r--r--	fs/btrfs/extent-tree.c	149
-rw-r--r--	fs/btrfs/transaction.c	25
6 files changed, 226 insertions(+), 113 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ced5fd85dc36..08d9f8d15538 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -720,6 +720,7 @@ struct btrfs_fs_info {
 	struct mutex drop_mutex;
 	struct mutex volume_mutex;
 	struct mutex tree_reloc_mutex;
+
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 3e7eeaf86408..759fa247ced8 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -93,7 +93,8 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
  * ref if it was able to find one, or NULL if nothing was in that spot
  */
 static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
-				  u64 bytenr, u64 parent)
+				  u64 bytenr, u64 parent,
+				  struct btrfs_delayed_ref_node **last)
 {
 	struct rb_node *n = root->rb_node;
 	struct btrfs_delayed_ref_node *entry;
@@ -102,6 +103,8 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
 	while (n) {
 		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
 		WARN_ON(!entry->in_tree);
+		if (last)
+			*last = entry;
 
 		cmp = comp_entry(entry, bytenr, parent);
 		if (cmp < 0)
@@ -114,45 +117,99 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
 	return NULL;
 }
 
-/*
- * Locking on delayed refs is done by taking a lock on the head node,
- * which has the (impossible) parent id of (u64)-1. Once a lock is held
- * on the head node, you're allowed (and required) to process all the
- * delayed refs for a given byte number in the tree.
- *
- * This will walk forward in the rbtree until it finds a head node it
- * is able to lock. It might not lock the delayed ref you asked for,
- * and so it will return the one it did lock in next_ret and return 0.
- *
- * If no locks are taken, next_ret is set to null and 1 is returned. This
- * means there are no more unlocked head nodes in the rbtree.
- */
-int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
-			   struct btrfs_delayed_ref_node *ref,
-			   struct btrfs_delayed_ref_head **next_ret)
+int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
+			   struct btrfs_delayed_ref_head *head)
 {
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	assert_spin_locked(&delayed_refs->lock);
+	if (mutex_trylock(&head->mutex))
+		return 0;
+
+	atomic_inc(&head->node.refs);
+	spin_unlock(&delayed_refs->lock);
+
+	mutex_lock(&head->mutex);
+	spin_lock(&delayed_refs->lock);
+	if (!head->node.in_tree) {
+		mutex_unlock(&head->mutex);
+		btrfs_put_delayed_ref(&head->node);
+		return -EAGAIN;
+	}
+	btrfs_put_delayed_ref(&head->node);
+	return 0;
+}
+
+int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
+			   struct list_head *cluster, u64 start)
+{
+	int count = 0;
+	struct btrfs_delayed_ref_root *delayed_refs;
 	struct rb_node *node;
+	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *head;
-	int ret = 0;
 
-	while (1) {
+	delayed_refs = &trans->transaction->delayed_refs;
+	if (start == 0) {
+		node = rb_first(&delayed_refs->root);
+	} else {
+		ref = NULL;
+		tree_search(&delayed_refs->root, start, (u64)-1, &ref);
+		if (ref) {
+			struct btrfs_delayed_ref_node *tmp;
+
+			node = rb_prev(&ref->rb_node);
+			while (node) {
+				tmp = rb_entry(node,
+					       struct btrfs_delayed_ref_node,
+					       rb_node);
+				if (tmp->bytenr < start)
+					break;
+				ref = tmp;
+				node = rb_prev(&ref->rb_node);
+			}
+			node = &ref->rb_node;
+		} else
+			node = rb_first(&delayed_refs->root);
+	}
+again:
+	while (node && count < 32) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 		if (btrfs_delayed_ref_is_head(ref)) {
 			head = btrfs_delayed_node_to_head(ref);
-			if (mutex_trylock(&head->mutex)) {
-				*next_ret = head;
-				ret = 0;
+			if (list_empty(&head->cluster)) {
+				list_add_tail(&head->cluster, cluster);
+				delayed_refs->run_delayed_start =
+					head->node.bytenr;
+				count++;
+
+				WARN_ON(delayed_refs->num_heads_ready == 0);
+				delayed_refs->num_heads_ready--;
+			} else if (count) {
+				/* the goal of the clustering is to find extents
+				 * that are likely to end up in the same extent
+				 * leaf on disk. So, we don't want them spread
+				 * all over the tree. Stop now if we've hit
+				 * a head that was already in use
+				 */
 				break;
 			}
 		}
-		node = rb_next(&ref->rb_node);
-		if (!node) {
-			ret = 1;
-			*next_ret = NULL;
-			break;
-		}
-		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		node = rb_next(node);
 	}
-	return ret;
+	if (count) {
+		return 0;
+	} else if (start) {
+		/*
+		 * we've gone to the end of the rbtree without finding any
+		 * clusters. start from the beginning and try again
+		 */
+		start = 0;
+		node = rb_first(&delayed_refs->root);
+		goto again;
+	}
+	return 1;
 }
 
 /*
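btrfs_find_ref_cluster() above gathers up to 32 adjacent, unclaimed head refs so that extents likely to share an extent-tree leaf are processed together. A minimal userspace model of that scan, for illustration only: a sorted array stands in for the rbtree (keyed by bytenr) and a "claimed" flag stands in for the list_empty(&head->cluster) test. All names here (find_cluster, struct head, CLUSTER_MAX) are invented for this sketch.

#include <stdio.h>

#define CLUSTER_MAX 32	/* same cap as the kernel's "count < 32" */

struct head {
	unsigned long long bytenr;
	int claimed;		/* already on someone's cluster list */
};

/* collect up to CLUSTER_MAX adjacent unclaimed heads at bytenr >= start,
 * wrapping around to the front once, like the "goto again" above.
 * Returns 0 if anything was clustered, 1 if nothing is available. */
static int find_cluster(struct head *heads, int nr, unsigned long long start,
			int *out, int *out_nr)
{
	int count = 0;
	int i = 0;

	/* position near start (the tree_search() call above) */
	while (i < nr && heads[i].bytenr < start)
		i++;
again:
	for (; i < nr && count < CLUSTER_MAX; i++) {
		if (!heads[i].claimed) {
			heads[i].claimed = 1;
			out[count++] = i;
		} else if (count) {
			/* keep clusters adjacent: stop at a busy head */
			break;
		}
	}
	if (!count && start) {
		/* nothing found past start: wrap around once */
		start = 0;
		i = 0;
		goto again;
	}
	*out_nr = count;
	return count ? 0 : 1;
}

int main(void)
{
	struct head heads[] = {
		{ 4096, 0 }, { 8192, 1 }, { 12288, 0 }, { 16384, 0 },
	};
	int picked[CLUSTER_MAX], nr;

	if (!find_cluster(heads, 4, 10000, picked, &nr))
		printf("clustered %d heads starting at %llu\n",
		       nr, heads[picked[0]].bytenr);
	return 0;
}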
@@ -178,7 +235,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
 	if (ref) {
 		prev_node = rb_prev(&ref->rb_node);
 		if (!prev_node)
@@ -240,7 +297,7 @@ again:
 	}
 
 	spin_lock(&delayed_refs->lock);
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
 	if (ref) {
 		head = btrfs_delayed_node_to_head(ref);
 		if (mutex_trylock(&head->mutex)) {
@@ -384,7 +441,7 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_delayed_ref_node *existing;
 	struct btrfs_delayed_ref *full_ref;
-	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
 	int must_insert_reserved = 0;
@@ -428,6 +485,7 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	if (btrfs_delayed_ref_is_head(ref)) {
 		head_ref = btrfs_delayed_node_to_head(ref);
 		head_ref->must_insert_reserved = must_insert_reserved;
+		INIT_LIST_HEAD(&head_ref->cluster);
 		mutex_init(&head_ref->mutex);
 	} else {
 		full_ref = btrfs_delayed_node_to_ref(ref);
@@ -453,6 +511,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 		 */
 		kfree(ref);
 	} else {
+		if (btrfs_delayed_ref_is_head(ref)) {
+			delayed_refs->num_heads++;
+			delayed_refs->num_heads_ready++;
+		}
 		delayed_refs->num_entries++;
 		trans->delayed_ref_updates++;
 	}
@@ -522,7 +584,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
 	if (ref)
 		return btrfs_delayed_node_to_head(ref);
 	return NULL;
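btrfs_delayed_ref_lock() (earlier in this file) is the classic sleep-then-revalidate dance: try the mutex while still holding the spinlock; on contention, pin the head with a reference, drop the spinlock, block on the mutex, then retake the spinlock and check the head is still in the tree, returning -EAGAIN if it was freed in the meantime. A userspace sketch of the same pattern, with pthread locks standing in for the kernel spinlock and mutex (struct pinned_head, head_lock and head_put are invented names):

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; /* models delayed_refs->lock */

struct pinned_head {
	pthread_mutex_t mutex;
	atomic_int refs;
	int in_tree;		/* cleared when erased from the tree */
};

static void head_put(struct pinned_head *h)
{
	if (atomic_fetch_sub(&h->refs, 1) == 1)
		free(h);	/* last reference dropped */
}

/* returns 0 with h->mutex held, or -EAGAIN if h died while we slept */
int head_lock(struct pinned_head *h)
{
	/* caller holds tree_lock, mirroring assert_spin_locked() */
	if (pthread_mutex_trylock(&h->mutex) == 0)
		return 0;

	atomic_fetch_add(&h->refs, 1);	/* pin h across the unlock */
	pthread_mutex_unlock(&tree_lock);

	pthread_mutex_lock(&h->mutex);	/* may block while others run */
	pthread_mutex_lock(&tree_lock);
	if (!h->in_tree) {
		/* h was erased while we slept; tell the caller to move on */
		pthread_mutex_unlock(&h->mutex);
		head_put(h);
		return -EAGAIN;
	}
	head_put(h);
	return 0;
}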
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c345fee9f96b..57153fcc347b 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -68,6 +68,8 @@ struct btrfs_delayed_ref_head {
 	 */
 	struct mutex mutex;
 
+	struct list_head cluster;
+
 	/*
 	 * when a new extent is allocated, it is just reserved in memory
 	 * The actual extent isn't inserted into the extent allocation tree
@@ -115,12 +117,20 @@ struct btrfs_delayed_ref_root {
 	 */
 	unsigned long num_entries;
 
+	/* total number of head nodes in tree */
+	unsigned long num_heads;
+
+	/* total number of head nodes ready for processing */
+	unsigned long num_heads_ready;
+
 	/*
 	 * set when the tree is flushing before a transaction commit,
 	 * used by the throttling code to decide if new updates need
 	 * to be run right away
 	 */
 	int flushing;
+
+	u64 run_delayed_start;
 };
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -140,9 +150,6 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
 int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
-int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
-			   struct btrfs_delayed_ref_node *ref,
-			   struct btrfs_delayed_ref_head **next_ret);
 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 bytenr,
 			    u64 num_bytes, u32 *refs);
@@ -151,6 +158,10 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 			  u64 parent, u64 orig_ref_root, u64 ref_root,
 			  u64 orig_ref_generation, u64 ref_generation,
 			  u64 owner_objectid, int pin);
+int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
+			   struct btrfs_delayed_ref_head *head);
+int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
+			   struct list_head *cluster, u64 search_start);
 /*
  * a node might live in a head or a regular ref, this lets you
  * test for the proper type to use.
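The new cluster member works because of a standard kernel list idiom: an initialized but unlinked list_head points at itself, so list_empty() doubles as an "is this head already claimed by a cluster?" test, and list_del_init() makes the head claimable again. A self-contained model of that idiom (a minimal reimplementation for illustration, not the kernel's <linux/list.h>):

#include <assert.h>

struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);	/* back to "not on any list" */
}

int main(void)
{
	struct list_head cluster, head;

	INIT_LIST_HEAD(&cluster);
	INIT_LIST_HEAD(&head);
	assert(list_empty(&head));	/* not claimed by any cluster */
	list_add_tail(&head, &cluster);
	assert(!list_empty(&head));	/* claimed */
	list_del_init(&head);
	assert(list_empty(&head));	/* claimable again */
	return 0;
}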
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4f43e227a297..1f1d89b18818 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1458,7 +1458,6 @@ static int transaction_kthread(void *arg)
 	struct btrfs_root *root = arg;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_transaction *cur;
-	struct btrfs_fs_info *info = root->fs_info;
 	unsigned long now;
 	unsigned long delay;
 	int ret;
@@ -1481,24 +1480,8 @@ static int transaction_kthread(void *arg)
 
 		now = get_seconds();
 		if (now < cur->start_time || now - cur->start_time < 30) {
-			unsigned long num_delayed;
-			num_delayed = cur->delayed_refs.num_entries;
 			mutex_unlock(&root->fs_info->trans_mutex);
 			delay = HZ * 5;
-
-			/*
-			 * we may have been woken up early to start
-			 * processing the delayed extent ref updates
-			 * If so, run some of them and then loop around again
-			 * to see if we need to force a commit
-			 */
-			if (num_delayed > 64) {
-				mutex_unlock(&info->transaction_kthread_mutex);
-				trans = btrfs_start_transaction(root, 1);
-				btrfs_run_delayed_refs(trans, root, 256);
-				btrfs_end_transaction(trans, root);
-				continue;
-			}
 			goto sleep;
 		}
 		mutex_unlock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8471c79b0877..3b8b6c212701 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -926,52 +926,41 @@ again:
 	return NULL;
 }
 
-/*
- * this starts processing the delayed reference count updates and
- * extent insertions we have queued up so far. count can be
- * 0, which means to process everything in the tree at the start
- * of the run (but not newly added entries), or it can be some target
- * number you'd like to process.
- */
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, unsigned long count)
+static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root,
+				       struct list_head *cluster)
 {
-	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
 	int ret;
+	int count = 0;
 	int must_insert_reserved = 0;
-	int run_all = count == (unsigned long)-1;
-
-	if (root == root->fs_info->extent_root)
-		root = root->fs_info->tree_root;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-again:
-	spin_lock(&delayed_refs->lock);
-	if (count == 0)
-		count = delayed_refs->num_entries;
 	while (1) {
 		if (!locked_ref) {
-			/*
-			 * no locked ref, go find something we can
-			 * process in the rbtree. We start at
-			 * the beginning of the tree, there may be less
-			 * lock contention if we do something smarter here.
-			 */
-			node = rb_first(&delayed_refs->root);
-			if (!node) {
-				spin_unlock(&delayed_refs->lock);
+			/* pick a new head ref from the cluster list */
+			if (list_empty(cluster))
 				break;
-			}
 
-			ref = rb_entry(node, struct btrfs_delayed_ref_node,
-				       rb_node);
-			ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
-				break;
+			locked_ref = list_entry(cluster->next,
+				     struct btrfs_delayed_ref_head, cluster);
+
+			/* grab the lock that says we are going to process
+			 * all the refs for this head */
+			ret = btrfs_delayed_ref_lock(trans, locked_ref);
+
+			/*
+			 * we may have dropped the spin lock to get the head
+			 * mutex lock, and that might have given someone else
+			 * time to free the head. If that's true, it has been
+			 * removed from our list and we can move on.
+			 */
+			if (ret == -EAGAIN) {
+				locked_ref = NULL;
+				count++;
+				continue;
 			}
 		}
 
@@ -986,7 +975,6 @@ again:
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
 		 */
-
 		ref = select_delayed_ref(locked_ref);
 		if (!ref) {
 			/* All delayed refs have been processed, Go ahead
@@ -994,6 +982,7 @@ again:
 			 * so that any accounting fixes can happen
 			 */
 			ref = &locked_ref->node;
+			list_del_init(&locked_ref->cluster);
 			locked_ref = NULL;
 		}
 
@@ -1007,30 +996,72 @@ again:
 		BUG_ON(ret);
 		btrfs_put_delayed_ref(ref);
 
-		/* once we lock the head ref, we have to process all the
-		 * entries for it. So, we might end up doing more entries
-		 * that count was asking us to do.
-		 */
-		if (count > 0)
-			count--;
+		count++;
+		cond_resched();
+		spin_lock(&delayed_refs->lock);
+	}
+	return count;
+}
+
+/*
+ * this starts processing the delayed reference count updates and
+ * extent insertions we have queued up so far. count can be
+ * 0, which means to process everything in the tree at the start
+ * of the run (but not newly added entries), or it can be some target
+ * number you'd like to process.
+ */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, unsigned long count)
+{
+	struct rb_node *node;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_node *ref;
+	struct list_head cluster;
+	int ret;
+	int run_all = count == (unsigned long)-1;
+	int run_most = 0;
+
+	if (root == root->fs_info->extent_root)
+		root = root->fs_info->tree_root;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	INIT_LIST_HEAD(&cluster);
+again:
+	spin_lock(&delayed_refs->lock);
+	if (count == 0) {
+		count = delayed_refs->num_entries * 2;
+		run_most = 1;
+	}
+	while (1) {
+		if (!(run_all || run_most) &&
+		    delayed_refs->num_heads_ready < 64)
+			break;
 
 		/*
-		 * we set locked_ref to null above if we're all done
-		 * with this bytenr
+		 * go find something we can process in the rbtree. We start at
+		 * the beginning of the tree, and then build a cluster
+		 * of refs to process starting at the first one we are able to
+		 * lock
 		 */
-		if (!locked_ref && count == 0)
+		ret = btrfs_find_ref_cluster(trans, &cluster,
+					     delayed_refs->run_delayed_start);
+		if (ret)
 			break;
 
-		cond_resched();
-		spin_lock(&delayed_refs->lock);
+		ret = run_clustered_refs(trans, root, &cluster);
+		BUG_ON(ret < 0);
+
+		count -= min_t(unsigned long, ret, count);
+
+		if (count == 0)
+			break;
 	}
+
 	if (run_all) {
-		spin_lock(&delayed_refs->lock);
 		node = rb_first(&delayed_refs->root);
-		if (!node) {
-			spin_unlock(&delayed_refs->lock);
+		if (!node)
 			goto out;
-		}
+		count = (unsigned long)-1;
 
 		while (node) {
 			ref = rb_entry(node, struct btrfs_delayed_ref_node,
@@ -1052,11 +1083,11 @@ again:
 			node = rb_next(node);
 		}
 		spin_unlock(&delayed_refs->lock);
-		count = (unsigned long)-1;
 		schedule_timeout(1);
 		goto again;
 	}
 out:
+	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
 
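btrfs_run_delayed_refs() above gives `count` three meanings: (unsigned long)-1 drains the tree completely, 0 expands to num_entries * 2 (one head can carry several entries) and runs "most" of the backlog, and any other value is a best-effort target, with casual callers bailing out once fewer than 64 heads are ready. A toy model of that driver loop (the counters and run_cluster() are stand-ins invented for this sketch, not btrfs code):

#include <stdio.h>

static unsigned long num_entries = 100;
static unsigned long num_heads_ready = 100;

/* stand-in for find-cluster + run_clustered_refs; returns refs processed */
static unsigned long run_cluster(void)
{
	unsigned long done = num_entries < 32 ? num_entries : 32;

	num_entries -= done;
	num_heads_ready -= done > num_heads_ready ? num_heads_ready : done;
	return done;
}

static void run_delayed_refs(unsigned long count)
{
	int run_all = count == (unsigned long)-1;
	int run_most = 0;

	if (count == 0) {
		count = num_entries * 2;	/* "run most of it" */
		run_most = 1;
	}
	while (num_entries) {
		unsigned long ret;

		/* casual callers stop once the backlog is small */
		if (!(run_all || run_most) && num_heads_ready < 64)
			break;
		ret = run_cluster();
		count -= ret < count ? ret : count;	/* min_t() analog */
		if (count == 0)
			break;
	}
	printf("left in tree: %lu\n", num_entries);
}

int main(void)
{
	run_delayed_refs(48);	/* throttled caller: makes a dent, then stops */
	run_delayed_refs(0);	/* commit-time caller: drains the backlog */
	return 0;
}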
@@ -2407,12 +2438,18 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	 */
 	head->node.in_tree = 0;
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
+
 	delayed_refs->num_entries--;
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
 	 * tree, so we just steal the ref the tree was holding.
 	 */
+	delayed_refs->num_heads--;
+	if (list_empty(&head->cluster))
+		delayed_refs->num_heads_ready--;
+
+	list_del_init(&head->cluster);
 	spin_unlock(&delayed_refs->lock);
 
 	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
@@ -3705,6 +3742,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 	struct btrfs_path *path;
 	int i;
 	int orig_level;
+	int update_count;
 	struct btrfs_root_item *root_item = &root->root_item;
 
 	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
@@ -3746,6 +3784,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		}
 	}
 	while (1) {
+		unsigned long update;
 		wret = walk_down_tree(trans, root, path, &level);
 		if (wret > 0)
 			break;
@@ -3764,6 +3803,14 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		}
 		atomic_inc(&root->fs_info->throttle_gen);
 		wake_up(&root->fs_info->transaction_throttle);
+		for (update_count = 0; update_count < 16; update_count++) {
+			update = trans->delayed_ref_updates;
+			trans->delayed_ref_updates = 0;
+			if (update)
+				btrfs_run_delayed_refs(trans, root, update);
+			else
+				break;
+		}
 	}
 	for (i = 0; i <= orig_level; i++) {
 		if (path->nodes[i]) {
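The loop added to btrfs_drop_snapshot() is a bounded drain: each walk_down_tree() pass accrues trans->delayed_ref_updates, and up to 16 passes of btrfs_run_delayed_refs() pay that debt down before the next walk (the transaction.c hunk below applies the same idea with a bound of 4). A compact sketch of the pattern, with invented names; the halving in run_refs() just simulates refs that queue more refs:

#include <stdio.h>

static unsigned long delayed_ref_updates;	/* models trans->delayed_ref_updates */

/* stand-in for btrfs_run_delayed_refs(): running refs can queue more refs */
static void run_refs(unsigned long count)
{
	printf("running %lu updates\n", count);
	delayed_ref_updates = count / 2;	/* pretend half requeue */
}

/* the drain pattern from btrfs_drop_snapshot() above: at most 16 passes */
static void drain_accrued_refs(void)
{
	unsigned long update;
	int i;

	for (i = 0; i < 16; i++) {
		update = delayed_ref_updates;
		delayed_ref_updates = 0;	/* claim this batch */
		if (!update)
			break;			/* fully drained */
		run_refs(update);		/* may bump the counter again */
	}
}

int main(void)
{
	delayed_ref_updates = 1000;	/* work accrued by the tree walk */
	drain_accrued_refs();
	return 0;
}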
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f94c2ad8996c..903edab3659a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -68,7 +68,10 @@ static noinline int join_transaction(struct btrfs_root *root)
 
 	cur_trans->delayed_refs.root.rb_node = NULL;
 	cur_trans->delayed_refs.num_entries = 0;
+	cur_trans->delayed_refs.num_heads_ready = 0;
+	cur_trans->delayed_refs.num_heads = 0;
 	cur_trans->delayed_refs.flushing = 0;
+	cur_trans->delayed_refs.run_delayed_start = 0;
 	spin_lock_init(&cur_trans->delayed_refs.lock);
 
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
@@ -287,13 +290,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_transaction *cur_trans;
 	struct btrfs_fs_info *info = root->fs_info;
-
-	if (trans->delayed_ref_updates &&
-	    (trans->transaction->delayed_refs.flushing ||
-	     trans->transaction->delayed_refs.num_entries > 16384)) {
-		btrfs_run_delayed_refs(trans, root, trans->delayed_ref_updates);
-	} else if (trans->transaction->delayed_refs.num_entries > 64) {
-		wake_up_process(root->fs_info->transaction_kthread);
+	int count = 0;
+
+	while (count < 4) {
+		unsigned long cur = trans->delayed_ref_updates;
+		trans->delayed_ref_updates = 0;
+		if (cur &&
+		    trans->transaction->delayed_refs.num_heads_ready > 64) {
+			trans->delayed_ref_updates = 0;
+			btrfs_run_delayed_refs(trans, root, cur);
+		} else {
+			break;
+		}
+		count++;
 	}
 
 	mutex_lock(&info->trans_mutex);
@@ -929,7 +938,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	 */
 	trans->transaction->delayed_refs.flushing = 1;
 
-	ret = btrfs_run_delayed_refs(trans, root, (u64)-1);
+	ret = btrfs_run_delayed_refs(trans, root, 0);
 	BUG_ON(ret);
 
 	INIT_LIST_HEAD(&dirty_fs_roots);