author    Liu Bo <bo.li.liu@oracle.com>    2013-10-14 00:59:45 -0400
committer Chris Mason <clm@fb.com>         2014-01-28 16:19:22 -0500
commit    c46effa601f869f3d20a7386a745d9c002838eb8 (patch)
tree      7aa114c1a78e1834950a34524c8ada82569af50b
parent    e20d6c5ba38d066c7dc0f7d3b68da14b9ae7fe37 (diff)
Btrfs: introduce a head ref rbtree
The way we process delayed refs is:

1) get a bunch of head refs,
2) pick up one head ref,
3) go back one node for any delayed ref updates.

Head refs are linked into the same rbtree as the delayed refs themselves,
so in stage 1) we have to walk the tree node by node, over not only the
head refs but also the plain delayed refs. When a great number of delayed
refs are pending, this costs a lot of time.

Here we introduce a separate rbtree specifically for head refs: it holds
only head refs, so that trouble goes away.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <clm@fb.com>
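For orientation, here is a condensed sketch of what the head-only lookup
amounts to once the patch is in. It mirrors the reworked find_ref_head()
in the diff below; the helper name head_lookup is ours (hypothetical),
and it assumes the kernel rbtree API from <linux/rbtree.h>:

#include <linux/rbtree.h>

/*
 * Sketch, not part of the patch: walk a tree that holds only head
 * refs. Every node is a btrfs_delayed_ref_head, so there is no
 * btrfs_delayed_ref_is_head() test and no stepping over plain
 * delayed refs on the way to the head we want.
 */
static struct btrfs_delayed_ref_head *
head_lookup(struct rb_root *href_root, u64 bytenr)
{
	struct rb_node *n = href_root->rb_node;
	struct btrfs_delayed_ref_head *entry;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head,
				 href_node);
		if (bytenr < entry->node.bytenr)
			n = n->rb_left;
		else if (bytenr > entry->node.bytenr)
			n = n->rb_right;
		else
			return entry;	/* exact match */
	}
	return NULL;
}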
-rw-r--r--   fs/btrfs/delayed-ref.c   126
-rw-r--r--   fs/btrfs/delayed-ref.h     5
-rw-r--r--   fs/btrfs/disk-io.c         3
-rw-r--r--   fs/btrfs/extent-tree.c    21
-rw-r--r--   fs/btrfs/transaction.c     4
5 files changed, 99 insertions, 60 deletions
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e4d467be2dd4..9bbac6ddf415 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
 	return NULL;
 }
 
+/* insert a new ref to head ref rbtree */
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+						   struct rb_node *node)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent_node = NULL;
+	struct btrfs_delayed_ref_head *entry;
+	struct btrfs_delayed_ref_head *ins;
+	u64 bytenr;
+
+	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+	bytenr = ins->node.bytenr;
+	while (*p) {
+		parent_node = *p;
+		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
+				 href_node);
+
+		if (bytenr < entry->node.bytenr)
+			p = &(*p)->rb_left;
+		else if (bytenr > entry->node.bytenr)
+			p = &(*p)->rb_right;
+		else
+			return entry;
+	}
+
+	rb_link_node(node, parent_node, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
 /*
  * find an head entry based on bytenr. This returns the delayed ref
  * head if it was able to find one, or NULL if nothing was in that spot.
  * If return_bigger is given, the next bigger entry is returned if no exact
  * match is found.
  */
-static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
-				  u64 bytenr,
-				  struct btrfs_delayed_ref_node **last,
-				  int return_bigger)
+static struct btrfs_delayed_ref_head *
+find_ref_head(struct rb_root *root, u64 bytenr,
+	      struct btrfs_delayed_ref_head **last, int return_bigger)
 {
 	struct rb_node *n;
-	struct btrfs_delayed_ref_node *entry;
+	struct btrfs_delayed_ref_head *entry;
 	int cmp = 0;
 
 again:
 	n = root->rb_node;
 	entry = NULL;
 	while (n) {
-		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
-		WARN_ON(!entry->in_tree);
+		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
 		if (last)
 			*last = entry;
 
-		if (bytenr < entry->bytenr)
+		if (bytenr < entry->node.bytenr)
 			cmp = -1;
-		else if (bytenr > entry->bytenr)
-			cmp = 1;
-		else if (!btrfs_delayed_ref_is_head(entry))
+		else if (bytenr > entry->node.bytenr)
 			cmp = 1;
 		else
 			cmp = 0;
@@ -203,12 +229,12 @@ again:
 	}
 	if (entry && return_bigger) {
 		if (cmp > 0) {
-			n = rb_next(&entry->rb_node);
+			n = rb_next(&entry->href_node);
 			if (!n)
 				n = rb_first(root);
-			entry = rb_entry(n, struct btrfs_delayed_ref_node,
-					 rb_node);
-			bytenr = entry->bytenr;
+			entry = rb_entry(n, struct btrfs_delayed_ref_head,
+					 href_node);
+			bytenr = entry->node.bytenr;
 			return_bigger = 0;
 			goto again;
 		}
@@ -246,6 +272,12 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 				    struct btrfs_delayed_ref_node *ref)
 {
 	rb_erase(&ref->rb_node, &delayed_refs->root);
+	if (btrfs_delayed_ref_is_head(ref)) {
+		struct btrfs_delayed_ref_head *head;
+
+		head = btrfs_delayed_node_to_head(ref);
+		rb_erase(&head->href_node, &delayed_refs->href_root);
+	}
 	ref->in_tree = 0;
 	btrfs_put_delayed_ref(ref);
 	delayed_refs->num_entries--;
@@ -379,42 +411,35 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
 	int count = 0;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct rb_node *node;
-	struct btrfs_delayed_ref_node *ref;
-	struct btrfs_delayed_ref_head *head;
+	struct btrfs_delayed_ref_head *head = NULL;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	if (start == 0) {
-		node = rb_first(&delayed_refs->root);
-	} else {
-		ref = NULL;
-		find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
-		if (ref) {
-			node = &ref->rb_node;
-		} else
-			node = rb_first(&delayed_refs->root);
+	node = rb_first(&delayed_refs->href_root);
+
+	if (start) {
+		find_ref_head(&delayed_refs->href_root, start + 1, &head, 1);
+		if (head)
+			node = &head->href_node;
 	}
 again:
 	while (node && count < 32) {
-		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-		if (btrfs_delayed_ref_is_head(ref)) {
-			head = btrfs_delayed_node_to_head(ref);
-			if (list_empty(&head->cluster)) {
-				list_add_tail(&head->cluster, cluster);
-				delayed_refs->run_delayed_start =
-					head->node.bytenr;
-				count++;
-
-				WARN_ON(delayed_refs->num_heads_ready == 0);
-				delayed_refs->num_heads_ready--;
-			} else if (count) {
-				/* the goal of the clustering is to find extents
-				 * that are likely to end up in the same extent
-				 * leaf on disk. So, we don't want them spread
-				 * all over the tree. Stop now if we've hit
-				 * a head that was already in use
-				 */
-				break;
-			}
+		head = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+		if (list_empty(&head->cluster)) {
+			list_add_tail(&head->cluster, cluster);
+			delayed_refs->run_delayed_start =
+				head->node.bytenr;
+			count++;
+
+			WARN_ON(delayed_refs->num_heads_ready == 0);
+			delayed_refs->num_heads_ready--;
+		} else if (count) {
+			/* the goal of the clustering is to find extents
+			 * that are likely to end up in the same extent
+			 * leaf on disk. So, we don't want them spread
+			 * all over the tree. Stop now if we've hit
+			 * a head that was already in use
+			 */
+			break;
 		}
 		node = rb_next(node);
 	}
@@ -426,7 +451,7 @@ again:
 		 * clusters. start from the beginning and try again
 		 */
 		start = 0;
-		node = rb_first(&delayed_refs->root);
+		node = rb_first(&delayed_refs->href_root);
 		goto again;
 	}
 	return 1;
@@ -612,6 +637,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 		 */
 		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 	} else {
+		htree_insert(&delayed_refs->href_root, &head_ref->href_node);
 		delayed_refs->num_heads++;
 		delayed_refs->num_heads_ready++;
 		delayed_refs->num_entries++;
@@ -869,14 +895,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
 {
-	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
-	if (ref)
-		return btrfs_delayed_node_to_head(ref);
-	return NULL;
+	return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0);
 }
 
 void btrfs_delayed_ref_exit(void)
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70b962cc177d..a54c9d47918f 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -83,6 +83,8 @@ struct btrfs_delayed_ref_head {
 
 	struct list_head cluster;
 
+	struct rb_node href_node;
+
 	struct btrfs_delayed_extent_op *extent_op;
 	/*
 	 * when a new extent is allocated, it is just reserved in memory
@@ -118,6 +120,9 @@ struct btrfs_delayed_data_ref {
 struct btrfs_delayed_ref_root {
 	struct rb_root root;
 
+	/* head ref rbtree */
+	struct rb_root href_root;
+
 	/* this spin lock protects the rbtree and the entries inside */
 	spinlock_t lock;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8072cfa8a3b1..435ef132b800 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3842,6 +3842,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
+		if (head)
+			rb_erase(&head->href_node, &delayed_refs->href_root);
+
 		delayed_refs->num_entries--;
 		spin_unlock(&delayed_refs->lock);
 		if (head) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c01509dd8ab..d15b4fc07554 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2438,6 +2438,10 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
+		if (btrfs_delayed_ref_is_head(ref)) {
+			rb_erase(&locked_ref->href_node,
+				 &delayed_refs->href_root);
+		}
 		delayed_refs->num_entries--;
 		if (!btrfs_delayed_ref_is_head(ref)) {
 			/*
@@ -2640,7 +2644,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 {
 	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
-	struct btrfs_delayed_ref_node *ref;
+	struct btrfs_delayed_ref_head *head;
 	struct list_head cluster;
 	int ret;
 	u64 delayed_start;
@@ -2770,18 +2774,18 @@ again:
 		spin_lock(&delayed_refs->lock);
 	}
 
-	node = rb_first(&delayed_refs->root);
+	node = rb_first(&delayed_refs->href_root);
 	if (!node)
 		goto out;
 	count = (unsigned long)-1;
 
 	while (node) {
-		ref = rb_entry(node, struct btrfs_delayed_ref_node,
-			       rb_node);
-		if (btrfs_delayed_ref_is_head(ref)) {
-			struct btrfs_delayed_ref_head *head;
+		head = rb_entry(node, struct btrfs_delayed_ref_head,
+				href_node);
+		if (btrfs_delayed_ref_is_head(&head->node)) {
+			struct btrfs_delayed_ref_node *ref;
 
-			head = btrfs_delayed_node_to_head(ref);
+			ref = &head->node;
 			atomic_inc(&ref->refs);
 
 			spin_unlock(&delayed_refs->lock);
@@ -2795,6 +2799,8 @@ again:
 			btrfs_put_delayed_ref(ref);
 			cond_resched();
 			goto again;
+		} else {
+			WARN_ON(1);
 		}
 		node = rb_next(node);
 	}
@@ -5956,6 +5962,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	 */
 	head->node.in_tree = 0;
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
+	rb_erase(&head->href_node, &delayed_refs->href_root);
 
 	delayed_refs->num_entries--;
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c6a872a8a468..14516375777e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,7 +62,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 	WARN_ON(atomic_read(&transaction->use_count) == 0);
 	if (atomic_dec_and_test(&transaction->use_count)) {
 		BUG_ON(!list_empty(&transaction->list));
-		WARN_ON(transaction->delayed_refs.root.rb_node);
+		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.root));
+		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
 		while (!list_empty(&transaction->pending_chunks)) {
 			struct extent_map *em;
 
@@ -184,6 +185,7 @@ loop:
 	cur_trans->start_time = get_seconds();
 
 	cur_trans->delayed_refs.root = RB_ROOT;
+	cur_trans->delayed_refs.href_root = RB_ROOT;
 	cur_trans->delayed_refs.num_entries = 0;
 	cur_trans->delayed_refs.num_heads_ready = 0;
 	cur_trans->delayed_refs.num_heads = 0;
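
Taken together, after this patch a head ref is linked in two rbtrees at
once, so every site that inserts or erases a head must touch both trees.
A minimal sketch of that pairing, with hypothetical helper names
(head_ref_attach/head_ref_detach are not in the patch; tree_insert and
htree_insert are the patch's own functions, collision handling omitted):

static void head_ref_attach(struct btrfs_delayed_ref_root *delayed_refs,
			    struct btrfs_delayed_ref_head *head)
{
	/* shared tree of heads + delayed refs, as before */
	tree_insert(&delayed_refs->root, &head->node.rb_node);
	/* new head-only index */
	htree_insert(&delayed_refs->href_root, &head->href_node);
}

static void head_ref_detach(struct btrfs_delayed_ref_root *delayed_refs,
			    struct btrfs_delayed_ref_head *head)
{
	head->node.in_tree = 0;
	rb_erase(&head->node.rb_node, &delayed_refs->root);
	rb_erase(&head->href_node, &delayed_refs->href_root);
}

Note that the patch adds no separate counters for href_root: num_entries,
num_heads and num_heads_ready are maintained exactly as before, and
href_root serves purely as an index over the heads in the shared tree.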