summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorEdmund Nadolski <enadolski@suse.com>2017-07-12 18:20:06 -0400
committerDavid Sterba <dsterba@suse.com>2017-08-16 10:11:55 -0400
commit86d5f994425252d8a40e2184c94a2682ae8ecfbf (patch)
tree4b6014e8e13e66a102745af0d9ef761f8a4e4105 /fs
parentf6954245d9e17902a66a1253d2a3afc05e335172 (diff)
btrfs: convert preliminary reference tracking to use rbtrees
It's been known for a while that the use of multiple lists that are periodically merged was an algorithmic problem within btrfs. There are several workloads that don't complete in any reasonable amount of time (e.g. btrfs/130) and others that cause soft lockups. The solution is to use a set of rbtrees that do insertion merging for both indirect and direct refs, with the former converting refs into the latter. The result is a btrfs/130 workload that used to take several hours now takes about half of that. This runtime still isn't acceptable and a future patch will address that by moving the rbtrees higher in the stack so the lookups can be shared across multiple calls to find_parent_nodes. Signed-off-by: Edmund Nadolski <enadolski@suse.com> Signed-off-by: Jeff Mahoney <jeffm@suse.com> Reviewed-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c442
1 files changed, 285 insertions, 157 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6cac5ab8d5e0..baf907adede1 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -26,11 +26,6 @@
26#include "delayed-ref.h" 26#include "delayed-ref.h"
27#include "locking.h" 27#include "locking.h"
28 28
29enum merge_mode {
30 MERGE_IDENTICAL_KEYS = 1,
31 MERGE_IDENTICAL_PARENTS,
32};
33
34/* Just an arbitrary number so we can be sure this happened */ 29/* Just an arbitrary number so we can be sure this happened */
35#define BACKREF_FOUND_SHARED 6 30#define BACKREF_FOUND_SHARED 6
36 31
@@ -129,7 +124,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
129 * this structure records all encountered refs on the way up to the root 124 * this structure records all encountered refs on the way up to the root
130 */ 125 */
131struct prelim_ref { 126struct prelim_ref {
132 struct list_head list; 127 struct rb_node rbnode;
133 u64 root_id; 128 u64 root_id;
134 struct btrfs_key key_for_search; 129 struct btrfs_key key_for_search;
135 int level; 130 int level;
@@ -139,6 +134,18 @@ struct prelim_ref {
139 u64 wanted_disk_byte; 134 u64 wanted_disk_byte;
140}; 135};
141 136
137struct preftree {
138 struct rb_root root;
139};
140
141#define PREFTREE_INIT { .root = RB_ROOT }
142
143struct preftrees {
144 struct preftree direct; /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
145 struct preftree indirect; /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */
146 struct preftree indirect_missing_keys;
147};
148
142static struct kmem_cache *btrfs_prelim_ref_cache; 149static struct kmem_cache *btrfs_prelim_ref_cache;
143 150
144int __init btrfs_prelim_ref_init(void) 151int __init btrfs_prelim_ref_init(void)
@@ -158,6 +165,108 @@ void btrfs_prelim_ref_exit(void)
158 kmem_cache_destroy(btrfs_prelim_ref_cache); 165 kmem_cache_destroy(btrfs_prelim_ref_cache);
159} 166}
160 167
168static void free_pref(struct prelim_ref *ref)
169{
170 kmem_cache_free(btrfs_prelim_ref_cache, ref);
171}
172
173/*
174 * Return 0 when both refs are for the same block (and can be merged).
175 * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
176 * indicates a 'higher' block.
177 */
178static int prelim_ref_compare(struct prelim_ref *ref1,
179 struct prelim_ref *ref2)
180{
181 if (ref1->level < ref2->level)
182 return -1;
183 if (ref1->level > ref2->level)
184 return 1;
185 if (ref1->root_id < ref2->root_id)
186 return -1;
187 if (ref1->root_id > ref2->root_id)
188 return 1;
189 if (ref1->key_for_search.type < ref2->key_for_search.type)
190 return -1;
191 if (ref1->key_for_search.type > ref2->key_for_search.type)
192 return 1;
193 if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
194 return -1;
195 if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
196 return 1;
197 if (ref1->key_for_search.offset < ref2->key_for_search.offset)
198 return -1;
199 if (ref1->key_for_search.offset > ref2->key_for_search.offset)
200 return 1;
201 if (ref1->parent < ref2->parent)
202 return -1;
203 if (ref1->parent > ref2->parent)
204 return 1;
205
206 return 0;
207}
208
209/*
210 * Add @newref to the @root rbtree, merging identical refs.
211 *
212 * Callers should assume that newref has been freed after calling.
213 */
214static void prelim_ref_insert(struct preftree *preftree,
215 struct prelim_ref *newref)
216{
217 struct rb_root *root;
218 struct rb_node **p;
219 struct rb_node *parent = NULL;
220 struct prelim_ref *ref;
221 int result;
222
223 root = &preftree->root;
224 p = &root->rb_node;
225
226 while (*p) {
227 parent = *p;
228 ref = rb_entry(parent, struct prelim_ref, rbnode);
229 result = prelim_ref_compare(ref, newref);
230 if (result < 0) {
231 p = &(*p)->rb_left;
232 } else if (result > 0) {
233 p = &(*p)->rb_right;
234 } else {
235 /* Identical refs, merge them and free @newref */
236 struct extent_inode_elem *eie = ref->inode_list;
237
238 while (eie && eie->next)
239 eie = eie->next;
240
241 if (!eie)
242 ref->inode_list = newref->inode_list;
243 else
244 eie->next = newref->inode_list;
245 ref->count += newref->count;
246 free_pref(newref);
247 return;
248 }
249 }
250
251 rb_link_node(&newref->rbnode, parent, p);
252 rb_insert_color(&newref->rbnode, root);
253}
254
255/*
256 * Release the entire tree. We don't care about internal consistency so
257 * just free everything and then reset the tree root.
258 */
259static void prelim_release(struct preftree *preftree)
260{
261 struct prelim_ref *ref, *next_ref;
262
263 rbtree_postorder_for_each_entry_safe(ref, next_ref, &preftree->root,
264 rbnode)
265 free_pref(ref);
266
267 preftree->root = RB_ROOT;
268}
269
161/* 270/*
162 * the rules for all callers of this function are: 271 * the rules for all callers of this function are:
163 * - obtaining the parent is the goal 272 * - obtaining the parent is the goal
@@ -196,7 +305,7 @@ void btrfs_prelim_ref_exit(void)
196 * additional information that's available but not required to find the parent 305 * additional information that's available but not required to find the parent
197 * block might help in merging entries to gain some speed. 306 * block might help in merging entries to gain some speed.
198 */ 307 */
199static int add_prelim_ref(struct list_head *head, u64 root_id, 308static int add_prelim_ref(struct preftree *preftree, u64 root_id,
200 const struct btrfs_key *key, int level, u64 parent, 309 const struct btrfs_key *key, int level, u64 parent,
201 u64 wanted_disk_byte, int count, gfp_t gfp_mask) 310 u64 wanted_disk_byte, int count, gfp_t gfp_mask)
202{ 311{
@@ -243,11 +352,32 @@ static int add_prelim_ref(struct list_head *head, u64 root_id,
243 ref->count = count; 352 ref->count = count;
244 ref->parent = parent; 353 ref->parent = parent;
245 ref->wanted_disk_byte = wanted_disk_byte; 354 ref->wanted_disk_byte = wanted_disk_byte;
246 list_add_tail(&ref->list, head); 355 prelim_ref_insert(preftree, ref);
247 356
248 return 0; 357 return 0;
249} 358}
250 359
360/* direct refs use root == 0, key == NULL */
361static int add_direct_ref(struct preftrees *preftrees, int level, u64 parent,
362 u64 wanted_disk_byte, int count, gfp_t gfp_mask)
363{
364 return add_prelim_ref(&preftrees->direct, 0, NULL, level, parent,
365 wanted_disk_byte, count, gfp_mask);
366}
367
368/* indirect refs use parent == 0 */
369static int add_indirect_ref(struct preftrees *preftrees, u64 root_id,
370 const struct btrfs_key *key, int level,
371 u64 wanted_disk_byte, int count, gfp_t gfp_mask)
372{
373 struct preftree *tree = &preftrees->indirect;
374
375 if (!key)
376 tree = &preftrees->indirect_missing_keys;
377 return add_prelim_ref(tree, root_id, key, level, 0,
378 wanted_disk_byte, count, gfp_mask);
379}
380
251static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, 381static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
252 struct ulist *parents, struct prelim_ref *ref, 382 struct ulist *parents, struct prelim_ref *ref,
253 int level, u64 time_seq, const u64 *extent_item_pos, 383 int level, u64 time_seq, const u64 *extent_item_pos,
@@ -429,38 +559,63 @@ unode_aux_to_inode_list(struct ulist_node *node)
429} 559}
430 560
431/* 561/*
432 * resolve all indirect backrefs from the list 562 * We maintain three separate rbtrees: one for direct refs, one for
563 * indirect refs which have a key, and one for indirect refs which do not
564 * have a key. Each tree does merge on insertion.
565 *
566 * Once all of the references are located, we iterate over the tree of
567 * indirect refs with missing keys. An appropriate key is located and
568 * the ref is moved onto the tree for indirect refs. After all missing
569 * keys are thus located, we iterate over the indirect ref tree, resolve
570 * each reference, and then insert the resolved reference onto the
571 * direct tree (merging there too).
572 *
573 * New backrefs (i.e., for parent nodes) are added to the appropriate
574 * rbtree as they are encountered. The new backrefs are subsequently
575 * resolved as above.
433 */ 576 */
434static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, 577static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
435 struct btrfs_path *path, u64 time_seq, 578 struct btrfs_path *path, u64 time_seq,
436 struct list_head *head, 579 struct preftrees *preftrees,
437 const u64 *extent_item_pos, u64 total_refs, 580 const u64 *extent_item_pos, u64 total_refs,
438 u64 root_objectid) 581 u64 root_objectid)
439{ 582{
440 int err; 583 int err;
441 int ret = 0; 584 int ret = 0;
442 struct prelim_ref *ref;
443 struct prelim_ref *ref_safe;
444 struct prelim_ref *new_ref;
445 struct ulist *parents; 585 struct ulist *parents;
446 struct ulist_node *node; 586 struct ulist_node *node;
447 struct ulist_iterator uiter; 587 struct ulist_iterator uiter;
588 struct rb_node *rnode;
448 589
449 parents = ulist_alloc(GFP_NOFS); 590 parents = ulist_alloc(GFP_NOFS);
450 if (!parents) 591 if (!parents)
451 return -ENOMEM; 592 return -ENOMEM;
452 593
453 /* 594 /*
454 * _safe allows us to insert directly after the current item without 595 * We could trade memory usage for performance here by iterating
455 * iterating over the newly inserted items. 596 * the tree, allocating new refs for each insertion, and then
456 * we're also allowed to re-assign ref during iteration. 597 * freeing the entire indirect tree when we're done. In some test
598 * cases, the tree can grow quite large (~200k objects).
457 */ 599 */
458 list_for_each_entry_safe(ref, ref_safe, head, list) { 600 while ((rnode = rb_first(&preftrees->indirect.root))) {
459 if (ref->parent) /* already direct */ 601 struct prelim_ref *ref;
460 continue; 602
461 if (ref->count == 0) 603 ref = rb_entry(rnode, struct prelim_ref, rbnode);
604 if (WARN(ref->parent,
605 "BUG: direct ref found in indirect tree")) {
606 ret = -EINVAL;
607 goto out;
608 }
609
610 rb_erase(&ref->rbnode, &preftrees->indirect.root);
611
612 if (ref->count == 0) {
613 free_pref(ref);
462 continue; 614 continue;
615 }
616
463 if (root_objectid && ref->root_id != root_objectid) { 617 if (root_objectid && ref->root_id != root_objectid) {
618 free_pref(ref);
464 ret = BACKREF_FOUND_SHARED; 619 ret = BACKREF_FOUND_SHARED;
465 goto out; 620 goto out;
466 } 621 }
@@ -472,8 +627,10 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
472 * and return directly. 627 * and return directly.
473 */ 628 */
474 if (err == -ENOENT) { 629 if (err == -ENOENT) {
630 prelim_ref_insert(&preftrees->direct, ref);
475 continue; 631 continue;
476 } else if (err) { 632 } else if (err) {
633 free_pref(ref);
477 ret = err; 634 ret = err;
478 goto out; 635 goto out;
479 } 636 }
@@ -484,19 +641,26 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
484 ref->parent = node ? node->val : 0; 641 ref->parent = node ? node->val : 0;
485 ref->inode_list = unode_aux_to_inode_list(node); 642 ref->inode_list = unode_aux_to_inode_list(node);
486 643
487 /* additional parents require new refs being added here */ 644 /* Add a prelim_ref(s) for any other parent(s). */
488 while ((node = ulist_next(parents, &uiter))) { 645 while ((node = ulist_next(parents, &uiter))) {
646 struct prelim_ref *new_ref;
647
489 new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, 648 new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
490 GFP_NOFS); 649 GFP_NOFS);
491 if (!new_ref) { 650 if (!new_ref) {
651 free_pref(ref);
492 ret = -ENOMEM; 652 ret = -ENOMEM;
493 goto out; 653 goto out;
494 } 654 }
495 memcpy(new_ref, ref, sizeof(*ref)); 655 memcpy(new_ref, ref, sizeof(*ref));
496 new_ref->parent = node->val; 656 new_ref->parent = node->val;
497 new_ref->inode_list = unode_aux_to_inode_list(node); 657 new_ref->inode_list = unode_aux_to_inode_list(node);
498 list_add(&new_ref->list, &ref->list); 658 prelim_ref_insert(&preftrees->direct, new_ref);
499 } 659 }
660
661 /* Now it's a direct ref, put it in the direct tree */
662 prelim_ref_insert(&preftrees->direct, ref);
663
500 ulist_reinit(parents); 664 ulist_reinit(parents);
501 } 665 }
502out: 666out:
@@ -504,44 +668,31 @@ out:
504 return ret; 668 return ret;
505} 669}
506 670
507static inline int ref_for_same_block(struct prelim_ref *ref1,
508 struct prelim_ref *ref2)
509{
510 if (ref1->level != ref2->level)
511 return 0;
512 if (ref1->root_id != ref2->root_id)
513 return 0;
514 if (ref1->key_for_search.type != ref2->key_for_search.type)
515 return 0;
516 if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
517 return 0;
518 if (ref1->key_for_search.offset != ref2->key_for_search.offset)
519 return 0;
520 if (ref1->parent != ref2->parent)
521 return 0;
522
523 return 1;
524}
525
526/* 671/*
527 * read tree blocks and add keys where required. 672 * read tree blocks and add keys where required.
528 */ 673 */
529static int add_missing_keys(struct btrfs_fs_info *fs_info, 674static int add_missing_keys(struct btrfs_fs_info *fs_info,
530 struct list_head *head) 675 struct preftrees *preftrees)
531{ 676{
532 struct prelim_ref *ref; 677 struct prelim_ref *ref;
533 struct extent_buffer *eb; 678 struct extent_buffer *eb;
679 struct preftree *tree = &preftrees->indirect_missing_keys;
680 struct rb_node *node;
534 681
535 list_for_each_entry(ref, head, list) { 682 while ((node = rb_first(&tree->root))) {
536 if (ref->parent) 683 ref = rb_entry(node, struct prelim_ref, rbnode);
537 continue; 684 rb_erase(node, &tree->root);
538 if (ref->key_for_search.type) 685
539 continue; 686 BUG_ON(ref->parent); /* should not be a direct ref */
687 BUG_ON(ref->key_for_search.type);
540 BUG_ON(!ref->wanted_disk_byte); 688 BUG_ON(!ref->wanted_disk_byte);
689
541 eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0); 690 eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0);
542 if (IS_ERR(eb)) { 691 if (IS_ERR(eb)) {
692 free_pref(ref);
543 return PTR_ERR(eb); 693 return PTR_ERR(eb);
544 } else if (!extent_buffer_uptodate(eb)) { 694 } else if (!extent_buffer_uptodate(eb)) {
695 free_pref(ref);
545 free_extent_buffer(eb); 696 free_extent_buffer(eb);
546 return -EIO; 697 return -EIO;
547 } 698 }
@@ -552,73 +703,31 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
552 btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); 703 btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
553 btrfs_tree_read_unlock(eb); 704 btrfs_tree_read_unlock(eb);
554 free_extent_buffer(eb); 705 free_extent_buffer(eb);
706 prelim_ref_insert(&preftrees->indirect, ref);
555 } 707 }
556 return 0; 708 return 0;
557} 709}
558 710
559/* 711/*
560 * merge backrefs and adjust counts accordingly
561 *
562 * FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in add_prelim_ref
563 * then we can merge more here. Additionally, we could even add a key
564 * range for the blocks we looked into to merge even more (-> replace
565 * unresolved refs by those having a parent).
566 */
567static void merge_refs(struct list_head *head, enum merge_mode mode)
568{
569 struct prelim_ref *pos1;
570
571 list_for_each_entry(pos1, head, list) {
572 struct prelim_ref *pos2 = pos1, *tmp;
573
574 list_for_each_entry_safe_continue(pos2, tmp, head, list) {
575 struct prelim_ref *ref1 = pos1, *ref2 = pos2;
576 struct extent_inode_elem *eie;
577
578 if (!ref_for_same_block(ref1, ref2))
579 continue;
580 if (mode == MERGE_IDENTICAL_KEYS) {
581 if (!ref1->parent && ref2->parent)
582 swap(ref1, ref2);
583 } else {
584 if (ref1->parent != ref2->parent)
585 continue;
586 }
587
588 eie = ref1->inode_list;
589 while (eie && eie->next)
590 eie = eie->next;
591 if (eie)
592 eie->next = ref2->inode_list;
593 else
594 ref1->inode_list = ref2->inode_list;
595 ref1->count += ref2->count;
596
597 list_del(&ref2->list);
598 kmem_cache_free(btrfs_prelim_ref_cache, ref2);
599 cond_resched();
600 }
601
602 }
603}
604
605/*
606 * add all currently queued delayed refs from this head whose seq nr is 712 * add all currently queued delayed refs from this head whose seq nr is
607 * smaller than or equal to that seq to the list 713
608 */ 714 */
609static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, 715static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
610 struct list_head *prefs, u64 *total_refs, 716 struct preftrees *preftrees, u64 *total_refs,
611 u64 inum) 717 u64 inum)
612{ 718{
613 struct btrfs_delayed_ref_node *node; 719 struct btrfs_delayed_ref_node *node;
614 struct btrfs_delayed_extent_op *extent_op = head->extent_op; 720 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
615 struct btrfs_key key; 721 struct btrfs_key key;
616 struct btrfs_key op_key = {0}; 722 struct btrfs_key tmp_op_key;
723 struct btrfs_key *op_key = NULL;
617 int sgn; 724 int sgn;
618 int ret = 0; 725 int ret = 0;
619 726
620 if (extent_op && extent_op->update_key) 727 if (extent_op && extent_op->update_key) {
621 btrfs_disk_key_to_cpu(&op_key, &extent_op->key); 728 btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
729 op_key = &tmp_op_key;
730 }
622 731
623 spin_lock(&head->lock); 732 spin_lock(&head->lock);
624 list_for_each_entry(node, &head->ref_list, list) { 733 list_for_each_entry(node, &head->ref_list, list) {
@@ -642,24 +751,30 @@ static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
642 *total_refs += (node->ref_mod * sgn); 751 *total_refs += (node->ref_mod * sgn);
643 switch (node->type) { 752 switch (node->type) {
644 case BTRFS_TREE_BLOCK_REF_KEY: { 753 case BTRFS_TREE_BLOCK_REF_KEY: {
754 /* NORMAL INDIRECT METADATA backref */
645 struct btrfs_delayed_tree_ref *ref; 755 struct btrfs_delayed_tree_ref *ref;
646 756
647 ref = btrfs_delayed_node_to_tree_ref(node); 757 ref = btrfs_delayed_node_to_tree_ref(node);
648 ret = add_prelim_ref(prefs, ref->root, &op_key, 758 ret = add_indirect_ref(preftrees, ref->root, &tmp_op_key,
649 ref->level + 1, 0, node->bytenr, 759 ref->level + 1, node->bytenr,
650 node->ref_mod * sgn, GFP_ATOMIC); 760 node->ref_mod * sgn,
761 GFP_ATOMIC);
651 break; 762 break;
652 } 763 }
653 case BTRFS_SHARED_BLOCK_REF_KEY: { 764 case BTRFS_SHARED_BLOCK_REF_KEY: {
765 /* SHARED DIRECT METADATA backref */
654 struct btrfs_delayed_tree_ref *ref; 766 struct btrfs_delayed_tree_ref *ref;
655 767
656 ref = btrfs_delayed_node_to_tree_ref(node); 768 ref = btrfs_delayed_node_to_tree_ref(node);
657 ret = add_prelim_ref(prefs, 0, NULL, ref->level + 1, 769
770 ret = add_direct_ref(preftrees, ref->level + 1,
658 ref->parent, node->bytenr, 771 ref->parent, node->bytenr,
659 node->ref_mod * sgn, GFP_ATOMIC); 772 node->ref_mod * sgn,
773 GFP_ATOMIC);
660 break; 774 break;
661 } 775 }
662 case BTRFS_EXTENT_DATA_REF_KEY: { 776 case BTRFS_EXTENT_DATA_REF_KEY: {
777 /* NORMAL INDIRECT DATA backref */
663 struct btrfs_delayed_data_ref *ref; 778 struct btrfs_delayed_data_ref *ref;
664 ref = btrfs_delayed_node_to_data_ref(node); 779 ref = btrfs_delayed_node_to_data_ref(node);
665 780
@@ -676,17 +791,21 @@ static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
676 break; 791 break;
677 } 792 }
678 793
679 ret = add_prelim_ref(prefs, ref->root, &key, 0, 0, 794 ret = add_indirect_ref(preftrees, ref->root, &key, 0,
680 node->bytenr, node->ref_mod * sgn, 795 node->bytenr,
681 GFP_ATOMIC); 796 node->ref_mod * sgn,
797 GFP_ATOMIC);
682 break; 798 break;
683 } 799 }
684 case BTRFS_SHARED_DATA_REF_KEY: { 800 case BTRFS_SHARED_DATA_REF_KEY: {
801 /* SHARED DIRECT FULL backref */
685 struct btrfs_delayed_data_ref *ref; 802 struct btrfs_delayed_data_ref *ref;
686 803
687 ref = btrfs_delayed_node_to_data_ref(node); 804 ref = btrfs_delayed_node_to_data_ref(node);
688 ret = add_prelim_ref(prefs, 0, NULL, 0, ref->parent, 805
689 node->bytenr, node->ref_mod * sgn, 806 ret = add_direct_ref(preftrees, 0, ref->parent,
807 node->bytenr,
808 node->ref_mod * sgn,
690 GFP_ATOMIC); 809 GFP_ATOMIC);
691 break; 810 break;
692 } 811 }
@@ -704,7 +823,7 @@ static int add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
704 * add all inline backrefs for bytenr to the list 823 * add all inline backrefs for bytenr to the list
705 */ 824 */
706static int add_inline_refs(struct btrfs_path *path, u64 bytenr, 825static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
707 int *info_level, struct list_head *prefs, 826 int *info_level, struct preftrees *preftrees,
708 u64 *total_refs, u64 inum) 827 u64 *total_refs, u64 inum)
709{ 828{
710 int ret = 0; 829 int ret = 0;
@@ -760,8 +879,8 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
760 879
761 switch (type) { 880 switch (type) {
762 case BTRFS_SHARED_BLOCK_REF_KEY: 881 case BTRFS_SHARED_BLOCK_REF_KEY:
763 ret = add_prelim_ref(prefs, 0, NULL, *info_level + 1, 882 ret = add_direct_ref(preftrees, *info_level + 1, offset,
764 offset, bytenr, 1, GFP_NOFS); 883 bytenr, 1, GFP_NOFS);
765 break; 884 break;
766 case BTRFS_SHARED_DATA_REF_KEY: { 885 case BTRFS_SHARED_DATA_REF_KEY: {
767 struct btrfs_shared_data_ref *sdref; 886 struct btrfs_shared_data_ref *sdref;
@@ -769,14 +888,15 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
769 888
770 sdref = (struct btrfs_shared_data_ref *)(iref + 1); 889 sdref = (struct btrfs_shared_data_ref *)(iref + 1);
771 count = btrfs_shared_data_ref_count(leaf, sdref); 890 count = btrfs_shared_data_ref_count(leaf, sdref);
772 ret = add_prelim_ref(prefs, 0, NULL, 0, offset, 891
892 ret = add_direct_ref(preftrees, 0, offset,
773 bytenr, count, GFP_NOFS); 893 bytenr, count, GFP_NOFS);
774 break; 894 break;
775 } 895 }
776 case BTRFS_TREE_BLOCK_REF_KEY: 896 case BTRFS_TREE_BLOCK_REF_KEY:
777 ret = add_prelim_ref(prefs, offset, NULL, 897 ret = add_indirect_ref(preftrees, offset, NULL,
778 *info_level + 1, 0, 898 *info_level + 1, bytenr, 1,
779 bytenr, 1, GFP_NOFS); 899 GFP_NOFS);
780 break; 900 break;
781 case BTRFS_EXTENT_DATA_REF_KEY: { 901 case BTRFS_EXTENT_DATA_REF_KEY: {
782 struct btrfs_extent_data_ref *dref; 902 struct btrfs_extent_data_ref *dref;
@@ -796,8 +916,9 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
796 } 916 }
797 917
798 root = btrfs_extent_data_ref_root(leaf, dref); 918 root = btrfs_extent_data_ref_root(leaf, dref);
799 ret = add_prelim_ref(prefs, root, &key, 0, 0, 919
800 bytenr, count, GFP_NOFS); 920 ret = add_indirect_ref(preftrees, root, &key, 0, bytenr,
921 count, GFP_NOFS);
801 break; 922 break;
802 } 923 }
803 default: 924 default:
@@ -816,7 +937,8 @@ static int add_inline_refs(struct btrfs_path *path, u64 bytenr,
816 */ 937 */
817static int add_keyed_refs(struct btrfs_fs_info *fs_info, 938static int add_keyed_refs(struct btrfs_fs_info *fs_info,
818 struct btrfs_path *path, u64 bytenr, 939 struct btrfs_path *path, u64 bytenr,
819 int info_level, struct list_head *prefs, u64 inum) 940 int info_level, struct preftrees *preftrees,
941 u64 inum)
820{ 942{
821 struct btrfs_root *extent_root = fs_info->extent_root; 943 struct btrfs_root *extent_root = fs_info->extent_root;
822 int ret; 944 int ret;
@@ -846,26 +968,31 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
846 968
847 switch (key.type) { 969 switch (key.type) {
848 case BTRFS_SHARED_BLOCK_REF_KEY: 970 case BTRFS_SHARED_BLOCK_REF_KEY:
849 ret = add_prelim_ref(prefs, 0, NULL, info_level + 1, 971 /* SHARED DIRECT METADATA backref */
850 key.offset, bytenr, 1, GFP_NOFS); 972 ret = add_direct_ref(preftrees, info_level + 1,
973 key.offset, bytenr, 1,
974 GFP_NOFS);
851 break; 975 break;
852 case BTRFS_SHARED_DATA_REF_KEY: { 976 case BTRFS_SHARED_DATA_REF_KEY: {
977 /* SHARED DIRECT FULL backref */
853 struct btrfs_shared_data_ref *sdref; 978 struct btrfs_shared_data_ref *sdref;
854 int count; 979 int count;
855 980
856 sdref = btrfs_item_ptr(leaf, slot, 981 sdref = btrfs_item_ptr(leaf, slot,
857 struct btrfs_shared_data_ref); 982 struct btrfs_shared_data_ref);
858 count = btrfs_shared_data_ref_count(leaf, sdref); 983 count = btrfs_shared_data_ref_count(leaf, sdref);
859 ret = add_prelim_ref(prefs, 0, NULL, 0, key.offset, 984 ret = add_direct_ref(preftrees, 0, key.offset, bytenr,
860 bytenr, count, GFP_NOFS); 985 count, GFP_NOFS);
861 break; 986 break;
862 } 987 }
863 case BTRFS_TREE_BLOCK_REF_KEY: 988 case BTRFS_TREE_BLOCK_REF_KEY:
864 ret = add_prelim_ref(prefs, key.offset, NULL, 989 /* NORMAL INDIRECT METADATA backref */
865 info_level + 1, 0, 990 ret = add_indirect_ref(preftrees, key.offset, NULL,
866 bytenr, 1, GFP_NOFS); 991 info_level + 1, bytenr, 1,
992 GFP_NOFS);
867 break; 993 break;
868 case BTRFS_EXTENT_DATA_REF_KEY: { 994 case BTRFS_EXTENT_DATA_REF_KEY: {
995 /* NORMAL INDIRECT DATA backref */
869 struct btrfs_extent_data_ref *dref; 996 struct btrfs_extent_data_ref *dref;
870 int count; 997 int count;
871 u64 root; 998 u64 root;
@@ -884,8 +1011,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
884 } 1011 }
885 1012
886 root = btrfs_extent_data_ref_root(leaf, dref); 1013 root = btrfs_extent_data_ref_root(leaf, dref);
887 ret = add_prelim_ref(prefs, root, &key, 0, 0, 1014 ret = add_indirect_ref(preftrees, root, &key, 0, bytenr,
888 bytenr, count, GFP_NOFS); 1015 count, GFP_NOFS);
889 break; 1016 break;
890 } 1017 }
891 default: 1018 default:
@@ -926,14 +1053,16 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
926 struct btrfs_delayed_ref_head *head; 1053 struct btrfs_delayed_ref_head *head;
927 int info_level = 0; 1054 int info_level = 0;
928 int ret; 1055 int ret;
929 struct list_head prefs_delayed;
930 struct list_head prefs;
931 struct prelim_ref *ref; 1056 struct prelim_ref *ref;
1057 struct rb_node *node;
932 struct extent_inode_elem *eie = NULL; 1058 struct extent_inode_elem *eie = NULL;
1059 /* total of both direct AND indirect refs! */
933 u64 total_refs = 0; 1060 u64 total_refs = 0;
934 1061 struct preftrees preftrees = {
935 INIT_LIST_HEAD(&prefs); 1062 .direct = PREFTREE_INIT,
936 INIT_LIST_HEAD(&prefs_delayed); 1063 .indirect = PREFTREE_INIT,
1064 .indirect_missing_keys = PREFTREE_INIT
1065 };
937 1066
938 key.objectid = bytenr; 1067 key.objectid = bytenr;
939 key.offset = (u64)-1; 1068 key.offset = (u64)-1;
@@ -996,9 +1125,8 @@ again:
996 goto again; 1125 goto again;
997 } 1126 }
998 spin_unlock(&delayed_refs->lock); 1127 spin_unlock(&delayed_refs->lock);
999 ret = add_delayed_refs(head, time_seq, 1128 ret = add_delayed_refs(head, time_seq, &preftrees,
1000 &prefs_delayed, &total_refs, 1129 &total_refs, inum);
1001 inum);
1002 mutex_unlock(&head->mutex); 1130 mutex_unlock(&head->mutex);
1003 if (ret) 1131 if (ret)
1004 goto out; 1132 goto out;
@@ -1019,35 +1147,43 @@ again:
1019 (key.type == BTRFS_EXTENT_ITEM_KEY || 1147 (key.type == BTRFS_EXTENT_ITEM_KEY ||
1020 key.type == BTRFS_METADATA_ITEM_KEY)) { 1148 key.type == BTRFS_METADATA_ITEM_KEY)) {
1021 ret = add_inline_refs(path, bytenr, &info_level, 1149 ret = add_inline_refs(path, bytenr, &info_level,
1022 &prefs, &total_refs, inum); 1150 &preftrees, &total_refs, inum);
1023 if (ret) 1151 if (ret)
1024 goto out; 1152 goto out;
1025 ret = add_keyed_refs(fs_info, path, bytenr, info_level, 1153 ret = add_keyed_refs(fs_info, path, bytenr, info_level,
1026 &prefs, inum); 1154 &preftrees, inum);
1027 if (ret) 1155 if (ret)
1028 goto out; 1156 goto out;
1029 } 1157 }
1030 } 1158 }
1031 btrfs_release_path(path);
1032 1159
1033 list_splice_init(&prefs_delayed, &prefs); 1160 btrfs_release_path(path);
1034 1161
1035 ret = add_missing_keys(fs_info, &prefs); 1162 ret = add_missing_keys(fs_info, &preftrees);
1036 if (ret) 1163 if (ret)
1037 goto out; 1164 goto out;
1038 1165
1039 merge_refs(&prefs, MERGE_IDENTICAL_KEYS); 1166 WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
1040 1167
1041 ret = resolve_indirect_refs(fs_info, path, time_seq, &prefs, 1168 ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
1042 extent_item_pos, total_refs, 1169 extent_item_pos, total_refs,
1043 root_objectid); 1170 root_objectid);
1044 if (ret) 1171 if (ret)
1045 goto out; 1172 goto out;
1046 1173
1047 merge_refs(&prefs, MERGE_IDENTICAL_PARENTS); 1174 WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root));
1048 1175
1049 while (!list_empty(&prefs)) { 1176 /*
1050 ref = list_first_entry(&prefs, struct prelim_ref, list); 1177 * This walks the tree of merged and resolved refs. Tree blocks are
1178 * read in as needed. Unique entries are added to the ulist, and
1179 * the list of found roots is updated.
1180 *
1181 * We release the entire tree in one go before returning.
1182 */
1183 node = rb_first(&preftrees.direct.root);
1184 while (node) {
1185 ref = rb_entry(node, struct prelim_ref, rbnode);
1186 node = rb_next(&ref->rbnode);
1051 WARN_ON(ref->count < 0); 1187 WARN_ON(ref->count < 0);
1052 if (roots && ref->count && ref->root_id && ref->parent == 0) { 1188 if (roots && ref->count && ref->root_id && ref->parent == 0) {
1053 if (root_objectid && ref->root_id != root_objectid) { 1189 if (root_objectid && ref->root_id != root_objectid) {
@@ -1101,23 +1237,15 @@ again:
1101 } 1237 }
1102 eie = NULL; 1238 eie = NULL;
1103 } 1239 }
1104 list_del(&ref->list);
1105 kmem_cache_free(btrfs_prelim_ref_cache, ref);
1106 } 1240 }
1107 1241
1108out: 1242out:
1109 btrfs_free_path(path); 1243 btrfs_free_path(path);
1110 while (!list_empty(&prefs)) { 1244
1111 ref = list_first_entry(&prefs, struct prelim_ref, list); 1245 prelim_release(&preftrees.direct);
1112 list_del(&ref->list); 1246 prelim_release(&preftrees.indirect);
1113 kmem_cache_free(btrfs_prelim_ref_cache, ref); 1247 prelim_release(&preftrees.indirect_missing_keys);
1114 } 1248
1115 while (!list_empty(&prefs_delayed)) {
1116 ref = list_first_entry(&prefs_delayed, struct prelim_ref,
1117 list);
1118 list_del(&ref->list);
1119 kmem_cache_free(btrfs_prelim_ref_cache, ref);
1120 }
1121 if (ret < 0) 1249 if (ret < 0)
1122 free_inode_elem_list(eie); 1250 free_inode_elem_list(eie);
1123 return ret; 1251 return ret;