summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorMatthew Wilcox <willy@linux.intel.com>2016-12-14 18:08:58 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-14 19:04:10 -0500
commit175542f575723e43f897ddb09d0011c13f7cf0ec (patch)
treec9373e49ab8636db5a48f7017f4e942486acf1ef /lib
parent268f42de718128cd0301293177e79c08c38e39a6 (diff)
radix-tree: add radix_tree_join
This new function allows for the replacement of many smaller entries in the radix tree with one larger multiorder entry. From the point of view of an RCU walker, they may see a mixture of the smaller entries and the large entry during the same walk, but they will never see NULL for an index which was populated before the join. Link: http://lkml.kernel.org/r/1480369871-5271-58-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox <willy@linux.intel.com> Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Matthew Wilcox <mawilcox@microsoft.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/radix-tree.c183
1 files changed, 152 insertions, 31 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d1f4ca5b7989..962cfb3a7671 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -339,17 +339,14 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
339{ 339{
340 struct radix_tree_node *node = 340 struct radix_tree_node *node =
341 container_of(head, struct radix_tree_node, rcu_head); 341 container_of(head, struct radix_tree_node, rcu_head);
342 int i;
343 342
344 /* 343 /*
345 * must only free zeroed nodes into the slab. radix_tree_shrink 344 * Must only free zeroed nodes into the slab. We can be left with
346 * can leave us with a non-NULL entry in the first slot, so clear 345 * non-NULL entries by radix_tree_free_nodes, so clear the entries
347 * that here to make sure. 346 * and tags here.
348 */ 347 */
349 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) 348 memset(node->slots, 0, sizeof(node->slots));
350 tag_clear(node, i, 0); 349 memset(node->tags, 0, sizeof(node->tags));
351
352 node->slots[0] = NULL;
353 INIT_LIST_HEAD(&node->private_list); 350 INIT_LIST_HEAD(&node->private_list);
354 351
355 kmem_cache_free(radix_tree_node_cachep, node); 352 kmem_cache_free(radix_tree_node_cachep, node);
@@ -678,14 +675,14 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
678 shift = radix_tree_load_root(root, &child, &maxindex); 675 shift = radix_tree_load_root(root, &child, &maxindex);
679 676
680 /* Make sure the tree is high enough. */ 677 /* Make sure the tree is high enough. */
678 if (order > 0 && max == ((1UL << order) - 1))
679 max++;
681 if (max > maxindex) { 680 if (max > maxindex) {
682 int error = radix_tree_extend(root, max, shift); 681 int error = radix_tree_extend(root, max, shift);
683 if (error < 0) 682 if (error < 0)
684 return error; 683 return error;
685 shift = error; 684 shift = error;
686 child = root->rnode; 685 child = root->rnode;
687 if (order == shift)
688 shift += RADIX_TREE_MAP_SHIFT;
689 } 686 }
690 687
691 while (shift > order) { 688 while (shift > order) {
@@ -697,6 +694,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
697 return -ENOMEM; 694 return -ENOMEM;
698 child->shift = shift; 695 child->shift = shift;
699 child->offset = offset; 696 child->offset = offset;
697 child->count = 0;
698 child->exceptional = 0;
700 child->parent = node; 699 child->parent = node;
701 rcu_assign_pointer(*slot, node_to_entry(child)); 700 rcu_assign_pointer(*slot, node_to_entry(child));
702 if (node) 701 if (node)
@@ -710,31 +709,121 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
710 slot = &node->slots[offset]; 709 slot = &node->slots[offset];
711 } 710 }
712 711
712 if (nodep)
713 *nodep = node;
714 if (slotp)
715 *slotp = slot;
716 return 0;
717}
718
713#ifdef CONFIG_RADIX_TREE_MULTIORDER 719#ifdef CONFIG_RADIX_TREE_MULTIORDER
714 /* Insert pointers to the canonical entry */ 720/*
715 if (order > shift) { 721 * Free any nodes below this node. The tree is presumed to not need
716 unsigned i, n = 1 << (order - shift); 722 * shrinking, and any user data in the tree is presumed to not need a
723 * destructor called on it. If we need to add a destructor, we can
724 * add that functionality later. Note that we may not clear tags or
725 * slots from the tree as an RCU walker may still have a pointer into
726 * this subtree. We could replace the entries with RADIX_TREE_RETRY,
727 * but we'll still have to clear those in rcu_free.
728 */
729static void radix_tree_free_nodes(struct radix_tree_node *node)
730{
731 unsigned offset = 0;
732 struct radix_tree_node *child = entry_to_node(node);
733
734 for (;;) {
735 void *entry = child->slots[offset];
736 if (radix_tree_is_internal_node(entry) &&
737 !is_sibling_entry(child, entry)) {
738 child = entry_to_node(entry);
739 offset = 0;
740 continue;
741 }
742 offset++;
743 while (offset == RADIX_TREE_MAP_SIZE) {
744 struct radix_tree_node *old = child;
745 offset = child->offset + 1;
746 child = child->parent;
747 radix_tree_node_free(old);
748 if (old == entry_to_node(node))
749 return;
750 }
751 }
752}
753
754static inline int insert_entries(struct radix_tree_node *node, void **slot,
755 void *item, unsigned order, bool replace)
756{
757 struct radix_tree_node *child;
758 unsigned i, n, tag, offset, tags = 0;
759
760 if (node) {
761 n = 1 << (order - node->shift);
762 offset = get_slot_offset(node, slot);
763 } else {
764 n = 1;
765 offset = 0;
766 }
767
768 if (n > 1) {
717 offset = offset & ~(n - 1); 769 offset = offset & ~(n - 1);
718 slot = &node->slots[offset]; 770 slot = &node->slots[offset];
719 child = node_to_entry(slot); 771 }
720 for (i = 0; i < n; i++) { 772 child = node_to_entry(slot);
721 if (slot[i]) 773
774 for (i = 0; i < n; i++) {
775 if (slot[i]) {
776 if (replace) {
777 node->count--;
778 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
779 if (tag_get(node, tag, offset + i))
780 tags |= 1 << tag;
781 } else
722 return -EEXIST; 782 return -EEXIST;
723 } 783 }
784 }
724 785
725 for (i = 1; i < n; i++) { 786 for (i = 0; i < n; i++) {
787 struct radix_tree_node *old = slot[i];
788 if (i) {
726 rcu_assign_pointer(slot[i], child); 789 rcu_assign_pointer(slot[i], child);
727 node->count++; 790 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
791 if (tags & (1 << tag))
792 tag_clear(node, tag, offset + i);
793 } else {
794 rcu_assign_pointer(slot[i], item);
795 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
796 if (tags & (1 << tag))
797 tag_set(node, tag, offset);
728 } 798 }
799 if (radix_tree_is_internal_node(old) &&
800 !is_sibling_entry(node, old))
801 radix_tree_free_nodes(old);
802 if (radix_tree_exceptional_entry(old))
803 node->exceptional--;
729 } 804 }
730#endif 805 if (node) {
731 806 node->count += n;
732 if (nodep) 807 if (radix_tree_exceptional_entry(item))
733 *nodep = node; 808 node->exceptional += n;
734 if (slotp) 809 }
735 *slotp = slot; 810 return n;
736 return 0;
737} 811}
812#else
813static inline int insert_entries(struct radix_tree_node *node, void **slot,
814 void *item, unsigned order, bool replace)
815{
816 if (*slot)
817 return -EEXIST;
818 rcu_assign_pointer(*slot, item);
819 if (node) {
820 node->count++;
821 if (radix_tree_exceptional_entry(item))
822 node->exceptional++;
823 }
824 return 1;
825}
826#endif
738 827
739/** 828/**
740 * __radix_tree_insert - insert into a radix tree 829 * __radix_tree_insert - insert into a radix tree
@@ -757,15 +846,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
757 error = __radix_tree_create(root, index, order, &node, &slot); 846 error = __radix_tree_create(root, index, order, &node, &slot);
758 if (error) 847 if (error)
759 return error; 848 return error;
760 if (*slot != NULL) 849
761 return -EEXIST; 850 error = insert_entries(node, slot, item, order, false);
762 rcu_assign_pointer(*slot, item); 851 if (error < 0)
852 return error;
763 853
764 if (node) { 854 if (node) {
765 unsigned offset = get_slot_offset(node, slot); 855 unsigned offset = get_slot_offset(node, slot);
766 node->count++;
767 if (radix_tree_exceptional_entry(item))
768 node->exceptional++;
769 BUG_ON(tag_get(node, 0, offset)); 856 BUG_ON(tag_get(node, 0, offset));
770 BUG_ON(tag_get(node, 1, offset)); 857 BUG_ON(tag_get(node, 1, offset));
771 BUG_ON(tag_get(node, 2, offset)); 858 BUG_ON(tag_get(node, 2, offset));
@@ -942,6 +1029,40 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
942 replace_slot(root, NULL, slot, item, true); 1029 replace_slot(root, NULL, slot, item, true);
943} 1030}
944 1031
1032#ifdef CONFIG_RADIX_TREE_MULTIORDER
1033/**
1034 * radix_tree_join - replace multiple entries with one multiorder entry
1035 * @root: radix tree root
1036 * @index: an index inside the new entry
1037 * @order: order of the new entry
1038 * @item: new entry
1039 *
1040 * Call this function to replace several entries with one larger entry.
1041 * The existing entries are presumed to not need freeing as a result of
1042 * this call.
1043 *
1044 * The replacement entry will have all the tags set on it that were set
1045 * on any of the entries it is replacing.
1046 */
1047int radix_tree_join(struct radix_tree_root *root, unsigned long index,
1048 unsigned order, void *item)
1049{
1050 struct radix_tree_node *node;
1051 void **slot;
1052 int error;
1053
1054 BUG_ON(radix_tree_is_internal_node(item));
1055
1056 error = __radix_tree_create(root, index, order, &node, &slot);
1057 if (!error)
1058 error = insert_entries(node, slot, item, order, true);
1059 if (error > 0)
1060 error = 0;
1061
1062 return error;
1063}
1064#endif
1065
945/** 1066/**
946 * radix_tree_tag_set - set a tag on a radix tree node 1067 * radix_tree_tag_set - set a tag on a radix tree node
947 * @root: radix tree root 1068 * @root: radix tree root