diff options
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r-- | lib/radix-tree.c | 177 |
1 files changed, 153 insertions, 24 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 92cdd9936e3d..6f412ab4c24f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/notifier.h> | 29 | #include <linux/notifier.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/gfp.h> | ||
32 | #include <linux/string.h> | 31 | #include <linux/string.h> |
33 | #include <linux/bitops.h> | 32 | #include <linux/bitops.h> |
34 | #include <linux/rcupdate.h> | 33 | #include <linux/rcupdate.h> |
@@ -50,7 +49,7 @@ struct radix_tree_node { | |||
50 | unsigned int height; /* Height from the bottom */ | 49 | unsigned int height; /* Height from the bottom */ |
51 | unsigned int count; | 50 | unsigned int count; |
52 | struct rcu_head rcu_head; | 51 | struct rcu_head rcu_head; |
53 | void *slots[RADIX_TREE_MAP_SIZE]; | 52 | void __rcu *slots[RADIX_TREE_MAP_SIZE]; |
54 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; | 53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; |
55 | }; | 54 | }; |
56 | 55 | ||
@@ -175,14 +174,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) | |||
175 | { | 174 | { |
176 | struct radix_tree_node *node = | 175 | struct radix_tree_node *node = |
177 | container_of(head, struct radix_tree_node, rcu_head); | 176 | container_of(head, struct radix_tree_node, rcu_head); |
177 | int i; | ||
178 | 178 | ||
179 | /* | 179 | /* |
180 | * must only free zeroed nodes into the slab. radix_tree_shrink | 180 | * must only free zeroed nodes into the slab. radix_tree_shrink |
181 | * can leave us with a non-NULL entry in the first slot, so clear | 181 | * can leave us with a non-NULL entry in the first slot, so clear |
182 | * that here to make sure. | 182 | * that here to make sure. |
183 | */ | 183 | */ |
184 | tag_clear(node, 0, 0); | 184 | for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) |
185 | tag_clear(node, 1, 0); | 185 | tag_clear(node, i, 0); |
186 | |||
186 | node->slots[0] = NULL; | 187 | node->slots[0] = NULL; |
187 | node->count = 0; | 188 | node->count = 0; |
188 | 189 | ||
@@ -364,7 +365,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, | |||
364 | unsigned int height, shift; | 365 | unsigned int height, shift; |
365 | struct radix_tree_node *node, **slot; | 366 | struct radix_tree_node *node, **slot; |
366 | 367 | ||
367 | node = rcu_dereference(root->rnode); | 368 | node = rcu_dereference_raw(root->rnode); |
368 | if (node == NULL) | 369 | if (node == NULL) |
369 | return NULL; | 370 | return NULL; |
370 | 371 | ||
@@ -384,7 +385,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, | |||
384 | do { | 385 | do { |
385 | slot = (struct radix_tree_node **) | 386 | slot = (struct radix_tree_node **) |
386 | (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); | 387 | (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); |
387 | node = rcu_dereference(*slot); | 388 | node = rcu_dereference_raw(*slot); |
388 | if (node == NULL) | 389 | if (node == NULL) |
389 | return NULL; | 390 | return NULL; |
390 | 391 | ||
@@ -556,6 +557,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear); | |||
556 | * | 557 | * |
557 | * 0: tag not present or not set | 558 | * 0: tag not present or not set |
558 | * 1: tag set | 559 | * 1: tag set |
560 | * | ||
561 | * Note that the return value of this function may not be relied on, even if | ||
562 | * the RCU lock is held, unless tag modification and node deletion are excluded | ||
563 | * from concurrency. | ||
559 | */ | 564 | */ |
560 | int radix_tree_tag_get(struct radix_tree_root *root, | 565 | int radix_tree_tag_get(struct radix_tree_root *root, |
561 | unsigned long index, unsigned int tag) | 566 | unsigned long index, unsigned int tag) |
@@ -568,7 +573,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
568 | if (!root_tag_get(root, tag)) | 573 | if (!root_tag_get(root, tag)) |
569 | return 0; | 574 | return 0; |
570 | 575 | ||
571 | node = rcu_dereference(root->rnode); | 576 | node = rcu_dereference_raw(root->rnode); |
572 | if (node == NULL) | 577 | if (node == NULL) |
573 | return 0; | 578 | return 0; |
574 | 579 | ||
@@ -596,13 +601,9 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
596 | */ | 601 | */ |
597 | if (!tag_get(node, tag, offset)) | 602 | if (!tag_get(node, tag, offset)) |
598 | saw_unset_tag = 1; | 603 | saw_unset_tag = 1; |
599 | if (height == 1) { | 604 | if (height == 1) |
600 | int ret = tag_get(node, tag, offset); | 605 | return !!tag_get(node, tag, offset); |
601 | 606 | node = rcu_dereference_raw(node->slots[offset]); | |
602 | BUG_ON(ret && saw_unset_tag); | ||
603 | return !!ret; | ||
604 | } | ||
605 | node = rcu_dereference(node->slots[offset]); | ||
606 | shift -= RADIX_TREE_MAP_SHIFT; | 607 | shift -= RADIX_TREE_MAP_SHIFT; |
607 | height--; | 608 | height--; |
608 | } | 609 | } |
@@ -610,6 +611,134 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
610 | EXPORT_SYMBOL(radix_tree_tag_get); | 611 | EXPORT_SYMBOL(radix_tree_tag_get); |
611 | 612 | ||
612 | /** | 613 | /** |
614 | * radix_tree_range_tag_if_tagged - for each item in given range set given | ||
615 | * tag if item has another tag set | ||
616 | * @root: radix tree root | ||
617 | * @first_indexp: pointer to a starting index of a range to scan | ||
618 | * @last_index: last index of a range to scan | ||
619 | * @nr_to_tag: maximum number items to tag | ||
620 | * @iftag: tag index to test | ||
621 | * @settag: tag index to set if tested tag is set | ||
622 | * | ||
623 | * This function scans range of radix tree from first_index to last_index | ||
624 | * (inclusive). For each item in the range if iftag is set, the function sets | ||
625 | * also settag. The function stops either after tagging nr_to_tag items or | ||
626 | * after reaching last_index. | ||
627 | * | ||
628 | * The tags must be set from the leaf level only and propagated back up the | ||
629 | * path to the root. We must do this so that we resolve the full path before | ||
630 | * setting any tags on intermediate nodes. If we set tags as we descend, then | ||
631 | * we can get to the leaf node and find that the index that has the iftag | ||
632 | * set is outside the range we are scanning. This reults in dangling tags and | ||
633 | * can lead to problems with later tag operations (e.g. livelocks on lookups). | ||
634 | * | ||
635 | * The function returns number of leaves where the tag was set and sets | ||
636 | * *first_indexp to the first unscanned index. | ||
637 | * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must | ||
638 | * be prepared to handle that. | ||
639 | */ | ||
640 | unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | ||
641 | unsigned long *first_indexp, unsigned long last_index, | ||
642 | unsigned long nr_to_tag, | ||
643 | unsigned int iftag, unsigned int settag) | ||
644 | { | ||
645 | unsigned int height = root->height; | ||
646 | struct radix_tree_path path[height]; | ||
647 | struct radix_tree_path *pathp = path; | ||
648 | struct radix_tree_node *slot; | ||
649 | unsigned int shift; | ||
650 | unsigned long tagged = 0; | ||
651 | unsigned long index = *first_indexp; | ||
652 | |||
653 | last_index = min(last_index, radix_tree_maxindex(height)); | ||
654 | if (index > last_index) | ||
655 | return 0; | ||
656 | if (!nr_to_tag) | ||
657 | return 0; | ||
658 | if (!root_tag_get(root, iftag)) { | ||
659 | *first_indexp = last_index + 1; | ||
660 | return 0; | ||
661 | } | ||
662 | if (height == 0) { | ||
663 | *first_indexp = last_index + 1; | ||
664 | root_tag_set(root, settag); | ||
665 | return 1; | ||
666 | } | ||
667 | |||
668 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | ||
669 | slot = radix_tree_indirect_to_ptr(root->rnode); | ||
670 | |||
671 | /* | ||
672 | * we fill the path from (root->height - 2) to 0, leaving the index at | ||
673 | * (root->height - 1) as a terminator. Zero the node in the terminator | ||
674 | * so that we can use this to end walk loops back up the path. | ||
675 | */ | ||
676 | path[height - 1].node = NULL; | ||
677 | |||
678 | for (;;) { | ||
679 | int offset; | ||
680 | |||
681 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; | ||
682 | if (!slot->slots[offset]) | ||
683 | goto next; | ||
684 | if (!tag_get(slot, iftag, offset)) | ||
685 | goto next; | ||
686 | if (height > 1) { | ||
687 | /* Go down one level */ | ||
688 | height--; | ||
689 | shift -= RADIX_TREE_MAP_SHIFT; | ||
690 | path[height - 1].node = slot; | ||
691 | path[height - 1].offset = offset; | ||
692 | slot = slot->slots[offset]; | ||
693 | continue; | ||
694 | } | ||
695 | |||
696 | /* tag the leaf */ | ||
697 | tagged++; | ||
698 | tag_set(slot, settag, offset); | ||
699 | |||
700 | /* walk back up the path tagging interior nodes */ | ||
701 | pathp = &path[0]; | ||
702 | while (pathp->node) { | ||
703 | /* stop if we find a node with the tag already set */ | ||
704 | if (tag_get(pathp->node, settag, pathp->offset)) | ||
705 | break; | ||
706 | tag_set(pathp->node, settag, pathp->offset); | ||
707 | pathp++; | ||
708 | } | ||
709 | |||
710 | next: | ||
711 | /* Go to next item at level determined by 'shift' */ | ||
712 | index = ((index >> shift) + 1) << shift; | ||
713 | /* Overflow can happen when last_index is ~0UL... */ | ||
714 | if (index > last_index || !index) | ||
715 | break; | ||
716 | if (tagged >= nr_to_tag) | ||
717 | break; | ||
718 | while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { | ||
719 | /* | ||
720 | * We've fully scanned this node. Go up. Because | ||
721 | * last_index is guaranteed to be in the tree, what | ||
722 | * we do below cannot wander astray. | ||
723 | */ | ||
724 | slot = path[height - 1].node; | ||
725 | height++; | ||
726 | shift += RADIX_TREE_MAP_SHIFT; | ||
727 | } | ||
728 | } | ||
729 | /* | ||
730 | * The iftag must have been set somewhere because otherwise | ||
731 | * we would return immediated at the beginning of the function | ||
732 | */ | ||
733 | root_tag_set(root, settag); | ||
734 | *first_indexp = index; | ||
735 | |||
736 | return tagged; | ||
737 | } | ||
738 | EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); | ||
739 | |||
740 | |||
741 | /** | ||
613 | * radix_tree_next_hole - find the next hole (not-present entry) | 742 | * radix_tree_next_hole - find the next hole (not-present entry) |
614 | * @root: tree root | 743 | * @root: tree root |
615 | * @index: index key | 744 | * @index: index key |
@@ -657,7 +786,7 @@ EXPORT_SYMBOL(radix_tree_next_hole); | |||
657 | * | 786 | * |
658 | * Returns: the index of the hole if found, otherwise returns an index | 787 | * Returns: the index of the hole if found, otherwise returns an index |
659 | * outside of the set specified (in which case 'index - return >= max_scan' | 788 | * outside of the set specified (in which case 'index - return >= max_scan' |
660 | * will be true). In rare cases of wrap-around, LONG_MAX will be returned. | 789 | * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. |
661 | * | 790 | * |
662 | * radix_tree_next_hole may be called under rcu_read_lock. However, like | 791 | * radix_tree_next_hole may be called under rcu_read_lock. However, like |
663 | * radix_tree_gang_lookup, this will not atomically search a snapshot of | 792 | * radix_tree_gang_lookup, this will not atomically search a snapshot of |
@@ -675,7 +804,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, | |||
675 | if (!radix_tree_lookup(root, index)) | 804 | if (!radix_tree_lookup(root, index)) |
676 | break; | 805 | break; |
677 | index--; | 806 | index--; |
678 | if (index == LONG_MAX) | 807 | if (index == ULONG_MAX) |
679 | break; | 808 | break; |
680 | } | 809 | } |
681 | 810 | ||
@@ -711,7 +840,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, | |||
711 | } | 840 | } |
712 | 841 | ||
713 | shift -= RADIX_TREE_MAP_SHIFT; | 842 | shift -= RADIX_TREE_MAP_SHIFT; |
714 | slot = rcu_dereference(slot->slots[i]); | 843 | slot = rcu_dereference_raw(slot->slots[i]); |
715 | if (slot == NULL) | 844 | if (slot == NULL) |
716 | goto out; | 845 | goto out; |
717 | } | 846 | } |
@@ -758,7 +887,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
758 | unsigned long cur_index = first_index; | 887 | unsigned long cur_index = first_index; |
759 | unsigned int ret; | 888 | unsigned int ret; |
760 | 889 | ||
761 | node = rcu_dereference(root->rnode); | 890 | node = rcu_dereference_raw(root->rnode); |
762 | if (!node) | 891 | if (!node) |
763 | return 0; | 892 | return 0; |
764 | 893 | ||
@@ -787,7 +916,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
787 | slot = *(((void ***)results)[ret + i]); | 916 | slot = *(((void ***)results)[ret + i]); |
788 | if (!slot) | 917 | if (!slot) |
789 | continue; | 918 | continue; |
790 | results[ret + nr_found] = rcu_dereference(slot); | 919 | results[ret + nr_found] = rcu_dereference_raw(slot); |
791 | nr_found++; | 920 | nr_found++; |
792 | } | 921 | } |
793 | ret += nr_found; | 922 | ret += nr_found; |
@@ -826,7 +955,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, | |||
826 | unsigned long cur_index = first_index; | 955 | unsigned long cur_index = first_index; |
827 | unsigned int ret; | 956 | unsigned int ret; |
828 | 957 | ||
829 | node = rcu_dereference(root->rnode); | 958 | node = rcu_dereference_raw(root->rnode); |
830 | if (!node) | 959 | if (!node) |
831 | return 0; | 960 | return 0; |
832 | 961 | ||
@@ -915,7 +1044,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, | |||
915 | } | 1044 | } |
916 | } | 1045 | } |
917 | shift -= RADIX_TREE_MAP_SHIFT; | 1046 | shift -= RADIX_TREE_MAP_SHIFT; |
918 | slot = rcu_dereference(slot->slots[i]); | 1047 | slot = rcu_dereference_raw(slot->slots[i]); |
919 | if (slot == NULL) | 1048 | if (slot == NULL) |
920 | break; | 1049 | break; |
921 | } | 1050 | } |
@@ -951,7 +1080,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
951 | if (!root_tag_get(root, tag)) | 1080 | if (!root_tag_get(root, tag)) |
952 | return 0; | 1081 | return 0; |
953 | 1082 | ||
954 | node = rcu_dereference(root->rnode); | 1083 | node = rcu_dereference_raw(root->rnode); |
955 | if (!node) | 1084 | if (!node) |
956 | return 0; | 1085 | return 0; |
957 | 1086 | ||
@@ -980,7 +1109,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
980 | slot = *(((void ***)results)[ret + i]); | 1109 | slot = *(((void ***)results)[ret + i]); |
981 | if (!slot) | 1110 | if (!slot) |
982 | continue; | 1111 | continue; |
983 | results[ret + nr_found] = rcu_dereference(slot); | 1112 | results[ret + nr_found] = rcu_dereference_raw(slot); |
984 | nr_found++; | 1113 | nr_found++; |
985 | } | 1114 | } |
986 | ret += nr_found; | 1115 | ret += nr_found; |
@@ -1020,7 +1149,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | |||
1020 | if (!root_tag_get(root, tag)) | 1149 | if (!root_tag_get(root, tag)) |
1021 | return 0; | 1150 | return 0; |
1022 | 1151 | ||
1023 | node = rcu_dereference(root->rnode); | 1152 | node = rcu_dereference_raw(root->rnode); |
1024 | if (!node) | 1153 | if (!node) |
1025 | return 0; | 1154 | return 0; |
1026 | 1155 | ||