diff options
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r-- | lib/radix-tree.c | 254 |
1 files changed, 208 insertions, 46 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 92cdd9936e3d..5086bb962b4d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/notifier.h> | 29 | #include <linux/notifier.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/gfp.h> | ||
32 | #include <linux/string.h> | 31 | #include <linux/string.h> |
33 | #include <linux/bitops.h> | 32 | #include <linux/bitops.h> |
34 | #include <linux/rcupdate.h> | 33 | #include <linux/rcupdate.h> |
@@ -50,7 +49,7 @@ struct radix_tree_node { | |||
50 | unsigned int height; /* Height from the bottom */ | 49 | unsigned int height; /* Height from the bottom */ |
51 | unsigned int count; | 50 | unsigned int count; |
52 | struct rcu_head rcu_head; | 51 | struct rcu_head rcu_head; |
53 | void *slots[RADIX_TREE_MAP_SIZE]; | 52 | void __rcu *slots[RADIX_TREE_MAP_SIZE]; |
54 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; | 53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; |
55 | }; | 54 | }; |
56 | 55 | ||
@@ -83,6 +82,16 @@ struct radix_tree_preload { | |||
83 | }; | 82 | }; |
84 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | 83 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; |
85 | 84 | ||
85 | static inline void *ptr_to_indirect(void *ptr) | ||
86 | { | ||
87 | return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); | ||
88 | } | ||
89 | |||
90 | static inline void *indirect_to_ptr(void *ptr) | ||
91 | { | ||
92 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); | ||
93 | } | ||
94 | |||
86 | static inline gfp_t root_gfp_mask(struct radix_tree_root *root) | 95 | static inline gfp_t root_gfp_mask(struct radix_tree_root *root) |
87 | { | 96 | { |
88 | return root->gfp_mask & __GFP_BITS_MASK; | 97 | return root->gfp_mask & __GFP_BITS_MASK; |
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) | |||
175 | { | 184 | { |
176 | struct radix_tree_node *node = | 185 | struct radix_tree_node *node = |
177 | container_of(head, struct radix_tree_node, rcu_head); | 186 | container_of(head, struct radix_tree_node, rcu_head); |
187 | int i; | ||
178 | 188 | ||
179 | /* | 189 | /* |
180 | * must only free zeroed nodes into the slab. radix_tree_shrink | 190 | * must only free zeroed nodes into the slab. radix_tree_shrink |
181 | * can leave us with a non-NULL entry in the first slot, so clear | 191 | * can leave us with a non-NULL entry in the first slot, so clear |
182 | * that here to make sure. | 192 | * that here to make sure. |
183 | */ | 193 | */ |
184 | tag_clear(node, 0, 0); | 194 | for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) |
185 | tag_clear(node, 1, 0); | 195 | tag_clear(node, i, 0); |
196 | |||
186 | node->slots[0] = NULL; | 197 | node->slots[0] = NULL; |
187 | node->count = 0; | 198 | node->count = 0; |
188 | 199 | ||
@@ -264,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) | |||
264 | return -ENOMEM; | 275 | return -ENOMEM; |
265 | 276 | ||
266 | /* Increase the height. */ | 277 | /* Increase the height. */ |
267 | node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); | 278 | node->slots[0] = indirect_to_ptr(root->rnode); |
268 | 279 | ||
269 | /* Propagate the aggregated tag info into the new root */ | 280 | /* Propagate the aggregated tag info into the new root */ |
270 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { | 281 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { |
@@ -275,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) | |||
275 | newheight = root->height+1; | 286 | newheight = root->height+1; |
276 | node->height = newheight; | 287 | node->height = newheight; |
277 | node->count = 1; | 288 | node->count = 1; |
278 | node = radix_tree_ptr_to_indirect(node); | 289 | node = ptr_to_indirect(node); |
279 | rcu_assign_pointer(root->rnode, node); | 290 | rcu_assign_pointer(root->rnode, node); |
280 | root->height = newheight; | 291 | root->height = newheight; |
281 | } while (height > root->height); | 292 | } while (height > root->height); |
@@ -308,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root, | |||
308 | return error; | 319 | return error; |
309 | } | 320 | } |
310 | 321 | ||
311 | slot = radix_tree_indirect_to_ptr(root->rnode); | 322 | slot = indirect_to_ptr(root->rnode); |
312 | 323 | ||
313 | height = root->height; | 324 | height = root->height; |
314 | shift = (height-1) * RADIX_TREE_MAP_SHIFT; | 325 | shift = (height-1) * RADIX_TREE_MAP_SHIFT; |
@@ -324,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root, | |||
324 | rcu_assign_pointer(node->slots[offset], slot); | 335 | rcu_assign_pointer(node->slots[offset], slot); |
325 | node->count++; | 336 | node->count++; |
326 | } else | 337 | } else |
327 | rcu_assign_pointer(root->rnode, | 338 | rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); |
328 | radix_tree_ptr_to_indirect(slot)); | ||
329 | } | 339 | } |
330 | 340 | ||
331 | /* Go a level down */ | 341 | /* Go a level down */ |
@@ -364,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, | |||
364 | unsigned int height, shift; | 374 | unsigned int height, shift; |
365 | struct radix_tree_node *node, **slot; | 375 | struct radix_tree_node *node, **slot; |
366 | 376 | ||
367 | node = rcu_dereference(root->rnode); | 377 | node = rcu_dereference_raw(root->rnode); |
368 | if (node == NULL) | 378 | if (node == NULL) |
369 | return NULL; | 379 | return NULL; |
370 | 380 | ||
@@ -373,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, | |||
373 | return NULL; | 383 | return NULL; |
374 | return is_slot ? (void *)&root->rnode : node; | 384 | return is_slot ? (void *)&root->rnode : node; |
375 | } | 385 | } |
376 | node = radix_tree_indirect_to_ptr(node); | 386 | node = indirect_to_ptr(node); |
377 | 387 | ||
378 | height = node->height; | 388 | height = node->height; |
379 | if (index > radix_tree_maxindex(height)) | 389 | if (index > radix_tree_maxindex(height)) |
@@ -384,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, | |||
384 | do { | 394 | do { |
385 | slot = (struct radix_tree_node **) | 395 | slot = (struct radix_tree_node **) |
386 | (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); | 396 | (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); |
387 | node = rcu_dereference(*slot); | 397 | node = rcu_dereference_raw(*slot); |
388 | if (node == NULL) | 398 | if (node == NULL) |
389 | return NULL; | 399 | return NULL; |
390 | 400 | ||
@@ -392,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, | |||
392 | height--; | 402 | height--; |
393 | } while (height > 0); | 403 | } while (height > 0); |
394 | 404 | ||
395 | return is_slot ? (void *)slot:node; | 405 | return is_slot ? (void *)slot : indirect_to_ptr(node); |
396 | } | 406 | } |
397 | 407 | ||
398 | /** | 408 | /** |
@@ -454,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, | |||
454 | height = root->height; | 464 | height = root->height; |
455 | BUG_ON(index > radix_tree_maxindex(height)); | 465 | BUG_ON(index > radix_tree_maxindex(height)); |
456 | 466 | ||
457 | slot = radix_tree_indirect_to_ptr(root->rnode); | 467 | slot = indirect_to_ptr(root->rnode); |
458 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | 468 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; |
459 | 469 | ||
460 | while (height > 0) { | 470 | while (height > 0) { |
@@ -508,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, | |||
508 | 518 | ||
509 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | 519 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; |
510 | pathp->node = NULL; | 520 | pathp->node = NULL; |
511 | slot = radix_tree_indirect_to_ptr(root->rnode); | 521 | slot = indirect_to_ptr(root->rnode); |
512 | 522 | ||
513 | while (height > 0) { | 523 | while (height > 0) { |
514 | int offset; | 524 | int offset; |
@@ -556,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear); | |||
556 | * | 566 | * |
557 | * 0: tag not present or not set | 567 | * 0: tag not present or not set |
558 | * 1: tag set | 568 | * 1: tag set |
569 | * | ||
570 | * Note that the return value of this function may not be relied on, even if | ||
571 | * the RCU lock is held, unless tag modification and node deletion are excluded | ||
572 | * from concurrency. | ||
559 | */ | 573 | */ |
560 | int radix_tree_tag_get(struct radix_tree_root *root, | 574 | int radix_tree_tag_get(struct radix_tree_root *root, |
561 | unsigned long index, unsigned int tag) | 575 | unsigned long index, unsigned int tag) |
@@ -568,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
568 | if (!root_tag_get(root, tag)) | 582 | if (!root_tag_get(root, tag)) |
569 | return 0; | 583 | return 0; |
570 | 584 | ||
571 | node = rcu_dereference(root->rnode); | 585 | node = rcu_dereference_raw(root->rnode); |
572 | if (node == NULL) | 586 | if (node == NULL) |
573 | return 0; | 587 | return 0; |
574 | 588 | ||
575 | if (!radix_tree_is_indirect_ptr(node)) | 589 | if (!radix_tree_is_indirect_ptr(node)) |
576 | return (index == 0); | 590 | return (index == 0); |
577 | node = radix_tree_indirect_to_ptr(node); | 591 | node = indirect_to_ptr(node); |
578 | 592 | ||
579 | height = node->height; | 593 | height = node->height; |
580 | if (index > radix_tree_maxindex(height)) | 594 | if (index > radix_tree_maxindex(height)) |
@@ -596,13 +610,9 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
596 | */ | 610 | */ |
597 | if (!tag_get(node, tag, offset)) | 611 | if (!tag_get(node, tag, offset)) |
598 | saw_unset_tag = 1; | 612 | saw_unset_tag = 1; |
599 | if (height == 1) { | 613 | if (height == 1) |
600 | int ret = tag_get(node, tag, offset); | 614 | return !!tag_get(node, tag, offset); |
601 | 615 | node = rcu_dereference_raw(node->slots[offset]); | |
602 | BUG_ON(ret && saw_unset_tag); | ||
603 | return !!ret; | ||
604 | } | ||
605 | node = rcu_dereference(node->slots[offset]); | ||
606 | shift -= RADIX_TREE_MAP_SHIFT; | 616 | shift -= RADIX_TREE_MAP_SHIFT; |
607 | height--; | 617 | height--; |
608 | } | 618 | } |
@@ -610,6 +620,134 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
610 | EXPORT_SYMBOL(radix_tree_tag_get); | 620 | EXPORT_SYMBOL(radix_tree_tag_get); |
611 | 621 | ||
612 | /** | 622 | /** |
623 | * radix_tree_range_tag_if_tagged - for each item in given range set given | ||
624 | * tag if item has another tag set | ||
625 | * @root: radix tree root | ||
626 | * @first_indexp: pointer to a starting index of a range to scan | ||
627 | * @last_index: last index of a range to scan | ||
628 | * @nr_to_tag: maximum number items to tag | ||
629 | * @iftag: tag index to test | ||
630 | * @settag: tag index to set if tested tag is set | ||
631 | * | ||
632 | * This function scans range of radix tree from first_index to last_index | ||
633 | * (inclusive). For each item in the range if iftag is set, the function sets | ||
634 | * also settag. The function stops either after tagging nr_to_tag items or | ||
635 | * after reaching last_index. | ||
636 | * | ||
637 | * The tags must be set from the leaf level only and propagated back up the | ||
638 | * path to the root. We must do this so that we resolve the full path before | ||
639 | * setting any tags on intermediate nodes. If we set tags as we descend, then | ||
640 | * we can get to the leaf node and find that the index that has the iftag | ||
641 | * set is outside the range we are scanning. This reults in dangling tags and | ||
642 | * can lead to problems with later tag operations (e.g. livelocks on lookups). | ||
643 | * | ||
644 | * The function returns number of leaves where the tag was set and sets | ||
645 | * *first_indexp to the first unscanned index. | ||
646 | * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must | ||
647 | * be prepared to handle that. | ||
648 | */ | ||
649 | unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | ||
650 | unsigned long *first_indexp, unsigned long last_index, | ||
651 | unsigned long nr_to_tag, | ||
652 | unsigned int iftag, unsigned int settag) | ||
653 | { | ||
654 | unsigned int height = root->height; | ||
655 | struct radix_tree_path path[height]; | ||
656 | struct radix_tree_path *pathp = path; | ||
657 | struct radix_tree_node *slot; | ||
658 | unsigned int shift; | ||
659 | unsigned long tagged = 0; | ||
660 | unsigned long index = *first_indexp; | ||
661 | |||
662 | last_index = min(last_index, radix_tree_maxindex(height)); | ||
663 | if (index > last_index) | ||
664 | return 0; | ||
665 | if (!nr_to_tag) | ||
666 | return 0; | ||
667 | if (!root_tag_get(root, iftag)) { | ||
668 | *first_indexp = last_index + 1; | ||
669 | return 0; | ||
670 | } | ||
671 | if (height == 0) { | ||
672 | *first_indexp = last_index + 1; | ||
673 | root_tag_set(root, settag); | ||
674 | return 1; | ||
675 | } | ||
676 | |||
677 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | ||
678 | slot = indirect_to_ptr(root->rnode); | ||
679 | |||
680 | /* | ||
681 | * we fill the path from (root->height - 2) to 0, leaving the index at | ||
682 | * (root->height - 1) as a terminator. Zero the node in the terminator | ||
683 | * so that we can use this to end walk loops back up the path. | ||
684 | */ | ||
685 | path[height - 1].node = NULL; | ||
686 | |||
687 | for (;;) { | ||
688 | int offset; | ||
689 | |||
690 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; | ||
691 | if (!slot->slots[offset]) | ||
692 | goto next; | ||
693 | if (!tag_get(slot, iftag, offset)) | ||
694 | goto next; | ||
695 | if (height > 1) { | ||
696 | /* Go down one level */ | ||
697 | height--; | ||
698 | shift -= RADIX_TREE_MAP_SHIFT; | ||
699 | path[height - 1].node = slot; | ||
700 | path[height - 1].offset = offset; | ||
701 | slot = slot->slots[offset]; | ||
702 | continue; | ||
703 | } | ||
704 | |||
705 | /* tag the leaf */ | ||
706 | tagged++; | ||
707 | tag_set(slot, settag, offset); | ||
708 | |||
709 | /* walk back up the path tagging interior nodes */ | ||
710 | pathp = &path[0]; | ||
711 | while (pathp->node) { | ||
712 | /* stop if we find a node with the tag already set */ | ||
713 | if (tag_get(pathp->node, settag, pathp->offset)) | ||
714 | break; | ||
715 | tag_set(pathp->node, settag, pathp->offset); | ||
716 | pathp++; | ||
717 | } | ||
718 | |||
719 | next: | ||
720 | /* Go to next item at level determined by 'shift' */ | ||
721 | index = ((index >> shift) + 1) << shift; | ||
722 | /* Overflow can happen when last_index is ~0UL... */ | ||
723 | if (index > last_index || !index) | ||
724 | break; | ||
725 | if (tagged >= nr_to_tag) | ||
726 | break; | ||
727 | while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { | ||
728 | /* | ||
729 | * We've fully scanned this node. Go up. Because | ||
730 | * last_index is guaranteed to be in the tree, what | ||
731 | * we do below cannot wander astray. | ||
732 | */ | ||
733 | slot = path[height - 1].node; | ||
734 | height++; | ||
735 | shift += RADIX_TREE_MAP_SHIFT; | ||
736 | } | ||
737 | } | ||
738 | /* | ||
739 | * The iftag must have been set somewhere because otherwise | ||
740 | * we would return immediated at the beginning of the function | ||
741 | */ | ||
742 | root_tag_set(root, settag); | ||
743 | *first_indexp = index; | ||
744 | |||
745 | return tagged; | ||
746 | } | ||
747 | EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); | ||
748 | |||
749 | |||
750 | /** | ||
613 | * radix_tree_next_hole - find the next hole (not-present entry) | 751 | * radix_tree_next_hole - find the next hole (not-present entry) |
614 | * @root: tree root | 752 | * @root: tree root |
615 | * @index: index key | 753 | * @index: index key |
@@ -657,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole); | |||
657 | * | 795 | * |
658 | * Returns: the index of the hole if found, otherwise returns an index | 796 | * Returns: the index of the hole if found, otherwise returns an index |
659 | * outside of the set specified (in which case 'index - return >= max_scan' | 797 | * outside of the set specified (in which case 'index - return >= max_scan' |
660 | * will be true). In rare cases of wrap-around, LONG_MAX will be returned. | 798 | * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. |
661 | * | 799 | * |
662 | * radix_tree_next_hole may be called under rcu_read_lock. However, like | 800 | * radix_tree_next_hole may be called under rcu_read_lock. However, like |
663 | * radix_tree_gang_lookup, this will not atomically search a snapshot of | 801 | * radix_tree_gang_lookup, this will not atomically search a snapshot of |
@@ -675,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, | |||
675 | if (!radix_tree_lookup(root, index)) | 813 | if (!radix_tree_lookup(root, index)) |
676 | break; | 814 | break; |
677 | index--; | 815 | index--; |
678 | if (index == LONG_MAX) | 816 | if (index == ULONG_MAX) |
679 | break; | 817 | break; |
680 | } | 818 | } |
681 | 819 | ||
@@ -711,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, | |||
711 | } | 849 | } |
712 | 850 | ||
713 | shift -= RADIX_TREE_MAP_SHIFT; | 851 | shift -= RADIX_TREE_MAP_SHIFT; |
714 | slot = rcu_dereference(slot->slots[i]); | 852 | slot = rcu_dereference_raw(slot->slots[i]); |
715 | if (slot == NULL) | 853 | if (slot == NULL) |
716 | goto out; | 854 | goto out; |
717 | } | 855 | } |
@@ -758,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
758 | unsigned long cur_index = first_index; | 896 | unsigned long cur_index = first_index; |
759 | unsigned int ret; | 897 | unsigned int ret; |
760 | 898 | ||
761 | node = rcu_dereference(root->rnode); | 899 | node = rcu_dereference_raw(root->rnode); |
762 | if (!node) | 900 | if (!node) |
763 | return 0; | 901 | return 0; |
764 | 902 | ||
@@ -768,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
768 | results[0] = node; | 906 | results[0] = node; |
769 | return 1; | 907 | return 1; |
770 | } | 908 | } |
771 | node = radix_tree_indirect_to_ptr(node); | 909 | node = indirect_to_ptr(node); |
772 | 910 | ||
773 | max_index = radix_tree_maxindex(node->height); | 911 | max_index = radix_tree_maxindex(node->height); |
774 | 912 | ||
@@ -787,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
787 | slot = *(((void ***)results)[ret + i]); | 925 | slot = *(((void ***)results)[ret + i]); |
788 | if (!slot) | 926 | if (!slot) |
789 | continue; | 927 | continue; |
790 | results[ret + nr_found] = rcu_dereference(slot); | 928 | results[ret + nr_found] = |
929 | indirect_to_ptr(rcu_dereference_raw(slot)); | ||
791 | nr_found++; | 930 | nr_found++; |
792 | } | 931 | } |
793 | ret += nr_found; | 932 | ret += nr_found; |
@@ -826,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, | |||
826 | unsigned long cur_index = first_index; | 965 | unsigned long cur_index = first_index; |
827 | unsigned int ret; | 966 | unsigned int ret; |
828 | 967 | ||
829 | node = rcu_dereference(root->rnode); | 968 | node = rcu_dereference_raw(root->rnode); |
830 | if (!node) | 969 | if (!node) |
831 | return 0; | 970 | return 0; |
832 | 971 | ||
@@ -836,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, | |||
836 | results[0] = (void **)&root->rnode; | 975 | results[0] = (void **)&root->rnode; |
837 | return 1; | 976 | return 1; |
838 | } | 977 | } |
839 | node = radix_tree_indirect_to_ptr(node); | 978 | node = indirect_to_ptr(node); |
840 | 979 | ||
841 | max_index = radix_tree_maxindex(node->height); | 980 | max_index = radix_tree_maxindex(node->height); |
842 | 981 | ||
@@ -915,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, | |||
915 | } | 1054 | } |
916 | } | 1055 | } |
917 | shift -= RADIX_TREE_MAP_SHIFT; | 1056 | shift -= RADIX_TREE_MAP_SHIFT; |
918 | slot = rcu_dereference(slot->slots[i]); | 1057 | slot = rcu_dereference_raw(slot->slots[i]); |
919 | if (slot == NULL) | 1058 | if (slot == NULL) |
920 | break; | 1059 | break; |
921 | } | 1060 | } |
@@ -951,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
951 | if (!root_tag_get(root, tag)) | 1090 | if (!root_tag_get(root, tag)) |
952 | return 0; | 1091 | return 0; |
953 | 1092 | ||
954 | node = rcu_dereference(root->rnode); | 1093 | node = rcu_dereference_raw(root->rnode); |
955 | if (!node) | 1094 | if (!node) |
956 | return 0; | 1095 | return 0; |
957 | 1096 | ||
@@ -961,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
961 | results[0] = node; | 1100 | results[0] = node; |
962 | return 1; | 1101 | return 1; |
963 | } | 1102 | } |
964 | node = radix_tree_indirect_to_ptr(node); | 1103 | node = indirect_to_ptr(node); |
965 | 1104 | ||
966 | max_index = radix_tree_maxindex(node->height); | 1105 | max_index = radix_tree_maxindex(node->height); |
967 | 1106 | ||
@@ -980,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
980 | slot = *(((void ***)results)[ret + i]); | 1119 | slot = *(((void ***)results)[ret + i]); |
981 | if (!slot) | 1120 | if (!slot) |
982 | continue; | 1121 | continue; |
983 | results[ret + nr_found] = rcu_dereference(slot); | 1122 | results[ret + nr_found] = |
1123 | indirect_to_ptr(rcu_dereference_raw(slot)); | ||
984 | nr_found++; | 1124 | nr_found++; |
985 | } | 1125 | } |
986 | ret += nr_found; | 1126 | ret += nr_found; |
@@ -1020,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | |||
1020 | if (!root_tag_get(root, tag)) | 1160 | if (!root_tag_get(root, tag)) |
1021 | return 0; | 1161 | return 0; |
1022 | 1162 | ||
1023 | node = rcu_dereference(root->rnode); | 1163 | node = rcu_dereference_raw(root->rnode); |
1024 | if (!node) | 1164 | if (!node) |
1025 | return 0; | 1165 | return 0; |
1026 | 1166 | ||
@@ -1030,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | |||
1030 | results[0] = (void **)&root->rnode; | 1170 | results[0] = (void **)&root->rnode; |
1031 | return 1; | 1171 | return 1; |
1032 | } | 1172 | } |
1033 | node = radix_tree_indirect_to_ptr(node); | 1173 | node = indirect_to_ptr(node); |
1034 | 1174 | ||
1035 | max_index = radix_tree_maxindex(node->height); | 1175 | max_index = radix_tree_maxindex(node->height); |
1036 | 1176 | ||
@@ -1066,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) | |||
1066 | void *newptr; | 1206 | void *newptr; |
1067 | 1207 | ||
1068 | BUG_ON(!radix_tree_is_indirect_ptr(to_free)); | 1208 | BUG_ON(!radix_tree_is_indirect_ptr(to_free)); |
1069 | to_free = radix_tree_indirect_to_ptr(to_free); | 1209 | to_free = indirect_to_ptr(to_free); |
1070 | 1210 | ||
1071 | /* | 1211 | /* |
1072 | * The candidate node has more than one child, or its child | 1212 | * The candidate node has more than one child, or its child |
@@ -1079,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) | |||
1079 | 1219 | ||
1080 | /* | 1220 | /* |
1081 | * We don't need rcu_assign_pointer(), since we are simply | 1221 | * We don't need rcu_assign_pointer(), since we are simply |
1082 | * moving the node from one part of the tree to another. If | 1222 | * moving the node from one part of the tree to another: if it |
1083 | * it was safe to dereference the old pointer to it | 1223 | * was safe to dereference the old pointer to it |
1084 | * (to_free->slots[0]), it will be safe to dereference the new | 1224 | * (to_free->slots[0]), it will be safe to dereference the new |
1085 | * one (root->rnode). | 1225 | * one (root->rnode) as far as dependent read barriers go. |
1086 | */ | 1226 | */ |
1087 | newptr = to_free->slots[0]; | 1227 | newptr = to_free->slots[0]; |
1088 | if (root->height > 1) | 1228 | if (root->height > 1) |
1089 | newptr = radix_tree_ptr_to_indirect(newptr); | 1229 | newptr = ptr_to_indirect(newptr); |
1090 | root->rnode = newptr; | 1230 | root->rnode = newptr; |
1091 | root->height--; | 1231 | root->height--; |
1232 | |||
1233 | /* | ||
1234 | * We have a dilemma here. The node's slot[0] must not be | ||
1235 | * NULLed in case there are concurrent lookups expecting to | ||
1236 | * find the item. However if this was a bottom-level node, | ||
1237 | * then it may be subject to the slot pointer being visible | ||
1238 | * to callers dereferencing it. If item corresponding to | ||
1239 | * slot[0] is subsequently deleted, these callers would expect | ||
1240 | * their slot to become empty sooner or later. | ||
1241 | * | ||
1242 | * For example, lockless pagecache will look up a slot, deref | ||
1243 | * the page pointer, and if the page is 0 refcount it means it | ||
1244 | * was concurrently deleted from pagecache so try the deref | ||
1245 | * again. Fortunately there is already a requirement for logic | ||
1246 | * to retry the entire slot lookup -- the indirect pointer | ||
1247 | * problem (replacing direct root node with an indirect pointer | ||
1248 | * also results in a stale slot). So tag the slot as indirect | ||
1249 | * to force callers to retry. | ||
1250 | */ | ||
1251 | if (root->height == 0) | ||
1252 | *((unsigned long *)&to_free->slots[0]) |= | ||
1253 | RADIX_TREE_INDIRECT_PTR; | ||
1254 | |||
1092 | radix_tree_node_free(to_free); | 1255 | radix_tree_node_free(to_free); |
1093 | } | 1256 | } |
1094 | } | 1257 | } |
@@ -1125,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | |||
1125 | root->rnode = NULL; | 1288 | root->rnode = NULL; |
1126 | goto out; | 1289 | goto out; |
1127 | } | 1290 | } |
1128 | slot = radix_tree_indirect_to_ptr(slot); | 1291 | slot = indirect_to_ptr(slot); |
1129 | 1292 | ||
1130 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | 1293 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; |
1131 | pathp->node = NULL; | 1294 | pathp->node = NULL; |
@@ -1167,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | |||
1167 | radix_tree_node_free(to_free); | 1330 | radix_tree_node_free(to_free); |
1168 | 1331 | ||
1169 | if (pathp->node->count) { | 1332 | if (pathp->node->count) { |
1170 | if (pathp->node == | 1333 | if (pathp->node == indirect_to_ptr(root->rnode)) |
1171 | radix_tree_indirect_to_ptr(root->rnode)) | ||
1172 | radix_tree_shrink(root); | 1334 | radix_tree_shrink(root); |
1173 | goto out; | 1335 | goto out; |
1174 | } | 1336 | } |