aboutsummaryrefslogtreecommitdiffstats
path: root/lib/radix-tree.c
diff options
context:
space:
mode:
authorMichal Marek <mmarek@suse.cz>2011-03-09 10:15:44 -0500
committerMichal Marek <mmarek@suse.cz>2011-03-09 10:15:44 -0500
commit2d8ad8719591fa803b0d589ed057fa46f49b7155 (patch)
tree4ae051577dad1161c91dafbf4207bb10a9dc91bb /lib/radix-tree.c
parent9b4ce7bce5f30712fd926ab4599a803314a07719 (diff)
parentc56eb8fb6dccb83d9fe62fd4dc00c834de9bc470 (diff)
Merge commit 'v2.6.38-rc1' into kbuild/packaging
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r--lib/radix-tree.c254
1 files changed, 208 insertions, 46 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 92cdd9936e3d..5086bb962b4d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <linux/cpu.h> 30#include <linux/cpu.h>
31#include <linux/gfp.h>
32#include <linux/string.h> 31#include <linux/string.h>
33#include <linux/bitops.h> 32#include <linux/bitops.h>
34#include <linux/rcupdate.h> 33#include <linux/rcupdate.h>
@@ -50,7 +49,7 @@ struct radix_tree_node {
50 unsigned int height; /* Height from the bottom */ 49 unsigned int height; /* Height from the bottom */
51 unsigned int count; 50 unsigned int count;
52 struct rcu_head rcu_head; 51 struct rcu_head rcu_head;
53 void *slots[RADIX_TREE_MAP_SIZE]; 52 void __rcu *slots[RADIX_TREE_MAP_SIZE];
54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
55}; 54};
56 55
@@ -83,6 +82,16 @@ struct radix_tree_preload {
83}; 82};
84static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 83static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
85 84
85static inline void *ptr_to_indirect(void *ptr)
86{
87 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
88}
89
90static inline void *indirect_to_ptr(void *ptr)
91{
92 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
93}
94
86static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 95static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
87{ 96{
88 return root->gfp_mask & __GFP_BITS_MASK; 97 return root->gfp_mask & __GFP_BITS_MASK;
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
175{ 184{
176 struct radix_tree_node *node = 185 struct radix_tree_node *node =
177 container_of(head, struct radix_tree_node, rcu_head); 186 container_of(head, struct radix_tree_node, rcu_head);
187 int i;
178 188
179 /* 189 /*
180 * must only free zeroed nodes into the slab. radix_tree_shrink 190 * must only free zeroed nodes into the slab. radix_tree_shrink
181 * can leave us with a non-NULL entry in the first slot, so clear 191 * can leave us with a non-NULL entry in the first slot, so clear
182 * that here to make sure. 192 * that here to make sure.
183 */ 193 */
184 tag_clear(node, 0, 0); 194 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
185 tag_clear(node, 1, 0); 195 tag_clear(node, i, 0);
196
186 node->slots[0] = NULL; 197 node->slots[0] = NULL;
187 node->count = 0; 198 node->count = 0;
188 199
@@ -264,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
264 return -ENOMEM; 275 return -ENOMEM;
265 276
266 /* Increase the height. */ 277 /* Increase the height. */
267 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 278 node->slots[0] = indirect_to_ptr(root->rnode);
268 279
269 /* Propagate the aggregated tag info into the new root */ 280 /* Propagate the aggregated tag info into the new root */
270 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 281 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -275,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
275 newheight = root->height+1; 286 newheight = root->height+1;
276 node->height = newheight; 287 node->height = newheight;
277 node->count = 1; 288 node->count = 1;
278 node = radix_tree_ptr_to_indirect(node); 289 node = ptr_to_indirect(node);
279 rcu_assign_pointer(root->rnode, node); 290 rcu_assign_pointer(root->rnode, node);
280 root->height = newheight; 291 root->height = newheight;
281 } while (height > root->height); 292 } while (height > root->height);
@@ -308,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
308 return error; 319 return error;
309 } 320 }
310 321
311 slot = radix_tree_indirect_to_ptr(root->rnode); 322 slot = indirect_to_ptr(root->rnode);
312 323
313 height = root->height; 324 height = root->height;
314 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 325 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -324,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
324 rcu_assign_pointer(node->slots[offset], slot); 335 rcu_assign_pointer(node->slots[offset], slot);
325 node->count++; 336 node->count++;
326 } else 337 } else
327 rcu_assign_pointer(root->rnode, 338 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
328 radix_tree_ptr_to_indirect(slot));
329 } 339 }
330 340
331 /* Go a level down */ 341 /* Go a level down */
@@ -364,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
364 unsigned int height, shift; 374 unsigned int height, shift;
365 struct radix_tree_node *node, **slot; 375 struct radix_tree_node *node, **slot;
366 376
367 node = rcu_dereference(root->rnode); 377 node = rcu_dereference_raw(root->rnode);
368 if (node == NULL) 378 if (node == NULL)
369 return NULL; 379 return NULL;
370 380
@@ -373,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
373 return NULL; 383 return NULL;
374 return is_slot ? (void *)&root->rnode : node; 384 return is_slot ? (void *)&root->rnode : node;
375 } 385 }
376 node = radix_tree_indirect_to_ptr(node); 386 node = indirect_to_ptr(node);
377 387
378 height = node->height; 388 height = node->height;
379 if (index > radix_tree_maxindex(height)) 389 if (index > radix_tree_maxindex(height))
@@ -384,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
384 do { 394 do {
385 slot = (struct radix_tree_node **) 395 slot = (struct radix_tree_node **)
386 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 396 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
387 node = rcu_dereference(*slot); 397 node = rcu_dereference_raw(*slot);
388 if (node == NULL) 398 if (node == NULL)
389 return NULL; 399 return NULL;
390 400
@@ -392,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
392 height--; 402 height--;
393 } while (height > 0); 403 } while (height > 0);
394 404
395 return is_slot ? (void *)slot:node; 405 return is_slot ? (void *)slot : indirect_to_ptr(node);
396} 406}
397 407
398/** 408/**
@@ -454,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
454 height = root->height; 464 height = root->height;
455 BUG_ON(index > radix_tree_maxindex(height)); 465 BUG_ON(index > radix_tree_maxindex(height));
456 466
457 slot = radix_tree_indirect_to_ptr(root->rnode); 467 slot = indirect_to_ptr(root->rnode);
458 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 468 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
459 469
460 while (height > 0) { 470 while (height > 0) {
@@ -508,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
508 518
509 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
510 pathp->node = NULL; 520 pathp->node = NULL;
511 slot = radix_tree_indirect_to_ptr(root->rnode); 521 slot = indirect_to_ptr(root->rnode);
512 522
513 while (height > 0) { 523 while (height > 0) {
514 int offset; 524 int offset;
@@ -556,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
556 * 566 *
557 * 0: tag not present or not set 567 * 0: tag not present or not set
558 * 1: tag set 568 * 1: tag set
569 *
570 * Note that the return value of this function may not be relied on, even if
571 * the RCU lock is held, unless tag modification and node deletion are excluded
572 * from concurrency.
559 */ 573 */
560int radix_tree_tag_get(struct radix_tree_root *root, 574int radix_tree_tag_get(struct radix_tree_root *root,
561 unsigned long index, unsigned int tag) 575 unsigned long index, unsigned int tag)
@@ -568,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root,
568 if (!root_tag_get(root, tag)) 582 if (!root_tag_get(root, tag))
569 return 0; 583 return 0;
570 584
571 node = rcu_dereference(root->rnode); 585 node = rcu_dereference_raw(root->rnode);
572 if (node == NULL) 586 if (node == NULL)
573 return 0; 587 return 0;
574 588
575 if (!radix_tree_is_indirect_ptr(node)) 589 if (!radix_tree_is_indirect_ptr(node))
576 return (index == 0); 590 return (index == 0);
577 node = radix_tree_indirect_to_ptr(node); 591 node = indirect_to_ptr(node);
578 592
579 height = node->height; 593 height = node->height;
580 if (index > radix_tree_maxindex(height)) 594 if (index > radix_tree_maxindex(height))
@@ -596,13 +610,9 @@ int radix_tree_tag_get(struct radix_tree_root *root,
596 */ 610 */
597 if (!tag_get(node, tag, offset)) 611 if (!tag_get(node, tag, offset))
598 saw_unset_tag = 1; 612 saw_unset_tag = 1;
599 if (height == 1) { 613 if (height == 1)
600 int ret = tag_get(node, tag, offset); 614 return !!tag_get(node, tag, offset);
601 615 node = rcu_dereference_raw(node->slots[offset]);
602 BUG_ON(ret && saw_unset_tag);
603 return !!ret;
604 }
605 node = rcu_dereference(node->slots[offset]);
606 shift -= RADIX_TREE_MAP_SHIFT; 616 shift -= RADIX_TREE_MAP_SHIFT;
607 height--; 617 height--;
608 } 618 }
@@ -610,6 +620,134 @@ int radix_tree_tag_get(struct radix_tree_root *root,
610EXPORT_SYMBOL(radix_tree_tag_get); 620EXPORT_SYMBOL(radix_tree_tag_get);
611 621
612/** 622/**
623 * radix_tree_range_tag_if_tagged - for each item in given range set given
624 * tag if item has another tag set
625 * @root: radix tree root
626 * @first_indexp: pointer to a starting index of a range to scan
627 * @last_index: last index of a range to scan
628 * @nr_to_tag: maximum number items to tag
629 * @iftag: tag index to test
630 * @settag: tag index to set if tested tag is set
631 *
632 * This function scans range of radix tree from first_index to last_index
633 * (inclusive). For each item in the range if iftag is set, the function sets
634 * also settag. The function stops either after tagging nr_to_tag items or
635 * after reaching last_index.
636 *
637 * The tags must be set from the leaf level only and propagated back up the
638 * path to the root. We must do this so that we resolve the full path before
639 * setting any tags on intermediate nodes. If we set tags as we descend, then
640 * we can get to the leaf node and find that the index that has the iftag
641 * set is outside the range we are scanning. This reults in dangling tags and
642 * can lead to problems with later tag operations (e.g. livelocks on lookups).
643 *
644 * The function returns number of leaves where the tag was set and sets
645 * *first_indexp to the first unscanned index.
646 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
647 * be prepared to handle that.
648 */
649unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
650 unsigned long *first_indexp, unsigned long last_index,
651 unsigned long nr_to_tag,
652 unsigned int iftag, unsigned int settag)
653{
654 unsigned int height = root->height;
655 struct radix_tree_path path[height];
656 struct radix_tree_path *pathp = path;
657 struct radix_tree_node *slot;
658 unsigned int shift;
659 unsigned long tagged = 0;
660 unsigned long index = *first_indexp;
661
662 last_index = min(last_index, radix_tree_maxindex(height));
663 if (index > last_index)
664 return 0;
665 if (!nr_to_tag)
666 return 0;
667 if (!root_tag_get(root, iftag)) {
668 *first_indexp = last_index + 1;
669 return 0;
670 }
671 if (height == 0) {
672 *first_indexp = last_index + 1;
673 root_tag_set(root, settag);
674 return 1;
675 }
676
677 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
678 slot = indirect_to_ptr(root->rnode);
679
680 /*
681 * we fill the path from (root->height - 2) to 0, leaving the index at
682 * (root->height - 1) as a terminator. Zero the node in the terminator
683 * so that we can use this to end walk loops back up the path.
684 */
685 path[height - 1].node = NULL;
686
687 for (;;) {
688 int offset;
689
690 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
691 if (!slot->slots[offset])
692 goto next;
693 if (!tag_get(slot, iftag, offset))
694 goto next;
695 if (height > 1) {
696 /* Go down one level */
697 height--;
698 shift -= RADIX_TREE_MAP_SHIFT;
699 path[height - 1].node = slot;
700 path[height - 1].offset = offset;
701 slot = slot->slots[offset];
702 continue;
703 }
704
705 /* tag the leaf */
706 tagged++;
707 tag_set(slot, settag, offset);
708
709 /* walk back up the path tagging interior nodes */
710 pathp = &path[0];
711 while (pathp->node) {
712 /* stop if we find a node with the tag already set */
713 if (tag_get(pathp->node, settag, pathp->offset))
714 break;
715 tag_set(pathp->node, settag, pathp->offset);
716 pathp++;
717 }
718
719next:
720 /* Go to next item at level determined by 'shift' */
721 index = ((index >> shift) + 1) << shift;
722 /* Overflow can happen when last_index is ~0UL... */
723 if (index > last_index || !index)
724 break;
725 if (tagged >= nr_to_tag)
726 break;
727 while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
728 /*
729 * We've fully scanned this node. Go up. Because
730 * last_index is guaranteed to be in the tree, what
731 * we do below cannot wander astray.
732 */
733 slot = path[height - 1].node;
734 height++;
735 shift += RADIX_TREE_MAP_SHIFT;
736 }
737 }
738 /*
739 * The iftag must have been set somewhere because otherwise
740 * we would return immediated at the beginning of the function
741 */
742 root_tag_set(root, settag);
743 *first_indexp = index;
744
745 return tagged;
746}
747EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
748
749
750/**
613 * radix_tree_next_hole - find the next hole (not-present entry) 751 * radix_tree_next_hole - find the next hole (not-present entry)
614 * @root: tree root 752 * @root: tree root
615 * @index: index key 753 * @index: index key
@@ -657,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
657 * 795 *
658 * Returns: the index of the hole if found, otherwise returns an index 796 * Returns: the index of the hole if found, otherwise returns an index
659 * outside of the set specified (in which case 'index - return >= max_scan' 797 * outside of the set specified (in which case 'index - return >= max_scan'
660 * will be true). In rare cases of wrap-around, LONG_MAX will be returned. 798 * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
661 * 799 *
662 * radix_tree_next_hole may be called under rcu_read_lock. However, like 800 * radix_tree_next_hole may be called under rcu_read_lock. However, like
663 * radix_tree_gang_lookup, this will not atomically search a snapshot of 801 * radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -675,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
675 if (!radix_tree_lookup(root, index)) 813 if (!radix_tree_lookup(root, index))
676 break; 814 break;
677 index--; 815 index--;
678 if (index == LONG_MAX) 816 if (index == ULONG_MAX)
679 break; 817 break;
680 } 818 }
681 819
@@ -711,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
711 } 849 }
712 850
713 shift -= RADIX_TREE_MAP_SHIFT; 851 shift -= RADIX_TREE_MAP_SHIFT;
714 slot = rcu_dereference(slot->slots[i]); 852 slot = rcu_dereference_raw(slot->slots[i]);
715 if (slot == NULL) 853 if (slot == NULL)
716 goto out; 854 goto out;
717 } 855 }
@@ -758,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
758 unsigned long cur_index = first_index; 896 unsigned long cur_index = first_index;
759 unsigned int ret; 897 unsigned int ret;
760 898
761 node = rcu_dereference(root->rnode); 899 node = rcu_dereference_raw(root->rnode);
762 if (!node) 900 if (!node)
763 return 0; 901 return 0;
764 902
@@ -768,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
768 results[0] = node; 906 results[0] = node;
769 return 1; 907 return 1;
770 } 908 }
771 node = radix_tree_indirect_to_ptr(node); 909 node = indirect_to_ptr(node);
772 910
773 max_index = radix_tree_maxindex(node->height); 911 max_index = radix_tree_maxindex(node->height);
774 912
@@ -787,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
787 slot = *(((void ***)results)[ret + i]); 925 slot = *(((void ***)results)[ret + i]);
788 if (!slot) 926 if (!slot)
789 continue; 927 continue;
790 results[ret + nr_found] = rcu_dereference(slot); 928 results[ret + nr_found] =
929 indirect_to_ptr(rcu_dereference_raw(slot));
791 nr_found++; 930 nr_found++;
792 } 931 }
793 ret += nr_found; 932 ret += nr_found;
@@ -826,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
826 unsigned long cur_index = first_index; 965 unsigned long cur_index = first_index;
827 unsigned int ret; 966 unsigned int ret;
828 967
829 node = rcu_dereference(root->rnode); 968 node = rcu_dereference_raw(root->rnode);
830 if (!node) 969 if (!node)
831 return 0; 970 return 0;
832 971
@@ -836,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
836 results[0] = (void **)&root->rnode; 975 results[0] = (void **)&root->rnode;
837 return 1; 976 return 1;
838 } 977 }
839 node = radix_tree_indirect_to_ptr(node); 978 node = indirect_to_ptr(node);
840 979
841 max_index = radix_tree_maxindex(node->height); 980 max_index = radix_tree_maxindex(node->height);
842 981
@@ -915,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
915 } 1054 }
916 } 1055 }
917 shift -= RADIX_TREE_MAP_SHIFT; 1056 shift -= RADIX_TREE_MAP_SHIFT;
918 slot = rcu_dereference(slot->slots[i]); 1057 slot = rcu_dereference_raw(slot->slots[i]);
919 if (slot == NULL) 1058 if (slot == NULL)
920 break; 1059 break;
921 } 1060 }
@@ -951,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
951 if (!root_tag_get(root, tag)) 1090 if (!root_tag_get(root, tag))
952 return 0; 1091 return 0;
953 1092
954 node = rcu_dereference(root->rnode); 1093 node = rcu_dereference_raw(root->rnode);
955 if (!node) 1094 if (!node)
956 return 0; 1095 return 0;
957 1096
@@ -961,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
961 results[0] = node; 1100 results[0] = node;
962 return 1; 1101 return 1;
963 } 1102 }
964 node = radix_tree_indirect_to_ptr(node); 1103 node = indirect_to_ptr(node);
965 1104
966 max_index = radix_tree_maxindex(node->height); 1105 max_index = radix_tree_maxindex(node->height);
967 1106
@@ -980,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
980 slot = *(((void ***)results)[ret + i]); 1119 slot = *(((void ***)results)[ret + i]);
981 if (!slot) 1120 if (!slot)
982 continue; 1121 continue;
983 results[ret + nr_found] = rcu_dereference(slot); 1122 results[ret + nr_found] =
1123 indirect_to_ptr(rcu_dereference_raw(slot));
984 nr_found++; 1124 nr_found++;
985 } 1125 }
986 ret += nr_found; 1126 ret += nr_found;
@@ -1020,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1020 if (!root_tag_get(root, tag)) 1160 if (!root_tag_get(root, tag))
1021 return 0; 1161 return 0;
1022 1162
1023 node = rcu_dereference(root->rnode); 1163 node = rcu_dereference_raw(root->rnode);
1024 if (!node) 1164 if (!node)
1025 return 0; 1165 return 0;
1026 1166
@@ -1030,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1030 results[0] = (void **)&root->rnode; 1170 results[0] = (void **)&root->rnode;
1031 return 1; 1171 return 1;
1032 } 1172 }
1033 node = radix_tree_indirect_to_ptr(node); 1173 node = indirect_to_ptr(node);
1034 1174
1035 max_index = radix_tree_maxindex(node->height); 1175 max_index = radix_tree_maxindex(node->height);
1036 1176
@@ -1066,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1066 void *newptr; 1206 void *newptr;
1067 1207
1068 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1208 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1069 to_free = radix_tree_indirect_to_ptr(to_free); 1209 to_free = indirect_to_ptr(to_free);
1070 1210
1071 /* 1211 /*
1072 * The candidate node has more than one child, or its child 1212 * The candidate node has more than one child, or its child
@@ -1079,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1079 1219
1080 /* 1220 /*
1081 * We don't need rcu_assign_pointer(), since we are simply 1221 * We don't need rcu_assign_pointer(), since we are simply
1082 * moving the node from one part of the tree to another. If 1222 * moving the node from one part of the tree to another: if it
1083 * it was safe to dereference the old pointer to it 1223 * was safe to dereference the old pointer to it
1084 * (to_free->slots[0]), it will be safe to dereference the new 1224 * (to_free->slots[0]), it will be safe to dereference the new
1085 * one (root->rnode). 1225 * one (root->rnode) as far as dependent read barriers go.
1086 */ 1226 */
1087 newptr = to_free->slots[0]; 1227 newptr = to_free->slots[0];
1088 if (root->height > 1) 1228 if (root->height > 1)
1089 newptr = radix_tree_ptr_to_indirect(newptr); 1229 newptr = ptr_to_indirect(newptr);
1090 root->rnode = newptr; 1230 root->rnode = newptr;
1091 root->height--; 1231 root->height--;
1232
1233 /*
1234 * We have a dilemma here. The node's slot[0] must not be
1235 * NULLed in case there are concurrent lookups expecting to
1236 * find the item. However if this was a bottom-level node,
1237 * then it may be subject to the slot pointer being visible
1238 * to callers dereferencing it. If item corresponding to
1239 * slot[0] is subsequently deleted, these callers would expect
1240 * their slot to become empty sooner or later.
1241 *
1242 * For example, lockless pagecache will look up a slot, deref
1243 * the page pointer, and if the page is 0 refcount it means it
1244 * was concurrently deleted from pagecache so try the deref
1245 * again. Fortunately there is already a requirement for logic
1246 * to retry the entire slot lookup -- the indirect pointer
1247 * problem (replacing direct root node with an indirect pointer
1248 * also results in a stale slot). So tag the slot as indirect
1249 * to force callers to retry.
1250 */
1251 if (root->height == 0)
1252 *((unsigned long *)&to_free->slots[0]) |=
1253 RADIX_TREE_INDIRECT_PTR;
1254
1092 radix_tree_node_free(to_free); 1255 radix_tree_node_free(to_free);
1093 } 1256 }
1094} 1257}
@@ -1125,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1125 root->rnode = NULL; 1288 root->rnode = NULL;
1126 goto out; 1289 goto out;
1127 } 1290 }
1128 slot = radix_tree_indirect_to_ptr(slot); 1291 slot = indirect_to_ptr(slot);
1129 1292
1130 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1293 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1131 pathp->node = NULL; 1294 pathp->node = NULL;
@@ -1167,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1167 radix_tree_node_free(to_free); 1330 radix_tree_node_free(to_free);
1168 1331
1169 if (pathp->node->count) { 1332 if (pathp->node->count) {
1170 if (pathp->node == 1333 if (pathp->node == indirect_to_ptr(root->rnode))
1171 radix_tree_indirect_to_ptr(root->rnode))
1172 radix_tree_shrink(root); 1334 radix_tree_shrink(root);
1173 goto out; 1335 goto out;
1174 } 1336 }