aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2008-07-25 22:45:29 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-26 15:00:06 -0400
commit47feff2c8eefe85099f87c43d3096855f0085ca0 (patch)
treea3a6d005f202d1a37bb406c5623a9d09a447b0f0
parent30002ed2e41830ec03ec3e577ad83ac6b188f96e (diff)
radix-tree: add gang_lookup_slot, gang_lookup_slot_tag
Introduce gang_lookup_slot() and gang_lookup_slot_tag() functions, which are used by lockless pagecache. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/radix-tree.h12
-rw-r--r--lib/radix-tree.c178
2 files changed, 166 insertions, 24 deletions
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index b8ce2b444bb5..a916c6660dfa 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -99,12 +99,15 @@ do { \
99 * 99 *
100 * The notable exceptions to this rule are the following functions: 100 * The notable exceptions to this rule are the following functions:
101 * radix_tree_lookup 101 * radix_tree_lookup
102 * radix_tree_lookup_slot
102 * radix_tree_tag_get 103 * radix_tree_tag_get
103 * radix_tree_gang_lookup 104 * radix_tree_gang_lookup
105 * radix_tree_gang_lookup_slot
104 * radix_tree_gang_lookup_tag 106 * radix_tree_gang_lookup_tag
107 * radix_tree_gang_lookup_tag_slot
105 * radix_tree_tagged 108 * radix_tree_tagged
106 * 109 *
107 * The first 4 functions are able to be called locklessly, using RCU. The 110 * The first 7 functions are able to be called locklessly, using RCU. The
108 * caller must ensure calls to these functions are made within rcu_read_lock() 111 * caller must ensure calls to these functions are made within rcu_read_lock()
109 * regions. Other readers (lock-free or otherwise) and modifications may be 112 * regions. Other readers (lock-free or otherwise) and modifications may be
110 * running concurrently. 113 * running concurrently.
@@ -159,6 +162,9 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
159unsigned int 162unsigned int
160radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 163radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
161 unsigned long first_index, unsigned int max_items); 164 unsigned long first_index, unsigned int max_items);
165unsigned int
166radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
167 unsigned long first_index, unsigned int max_items);
162unsigned long radix_tree_next_hole(struct radix_tree_root *root, 168unsigned long radix_tree_next_hole(struct radix_tree_root *root,
163 unsigned long index, unsigned long max_scan); 169 unsigned long index, unsigned long max_scan);
164int radix_tree_preload(gfp_t gfp_mask); 170int radix_tree_preload(gfp_t gfp_mask);
@@ -173,6 +179,10 @@ unsigned int
173radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, 179radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
174 unsigned long first_index, unsigned int max_items, 180 unsigned long first_index, unsigned int max_items,
175 unsigned int tag); 181 unsigned int tag);
182unsigned int
183radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
184 unsigned long first_index, unsigned int max_items,
185 unsigned int tag);
176int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); 186int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
177 187
178static inline void radix_tree_preload_end(void) 188static inline void radix_tree_preload_end(void)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 56ec21a7f73d..9c4f1ffa2864 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -359,18 +359,17 @@ EXPORT_SYMBOL(radix_tree_insert);
359 * Returns: the slot corresponding to the position @index in the 359 * Returns: the slot corresponding to the position @index in the
360 * radix tree @root. This is useful for update-if-exists operations. 360 * radix tree @root. This is useful for update-if-exists operations.
361 * 361 *
362 * This function cannot be called under rcu_read_lock, it must be 362 * This function can be called under rcu_read_lock iff the slot is not
363 * excluded from writers, as must the returned slot for subsequent 363 * modified by radix_tree_replace_slot, otherwise it must be called
364 * use by radix_tree_deref_slot() and radix_tree_replace slot. 364 * exclusive from other writers. Any dereference of the slot must be done
365 * Caller must hold tree write locked across slot lookup and 365 * using radix_tree_deref_slot.
366 * replace.
367 */ 366 */
368void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) 367void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
369{ 368{
370 unsigned int height, shift; 369 unsigned int height, shift;
371 struct radix_tree_node *node, **slot; 370 struct radix_tree_node *node, **slot;
372 371
373 node = root->rnode; 372 node = rcu_dereference(root->rnode);
374 if (node == NULL) 373 if (node == NULL)
375 return NULL; 374 return NULL;
376 375
@@ -390,7 +389,7 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
390 do { 389 do {
391 slot = (struct radix_tree_node **) 390 slot = (struct radix_tree_node **)
392 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 391 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
393 node = *slot; 392 node = rcu_dereference(*slot);
394 if (node == NULL) 393 if (node == NULL)
395 return NULL; 394 return NULL;
396 395
@@ -667,7 +666,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
667EXPORT_SYMBOL(radix_tree_next_hole); 666EXPORT_SYMBOL(radix_tree_next_hole);
668 667
669static unsigned int 668static unsigned int
670__lookup(struct radix_tree_node *slot, void **results, unsigned long index, 669__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
671 unsigned int max_items, unsigned long *next_index) 670 unsigned int max_items, unsigned long *next_index)
672{ 671{
673 unsigned int nr_found = 0; 672 unsigned int nr_found = 0;
@@ -701,11 +700,9 @@ __lookup(struct radix_tree_node *slot, void **results, unsigned long index,
701 700
702 /* Bottom level: grab some items */ 701 /* Bottom level: grab some items */
703 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 702 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
704 struct radix_tree_node *node;
705 index++; 703 index++;
706 node = slot->slots[i]; 704 if (slot->slots[i]) {
707 if (node) { 705 results[nr_found++] = &(slot->slots[i]);
708 results[nr_found++] = rcu_dereference(node);
709 if (nr_found == max_items) 706 if (nr_found == max_items)
710 goto out; 707 goto out;
711 } 708 }
@@ -759,13 +756,22 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
759 756
760 ret = 0; 757 ret = 0;
761 while (ret < max_items) { 758 while (ret < max_items) {
762 unsigned int nr_found; 759 unsigned int nr_found, slots_found, i;
763 unsigned long next_index; /* Index of next search */ 760 unsigned long next_index; /* Index of next search */
764 761
765 if (cur_index > max_index) 762 if (cur_index > max_index)
766 break; 763 break;
767 nr_found = __lookup(node, results + ret, cur_index, 764 slots_found = __lookup(node, (void ***)results + ret, cur_index,
768 max_items - ret, &next_index); 765 max_items - ret, &next_index);
766 nr_found = 0;
767 for (i = 0; i < slots_found; i++) {
768 struct radix_tree_node *slot;
769 slot = *(((void ***)results)[ret + i]);
770 if (!slot)
771 continue;
772 results[ret + nr_found] = rcu_dereference(slot);
773 nr_found++;
774 }
769 ret += nr_found; 775 ret += nr_found;
770 if (next_index == 0) 776 if (next_index == 0)
771 break; 777 break;
@@ -776,12 +782,71 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
776} 782}
777EXPORT_SYMBOL(radix_tree_gang_lookup); 783EXPORT_SYMBOL(radix_tree_gang_lookup);
778 784
785/**
786 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
787 * @root: radix tree root
788 * @results: where the results of the lookup are placed
789 * @first_index: start the lookup from this key
790 * @max_items: place up to this many items at *results
791 *
792 * Performs an index-ascending scan of the tree for present items. Places
793 * their slots at *@results and returns the number of items which were
794 * placed at *@results.
795 *
796 * The implementation is naive.
797 *
798 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
799 * be dereferenced with radix_tree_deref_slot, and if using only RCU
800 * protection, radix_tree_deref_slot may fail requiring a retry.
801 */
802unsigned int
803radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
804 unsigned long first_index, unsigned int max_items)
805{
806 unsigned long max_index;
807 struct radix_tree_node *node;
808 unsigned long cur_index = first_index;
809 unsigned int ret;
810
811 node = rcu_dereference(root->rnode);
812 if (!node)
813 return 0;
814
815 if (!radix_tree_is_indirect_ptr(node)) {
816 if (first_index > 0)
817 return 0;
818 results[0] = (void **)&root->rnode;
819 return 1;
820 }
821 node = radix_tree_indirect_to_ptr(node);
822
823 max_index = radix_tree_maxindex(node->height);
824
825 ret = 0;
826 while (ret < max_items) {
827 unsigned int slots_found;
828 unsigned long next_index; /* Index of next search */
829
830 if (cur_index > max_index)
831 break;
832 slots_found = __lookup(node, results + ret, cur_index,
833 max_items - ret, &next_index);
834 ret += slots_found;
835 if (next_index == 0)
836 break;
837 cur_index = next_index;
838 }
839
840 return ret;
841}
842EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
843
779/* 844/*
780 * FIXME: the two tag_get()s here should use find_next_bit() instead of 845 * FIXME: the two tag_get()s here should use find_next_bit() instead of
781 * open-coding the search. 846 * open-coding the search.
782 */ 847 */
783static unsigned int 848static unsigned int
784__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, 849__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
785 unsigned int max_items, unsigned long *next_index, unsigned int tag) 850 unsigned int max_items, unsigned long *next_index, unsigned int tag)
786{ 851{
787 unsigned int nr_found = 0; 852 unsigned int nr_found = 0;
@@ -811,11 +876,9 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
811 unsigned long j = index & RADIX_TREE_MAP_MASK; 876 unsigned long j = index & RADIX_TREE_MAP_MASK;
812 877
813 for ( ; j < RADIX_TREE_MAP_SIZE; j++) { 878 for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
814 struct radix_tree_node *node;
815 index++; 879 index++;
816 if (!tag_get(slot, tag, j)) 880 if (!tag_get(slot, tag, j))
817 continue; 881 continue;
818 node = slot->slots[j];
819 /* 882 /*
820 * Even though the tag was found set, we need to 883 * Even though the tag was found set, we need to
821 * recheck that we have a non-NULL node, because 884 * recheck that we have a non-NULL node, because
@@ -826,9 +889,8 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
826 * lookup ->slots[x] without a lock (ie. can't 889 * lookup ->slots[x] without a lock (ie. can't
827 * rely on its value remaining the same). 890 * rely on its value remaining the same).
828 */ 891 */
829 if (node) { 892 if (slot->slots[j]) {
830 node = rcu_dereference(node); 893 results[nr_found++] = &(slot->slots[j]);
831 results[nr_found++] = node;
832 if (nr_found == max_items) 894 if (nr_found == max_items)
833 goto out; 895 goto out;
834 } 896 }
@@ -887,13 +949,22 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
887 949
888 ret = 0; 950 ret = 0;
889 while (ret < max_items) { 951 while (ret < max_items) {
890 unsigned int nr_found; 952 unsigned int nr_found, slots_found, i;
891 unsigned long next_index; /* Index of next search */ 953 unsigned long next_index; /* Index of next search */
892 954
893 if (cur_index > max_index) 955 if (cur_index > max_index)
894 break; 956 break;
895 nr_found = __lookup_tag(node, results + ret, cur_index, 957 slots_found = __lookup_tag(node, (void ***)results + ret,
896 max_items - ret, &next_index, tag); 958 cur_index, max_items - ret, &next_index, tag);
959 nr_found = 0;
960 for (i = 0; i < slots_found; i++) {
961 struct radix_tree_node *slot;
962 slot = *(((void ***)results)[ret + i]);
963 if (!slot)
964 continue;
965 results[ret + nr_found] = rcu_dereference(slot);
966 nr_found++;
967 }
897 ret += nr_found; 968 ret += nr_found;
898 if (next_index == 0) 969 if (next_index == 0)
899 break; 970 break;
@@ -905,6 +976,67 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
905EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 976EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
906 977
907/** 978/**
979 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
980 * radix tree based on a tag
981 * @root: radix tree root
982 * @results: where the results of the lookup are placed
983 * @first_index: start the lookup from this key
984 * @max_items: place up to this many items at *results
985 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
986 *
987 * Performs an index-ascending scan of the tree for present items which
988 * have the tag indexed by @tag set. Places the slots at *@results and
989 * returns the number of slots which were placed at *@results.
990 */
991unsigned int
992radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
993 unsigned long first_index, unsigned int max_items,
994 unsigned int tag)
995{
996 struct radix_tree_node *node;
997 unsigned long max_index;
998 unsigned long cur_index = first_index;
999 unsigned int ret;
1000
1001 /* check the root's tag bit */
1002 if (!root_tag_get(root, tag))
1003 return 0;
1004
1005 node = rcu_dereference(root->rnode);
1006 if (!node)
1007 return 0;
1008
1009 if (!radix_tree_is_indirect_ptr(node)) {
1010 if (first_index > 0)
1011 return 0;
1012 results[0] = (void **)&root->rnode;
1013 return 1;
1014 }
1015 node = radix_tree_indirect_to_ptr(node);
1016
1017 max_index = radix_tree_maxindex(node->height);
1018
1019 ret = 0;
1020 while (ret < max_items) {
1021 unsigned int slots_found;
1022 unsigned long next_index; /* Index of next search */
1023
1024 if (cur_index > max_index)
1025 break;
1026 slots_found = __lookup_tag(node, results + ret,
1027 cur_index, max_items - ret, &next_index, tag);
1028 ret += slots_found;
1029 if (next_index == 0)
1030 break;
1031 cur_index = next_index;
1032 }
1033
1034 return ret;
1035}
1036EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1037
1038
1039/**
908 * radix_tree_shrink - shrink height of a radix tree to minimal 1040 * radix_tree_shrink - shrink height of a radix tree to minimal
909 * @root radix tree root 1041 * @root radix tree root
910 */ 1042 */