diff options
author | Nick Piggin <npiggin@suse.de> | 2008-07-25 22:45:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-26 15:00:06 -0400 |
commit | 47feff2c8eefe85099f87c43d3096855f0085ca0 (patch) | |
tree | a3a6d005f202d1a37bb406c5623a9d09a447b0f0 | |
parent | 30002ed2e41830ec03ec3e577ad83ac6b188f96e (diff) |
radix-tree: add gang_lookup_slot, gang_lookup_slot_tag
Introduce gang_lookup_slot() and gang_lookup_slot_tag() functions, which
are used by lockless pagecache.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/radix-tree.h | 12 | ||||
-rw-r--r-- | lib/radix-tree.c | 178 |
2 files changed, 166 insertions, 24 deletions
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b8ce2b444bb5..a916c6660dfa 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -99,12 +99,15 @@ do { \ | |||
99 | * | 99 | * |
100 | * The notable exceptions to this rule are the following functions: | 100 | * The notable exceptions to this rule are the following functions: |
101 | * radix_tree_lookup | 101 | * radix_tree_lookup |
102 | * radix_tree_lookup_slot | ||
102 | * radix_tree_tag_get | 103 | * radix_tree_tag_get |
103 | * radix_tree_gang_lookup | 104 | * radix_tree_gang_lookup |
105 | * radix_tree_gang_lookup_slot | ||
104 | * radix_tree_gang_lookup_tag | 106 | * radix_tree_gang_lookup_tag |
107 | * radix_tree_gang_lookup_tag_slot | ||
105 | * radix_tree_tagged | 108 | * radix_tree_tagged |
106 | * | 109 | * |
107 | * The first 4 functions are able to be called locklessly, using RCU. The | 110 | * The first 7 functions are able to be called locklessly, using RCU. The |
108 | * caller must ensure calls to these functions are made within rcu_read_lock() | 111 | * caller must ensure calls to these functions are made within rcu_read_lock() |
109 | * regions. Other readers (lock-free or otherwise) and modifications may be | 112 | * regions. Other readers (lock-free or otherwise) and modifications may be |
110 | * running concurrently. | 113 | * running concurrently. |
@@ -159,6 +162,9 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); | |||
159 | unsigned int | 162 | unsigned int |
160 | radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | 163 | radix_tree_gang_lookup(struct radix_tree_root *root, void **results, |
161 | unsigned long first_index, unsigned int max_items); | 164 | unsigned long first_index, unsigned int max_items); |
165 | unsigned int | ||
166 | radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, | ||
167 | unsigned long first_index, unsigned int max_items); | ||
162 | unsigned long radix_tree_next_hole(struct radix_tree_root *root, | 168 | unsigned long radix_tree_next_hole(struct radix_tree_root *root, |
163 | unsigned long index, unsigned long max_scan); | 169 | unsigned long index, unsigned long max_scan); |
164 | int radix_tree_preload(gfp_t gfp_mask); | 170 | int radix_tree_preload(gfp_t gfp_mask); |
@@ -173,6 +179,10 @@ unsigned int | |||
173 | radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | 179 | radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, |
174 | unsigned long first_index, unsigned int max_items, | 180 | unsigned long first_index, unsigned int max_items, |
175 | unsigned int tag); | 181 | unsigned int tag); |
182 | unsigned int | ||
183 | radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | ||
184 | unsigned long first_index, unsigned int max_items, | ||
185 | unsigned int tag); | ||
176 | int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); | 186 | int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); |
177 | 187 | ||
178 | static inline void radix_tree_preload_end(void) | 188 | static inline void radix_tree_preload_end(void) |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 56ec21a7f73d..9c4f1ffa2864 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -359,18 +359,17 @@ EXPORT_SYMBOL(radix_tree_insert); | |||
359 | * Returns: the slot corresponding to the position @index in the | 359 | * Returns: the slot corresponding to the position @index in the |
360 | * radix tree @root. This is useful for update-if-exists operations. | 360 | * radix tree @root. This is useful for update-if-exists operations. |
361 | * | 361 | * |
362 | * This function cannot be called under rcu_read_lock, it must be | 362 | * This function can be called under rcu_read_lock iff the slot is not |
363 | * excluded from writers, as must the returned slot for subsequent | 363 | * modified by radix_tree_replace_slot, otherwise it must be called |
364 | * use by radix_tree_deref_slot() and radix_tree_replace slot. | 364 | * exclusive from other writers. Any dereference of the slot must be done |
365 | * Caller must hold tree write locked across slot lookup and | 365 | * using radix_tree_deref_slot. |
366 | * replace. | ||
367 | */ | 366 | */ |
368 | void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) | 367 | void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) |
369 | { | 368 | { |
370 | unsigned int height, shift; | 369 | unsigned int height, shift; |
371 | struct radix_tree_node *node, **slot; | 370 | struct radix_tree_node *node, **slot; |
372 | 371 | ||
373 | node = root->rnode; | 372 | node = rcu_dereference(root->rnode); |
374 | if (node == NULL) | 373 | if (node == NULL) |
375 | return NULL; | 374 | return NULL; |
376 | 375 | ||
@@ -390,7 +389,7 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) | |||
390 | do { | 389 | do { |
391 | slot = (struct radix_tree_node **) | 390 | slot = (struct radix_tree_node **) |
392 | (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); | 391 | (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); |
393 | node = *slot; | 392 | node = rcu_dereference(*slot); |
394 | if (node == NULL) | 393 | if (node == NULL) |
395 | return NULL; | 394 | return NULL; |
396 | 395 | ||
@@ -667,7 +666,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, | |||
667 | EXPORT_SYMBOL(radix_tree_next_hole); | 666 | EXPORT_SYMBOL(radix_tree_next_hole); |
668 | 667 | ||
669 | static unsigned int | 668 | static unsigned int |
670 | __lookup(struct radix_tree_node *slot, void **results, unsigned long index, | 669 | __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, |
671 | unsigned int max_items, unsigned long *next_index) | 670 | unsigned int max_items, unsigned long *next_index) |
672 | { | 671 | { |
673 | unsigned int nr_found = 0; | 672 | unsigned int nr_found = 0; |
@@ -701,11 +700,9 @@ __lookup(struct radix_tree_node *slot, void **results, unsigned long index, | |||
701 | 700 | ||
702 | /* Bottom level: grab some items */ | 701 | /* Bottom level: grab some items */ |
703 | for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { | 702 | for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { |
704 | struct radix_tree_node *node; | ||
705 | index++; | 703 | index++; |
706 | node = slot->slots[i]; | 704 | if (slot->slots[i]) { |
707 | if (node) { | 705 | results[nr_found++] = &(slot->slots[i]); |
708 | results[nr_found++] = rcu_dereference(node); | ||
709 | if (nr_found == max_items) | 706 | if (nr_found == max_items) |
710 | goto out; | 707 | goto out; |
711 | } | 708 | } |
@@ -759,13 +756,22 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
759 | 756 | ||
760 | ret = 0; | 757 | ret = 0; |
761 | while (ret < max_items) { | 758 | while (ret < max_items) { |
762 | unsigned int nr_found; | 759 | unsigned int nr_found, slots_found, i; |
763 | unsigned long next_index; /* Index of next search */ | 760 | unsigned long next_index; /* Index of next search */ |
764 | 761 | ||
765 | if (cur_index > max_index) | 762 | if (cur_index > max_index) |
766 | break; | 763 | break; |
767 | nr_found = __lookup(node, results + ret, cur_index, | 764 | slots_found = __lookup(node, (void ***)results + ret, cur_index, |
768 | max_items - ret, &next_index); | 765 | max_items - ret, &next_index); |
766 | nr_found = 0; | ||
767 | for (i = 0; i < slots_found; i++) { | ||
768 | struct radix_tree_node *slot; | ||
769 | slot = *(((void ***)results)[ret + i]); | ||
770 | if (!slot) | ||
771 | continue; | ||
772 | results[ret + nr_found] = rcu_dereference(slot); | ||
773 | nr_found++; | ||
774 | } | ||
769 | ret += nr_found; | 775 | ret += nr_found; |
770 | if (next_index == 0) | 776 | if (next_index == 0) |
771 | break; | 777 | break; |
@@ -776,12 +782,71 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |||
776 | } | 782 | } |
777 | EXPORT_SYMBOL(radix_tree_gang_lookup); | 783 | EXPORT_SYMBOL(radix_tree_gang_lookup); |
778 | 784 | ||
785 | /** | ||
786 | * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree | ||
787 | * @root: radix tree root | ||
788 | * @results: where the results of the lookup are placed | ||
789 | * @first_index: start the lookup from this key | ||
790 | * @max_items: place up to this many items at *results | ||
791 | * | ||
792 | * Performs an index-ascending scan of the tree for present items. Places | ||
793 | * their slots at *@results and returns the number of items which were | ||
794 | * placed at *@results. | ||
795 | * | ||
796 | * The implementation is naive. | ||
797 | * | ||
798 | * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must | ||
799 | * be dereferenced with radix_tree_deref_slot, and if using only RCU | ||
800 | * protection, radix_tree_deref_slot may fail requiring a retry. | ||
801 | */ | ||
802 | unsigned int | ||
803 | radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, | ||
804 | unsigned long first_index, unsigned int max_items) | ||
805 | { | ||
806 | unsigned long max_index; | ||
807 | struct radix_tree_node *node; | ||
808 | unsigned long cur_index = first_index; | ||
809 | unsigned int ret; | ||
810 | |||
811 | node = rcu_dereference(root->rnode); | ||
812 | if (!node) | ||
813 | return 0; | ||
814 | |||
815 | if (!radix_tree_is_indirect_ptr(node)) { | ||
816 | if (first_index > 0) | ||
817 | return 0; | ||
818 | results[0] = (void **)&root->rnode; | ||
819 | return 1; | ||
820 | } | ||
821 | node = radix_tree_indirect_to_ptr(node); | ||
822 | |||
823 | max_index = radix_tree_maxindex(node->height); | ||
824 | |||
825 | ret = 0; | ||
826 | while (ret < max_items) { | ||
827 | unsigned int slots_found; | ||
828 | unsigned long next_index; /* Index of next search */ | ||
829 | |||
830 | if (cur_index > max_index) | ||
831 | break; | ||
832 | slots_found = __lookup(node, results + ret, cur_index, | ||
833 | max_items - ret, &next_index); | ||
834 | ret += slots_found; | ||
835 | if (next_index == 0) | ||
836 | break; | ||
837 | cur_index = next_index; | ||
838 | } | ||
839 | |||
840 | return ret; | ||
841 | } | ||
842 | EXPORT_SYMBOL(radix_tree_gang_lookup_slot); | ||
843 | |||
779 | /* | 844 | /* |
780 | * FIXME: the two tag_get()s here should use find_next_bit() instead of | 845 | * FIXME: the two tag_get()s here should use find_next_bit() instead of |
781 | * open-coding the search. | 846 | * open-coding the search. |
782 | */ | 847 | */ |
783 | static unsigned int | 848 | static unsigned int |
784 | __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, | 849 | __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, |
785 | unsigned int max_items, unsigned long *next_index, unsigned int tag) | 850 | unsigned int max_items, unsigned long *next_index, unsigned int tag) |
786 | { | 851 | { |
787 | unsigned int nr_found = 0; | 852 | unsigned int nr_found = 0; |
@@ -811,11 +876,9 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, | |||
811 | unsigned long j = index & RADIX_TREE_MAP_MASK; | 876 | unsigned long j = index & RADIX_TREE_MAP_MASK; |
812 | 877 | ||
813 | for ( ; j < RADIX_TREE_MAP_SIZE; j++) { | 878 | for ( ; j < RADIX_TREE_MAP_SIZE; j++) { |
814 | struct radix_tree_node *node; | ||
815 | index++; | 879 | index++; |
816 | if (!tag_get(slot, tag, j)) | 880 | if (!tag_get(slot, tag, j)) |
817 | continue; | 881 | continue; |
818 | node = slot->slots[j]; | ||
819 | /* | 882 | /* |
820 | * Even though the tag was found set, we need to | 883 | * Even though the tag was found set, we need to |
821 | * recheck that we have a non-NULL node, because | 884 | * recheck that we have a non-NULL node, because |
@@ -826,9 +889,8 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, | |||
826 | * lookup ->slots[x] without a lock (ie. can't | 889 | * lookup ->slots[x] without a lock (ie. can't |
827 | * rely on its value remaining the same). | 890 | * rely on its value remaining the same). |
828 | */ | 891 | */ |
829 | if (node) { | 892 | if (slot->slots[j]) { |
830 | node = rcu_dereference(node); | 893 | results[nr_found++] = &(slot->slots[j]); |
831 | results[nr_found++] = node; | ||
832 | if (nr_found == max_items) | 894 | if (nr_found == max_items) |
833 | goto out; | 895 | goto out; |
834 | } | 896 | } |
@@ -887,13 +949,22 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
887 | 949 | ||
888 | ret = 0; | 950 | ret = 0; |
889 | while (ret < max_items) { | 951 | while (ret < max_items) { |
890 | unsigned int nr_found; | 952 | unsigned int nr_found, slots_found, i; |
891 | unsigned long next_index; /* Index of next search */ | 953 | unsigned long next_index; /* Index of next search */ |
892 | 954 | ||
893 | if (cur_index > max_index) | 955 | if (cur_index > max_index) |
894 | break; | 956 | break; |
895 | nr_found = __lookup_tag(node, results + ret, cur_index, | 957 | slots_found = __lookup_tag(node, (void ***)results + ret, |
896 | max_items - ret, &next_index, tag); | 958 | cur_index, max_items - ret, &next_index, tag); |
959 | nr_found = 0; | ||
960 | for (i = 0; i < slots_found; i++) { | ||
961 | struct radix_tree_node *slot; | ||
962 | slot = *(((void ***)results)[ret + i]); | ||
963 | if (!slot) | ||
964 | continue; | ||
965 | results[ret + nr_found] = rcu_dereference(slot); | ||
966 | nr_found++; | ||
967 | } | ||
897 | ret += nr_found; | 968 | ret += nr_found; |
898 | if (next_index == 0) | 969 | if (next_index == 0) |
899 | break; | 970 | break; |
@@ -905,6 +976,67 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | |||
905 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag); | 976 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag); |
906 | 977 | ||
907 | /** | 978 | /** |
979 | * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a | ||
980 | * radix tree based on a tag | ||
981 | * @root: radix tree root | ||
982 | * @results: where the results of the lookup are placed | ||
983 | * @first_index: start the lookup from this key | ||
984 | * @max_items: place up to this many items at *results | ||
985 | * @tag: the tag index (< RADIX_TREE_MAX_TAGS) | ||
986 | * | ||
987 | * Performs an index-ascending scan of the tree for present items which | ||
988 | * have the tag indexed by @tag set. Places the slots at *@results and | ||
989 | * returns the number of slots which were placed at *@results. | ||
990 | */ | ||
991 | unsigned int | ||
992 | radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | ||
993 | unsigned long first_index, unsigned int max_items, | ||
994 | unsigned int tag) | ||
995 | { | ||
996 | struct radix_tree_node *node; | ||
997 | unsigned long max_index; | ||
998 | unsigned long cur_index = first_index; | ||
999 | unsigned int ret; | ||
1000 | |||
1001 | /* check the root's tag bit */ | ||
1002 | if (!root_tag_get(root, tag)) | ||
1003 | return 0; | ||
1004 | |||
1005 | node = rcu_dereference(root->rnode); | ||
1006 | if (!node) | ||
1007 | return 0; | ||
1008 | |||
1009 | if (!radix_tree_is_indirect_ptr(node)) { | ||
1010 | if (first_index > 0) | ||
1011 | return 0; | ||
1012 | results[0] = (void **)&root->rnode; | ||
1013 | return 1; | ||
1014 | } | ||
1015 | node = radix_tree_indirect_to_ptr(node); | ||
1016 | |||
1017 | max_index = radix_tree_maxindex(node->height); | ||
1018 | |||
1019 | ret = 0; | ||
1020 | while (ret < max_items) { | ||
1021 | unsigned int slots_found; | ||
1022 | unsigned long next_index; /* Index of next search */ | ||
1023 | |||
1024 | if (cur_index > max_index) | ||
1025 | break; | ||
1026 | slots_found = __lookup_tag(node, results + ret, | ||
1027 | cur_index, max_items - ret, &next_index, tag); | ||
1028 | ret += slots_found; | ||
1029 | if (next_index == 0) | ||
1030 | break; | ||
1031 | cur_index = next_index; | ||
1032 | } | ||
1033 | |||
1034 | return ret; | ||
1035 | } | ||
1036 | EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); | ||
1037 | |||
1038 | |||
1039 | /** | ||
908 | * radix_tree_shrink - shrink height of a radix tree to minimal | 1040 | * radix_tree_shrink - shrink height of a radix tree to minimal |
909 | * @root radix tree root | 1041 | * @root radix tree root |
910 | */ | 1042 | */ |