diff options
author | Matthew Wilcox <willy@linux.intel.com> | 2016-12-14 18:09:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 19:04:10 -0500 |
commit | e157b555945fb16ddc6cce605a1eb6b4135ea5f1 (patch) | |
tree | f0b53b641059c010cfd38a32ae7f4f3b27b04303 | |
parent | 175542f575723e43f897ddb09d0011c13f7cf0ec (diff) |
radix-tree: add radix_tree_split
This new function splits a larger multiorder entry into smaller entries
(potentially multi-order entries). These entries are initialised to
RADIX_TREE_RETRY to ensure that RCU walkers who see this state aren't
confused. The caller should then call radix_tree_for_each_slot() and
radix_tree_replace_slot() in order to turn these retry entries into the
intended new entries. Tags are replicated from the original multiorder
entry into each new entry.
Link: http://lkml.kernel.org/r/1480369871-5271-59-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/radix-tree.h | 12 | ||||
-rw-r--r-- | lib/radix-tree.c | 142 | ||||
-rw-r--r-- | tools/testing/radix-tree/multiorder.c | 64 |
3 files changed, 214 insertions, 4 deletions
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 935293a24f7d..1f4b56120de8 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -80,6 +80,14 @@ static inline bool radix_tree_is_internal_node(void *ptr) | |||
80 | #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ | 80 | #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ |
81 | RADIX_TREE_MAP_SHIFT)) | 81 | RADIX_TREE_MAP_SHIFT)) |
82 | 82 | ||
83 | /* | ||
84 | * @count is the count of every non-NULL element in the ->slots array | ||
85 | * whether that is an exceptional entry, a retry entry, a user pointer, | ||
86 | * a sibling entry or a pointer to the next level of the tree. | ||
87 | * @exceptional is the count of every element in ->slots which is | ||
88 | * either radix_tree_exceptional_entry() or is a sibling entry for an | ||
89 | * exceptional entry. | ||
90 | */ | ||
83 | struct radix_tree_node { | 91 | struct radix_tree_node { |
84 | unsigned char shift; /* Bits remaining in each slot */ | 92 | unsigned char shift; /* Bits remaining in each slot */ |
85 | unsigned char offset; /* Slot offset in parent */ | 93 | unsigned char offset; /* Slot offset in parent */ |
@@ -293,6 +301,8 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
293 | struct radix_tree_node *node, | 301 | struct radix_tree_node *node, |
294 | void **slot, void *item, | 302 | void **slot, void *item, |
295 | radix_tree_update_node_t update_node, void *private); | 303 | radix_tree_update_node_t update_node, void *private); |
304 | void radix_tree_iter_replace(struct radix_tree_root *, | ||
305 | const struct radix_tree_iter *, void **slot, void *item); | ||
296 | void radix_tree_replace_slot(struct radix_tree_root *root, | 306 | void radix_tree_replace_slot(struct radix_tree_root *root, |
297 | void **slot, void *item); | 307 | void **slot, void *item); |
298 | void __radix_tree_delete_node(struct radix_tree_root *root, | 308 | void __radix_tree_delete_node(struct radix_tree_root *root, |
@@ -335,6 +345,8 @@ static inline void radix_tree_preload_end(void) | |||
335 | preempt_enable(); | 345 | preempt_enable(); |
336 | } | 346 | } |
337 | 347 | ||
348 | int radix_tree_split(struct radix_tree_root *, unsigned long index, | ||
349 | unsigned new_order); | ||
338 | int radix_tree_join(struct radix_tree_root *, unsigned long index, | 350 | int radix_tree_join(struct radix_tree_root *, unsigned long index, |
339 | unsigned new_order, void *); | 351 | unsigned new_order, void *); |
340 | 352 | ||
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 962cfb3a7671..ade2ed3f5190 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/cpu.h> | ||
25 | #include <linux/errno.h> | 26 | #include <linux/errno.h> |
26 | #include <linux/init.h> | 27 | #include <linux/init.h> |
27 | #include <linux/kernel.h> | 28 | #include <linux/kernel.h> |
@@ -758,7 +759,10 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, | |||
758 | unsigned i, n, tag, offset, tags = 0; | 759 | unsigned i, n, tag, offset, tags = 0; |
759 | 760 | ||
760 | if (node) { | 761 | if (node) { |
761 | n = 1 << (order - node->shift); | 762 | if (order > node->shift) |
763 | n = 1 << (order - node->shift); | ||
764 | else | ||
765 | n = 1; | ||
762 | offset = get_slot_offset(node, slot); | 766 | offset = get_slot_offset(node, slot); |
763 | } else { | 767 | } else { |
764 | n = 1; | 768 | n = 1; |
@@ -797,7 +801,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, | |||
797 | tag_set(node, tag, offset); | 801 | tag_set(node, tag, offset); |
798 | } | 802 | } |
799 | if (radix_tree_is_internal_node(old) && | 803 | if (radix_tree_is_internal_node(old) && |
800 | !is_sibling_entry(node, old)) | 804 | !is_sibling_entry(node, old) && |
805 | (old != RADIX_TREE_RETRY)) | ||
801 | radix_tree_free_nodes(old); | 806 | radix_tree_free_nodes(old); |
802 | if (radix_tree_exceptional_entry(old)) | 807 | if (radix_tree_exceptional_entry(old)) |
803 | node->exceptional--; | 808 | node->exceptional--; |
@@ -1021,7 +1026,8 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
1021 | * NOTE: This cannot be used to switch between non-entries (empty slots), | 1026 | * NOTE: This cannot be used to switch between non-entries (empty slots), |
1022 | * regular entries, and exceptional entries, as that requires accounting | 1027 | * regular entries, and exceptional entries, as that requires accounting |
1023 | * inside the radix tree node. When switching from one type of entry or | 1028 | * inside the radix tree node. When switching from one type of entry or |
1024 | * deleting, use __radix_tree_lookup() and __radix_tree_replace(). | 1029 | * deleting, use __radix_tree_lookup() and __radix_tree_replace() or |
1030 | * radix_tree_iter_replace(). | ||
1025 | */ | 1031 | */ |
1026 | void radix_tree_replace_slot(struct radix_tree_root *root, | 1032 | void radix_tree_replace_slot(struct radix_tree_root *root, |
1027 | void **slot, void *item) | 1033 | void **slot, void *item) |
@@ -1029,6 +1035,21 @@ void radix_tree_replace_slot(struct radix_tree_root *root, | |||
1029 | replace_slot(root, NULL, slot, item, true); | 1035 | replace_slot(root, NULL, slot, item, true); |
1030 | } | 1036 | } |
1031 | 1037 | ||
1038 | /** | ||
1039 | * radix_tree_iter_replace - replace item in a slot | ||
1040 | * @root: radix tree root | ||
1041 | * @slot: pointer to slot | ||
1042 | * @item: new item to store in the slot. | ||
1043 | * | ||
1044 | * For use with radix_tree_split() and radix_tree_for_each_slot(). | ||
1045 | * Caller must hold tree write locked across split and replacement. | ||
1046 | */ | ||
1047 | void radix_tree_iter_replace(struct radix_tree_root *root, | ||
1048 | const struct radix_tree_iter *iter, void **slot, void *item) | ||
1049 | { | ||
1050 | __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); | ||
1051 | } | ||
1052 | |||
1032 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 1053 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
1033 | /** | 1054 | /** |
1034 | * radix_tree_join - replace multiple entries with one multiorder entry | 1055 | * radix_tree_join - replace multiple entries with one multiorder entry |
@@ -1061,6 +1082,117 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index, | |||
1061 | 1082 | ||
1062 | return error; | 1083 | return error; |
1063 | } | 1084 | } |
1085 | |||
1086 | /** | ||
1087 | * radix_tree_split - Split an entry into smaller entries | ||
1088 | * @root: radix tree root | ||
1089 | * @index: An index within the large entry | ||
1090 | * @order: Order of new entries | ||
1091 | * | ||
1092 | * Call this function as the first step in replacing a multiorder entry | ||
1093 | * with several entries of lower order. After this function returns, | ||
1094 | * loop over the relevant portion of the tree using radix_tree_for_each_slot() | ||
1095 | * and call radix_tree_iter_replace() to set up each new entry. | ||
1096 | * | ||
1097 | * The tags from this entry are replicated to all the new entries. | ||
1098 | * | ||
1099 | * The radix tree should be locked against modification during the entire | ||
1100 | * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which | ||
1101 | * should prompt RCU walkers to restart the lookup from the root. | ||
1102 | */ | ||
1103 | int radix_tree_split(struct radix_tree_root *root, unsigned long index, | ||
1104 | unsigned order) | ||
1105 | { | ||
1106 | struct radix_tree_node *parent, *node, *child; | ||
1107 | void **slot; | ||
1108 | unsigned int offset, end; | ||
1109 | unsigned n, tag, tags = 0; | ||
1110 | |||
1111 | if (!__radix_tree_lookup(root, index, &parent, &slot)) | ||
1112 | return -ENOENT; | ||
1113 | if (!parent) | ||
1114 | return -ENOENT; | ||
1115 | |||
1116 | offset = get_slot_offset(parent, slot); | ||
1117 | |||
1118 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
1119 | if (tag_get(parent, tag, offset)) | ||
1120 | tags |= 1 << tag; | ||
1121 | |||
1122 | for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { | ||
1123 | if (!is_sibling_entry(parent, parent->slots[end])) | ||
1124 | break; | ||
1125 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
1126 | if (tags & (1 << tag)) | ||
1127 | tag_set(parent, tag, end); | ||
1128 | /* rcu_assign_pointer ensures tags are set before RETRY */ | ||
1129 | rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); | ||
1130 | } | ||
1131 | rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); | ||
1132 | parent->exceptional -= (end - offset); | ||
1133 | |||
1134 | if (order == parent->shift) | ||
1135 | return 0; | ||
1136 | if (order > parent->shift) { | ||
1137 | while (offset < end) | ||
1138 | offset += insert_entries(parent, &parent->slots[offset], | ||
1139 | RADIX_TREE_RETRY, order, true); | ||
1140 | return 0; | ||
1141 | } | ||
1142 | |||
1143 | node = parent; | ||
1144 | |||
1145 | for (;;) { | ||
1146 | if (node->shift > order) { | ||
1147 | child = radix_tree_node_alloc(root); | ||
1148 | if (!child) | ||
1149 | goto nomem; | ||
1150 | child->shift = node->shift - RADIX_TREE_MAP_SHIFT; | ||
1151 | child->offset = offset; | ||
1152 | child->count = 0; | ||
1153 | child->parent = node; | ||
1154 | if (node != parent) { | ||
1155 | node->count++; | ||
1156 | node->slots[offset] = node_to_entry(child); | ||
1157 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
1158 | if (tags & (1 << tag)) | ||
1159 | tag_set(node, tag, offset); | ||
1160 | } | ||
1161 | |||
1162 | node = child; | ||
1163 | offset = 0; | ||
1164 | continue; | ||
1165 | } | ||
1166 | |||
1167 | n = insert_entries(node, &node->slots[offset], | ||
1168 | RADIX_TREE_RETRY, order, false); | ||
1169 | BUG_ON(n > RADIX_TREE_MAP_SIZE); | ||
1170 | |||
1171 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
1172 | if (tags & (1 << tag)) | ||
1173 | tag_set(node, tag, offset); | ||
1174 | offset += n; | ||
1175 | |||
1176 | while (offset == RADIX_TREE_MAP_SIZE) { | ||
1177 | if (node == parent) | ||
1178 | break; | ||
1179 | offset = node->offset; | ||
1180 | child = node; | ||
1181 | node = node->parent; | ||
1182 | rcu_assign_pointer(node->slots[offset], | ||
1183 | node_to_entry(child)); | ||
1184 | offset++; | ||
1185 | } | ||
1186 | if ((node == parent) && (offset == end)) | ||
1187 | return 0; | ||
1188 | } | ||
1189 | |||
1190 | nomem: | ||
1191 | /* Shouldn't happen; did user forget to preload? */ | ||
1192 | /* TODO: free all the allocated nodes */ | ||
1193 | WARN_ON(1); | ||
1194 | return -ENOMEM; | ||
1195 | } | ||
1064 | #endif | 1196 | #endif |
1065 | 1197 | ||
1066 | /** | 1198 | /** |
@@ -1441,8 +1573,10 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
1441 | child = rcu_dereference_raw(node->slots[offset]); | 1573 | child = rcu_dereference_raw(node->slots[offset]); |
1442 | } | 1574 | } |
1443 | 1575 | ||
1444 | if ((child == NULL) || (child == RADIX_TREE_RETRY)) | 1576 | if (!child) |
1445 | goto restart; | 1577 | goto restart; |
1578 | if (child == RADIX_TREE_RETRY) | ||
1579 | break; | ||
1446 | } while (radix_tree_is_internal_node(child)); | 1580 | } while (radix_tree_is_internal_node(child)); |
1447 | 1581 | ||
1448 | /* Update the iterator state */ | 1582 | /* Update the iterator state */ |
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index c9f656cf5f52..fa6effe997a3 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c | |||
@@ -389,6 +389,69 @@ static void multiorder_join(void) | |||
389 | } | 389 | } |
390 | } | 390 | } |
391 | 391 | ||
392 | static void __multiorder_split(int old_order, int new_order) | ||
393 | { | ||
394 | RADIX_TREE(tree, GFP_KERNEL); | ||
395 | void **slot; | ||
396 | struct radix_tree_iter iter; | ||
397 | struct radix_tree_node *node; | ||
398 | void *item; | ||
399 | |||
400 | item_insert_order(&tree, 0, old_order); | ||
401 | radix_tree_tag_set(&tree, 0, 2); | ||
402 | radix_tree_split(&tree, 0, new_order); | ||
403 | radix_tree_for_each_slot(slot, &tree, &iter, 0) { | ||
404 | radix_tree_iter_replace(&tree, &iter, slot, | ||
405 | item_create(iter.index, new_order)); | ||
406 | } | ||
407 | |||
408 | item_kill_tree(&tree); | ||
409 | |||
410 | __radix_tree_insert(&tree, 0, old_order, (void *)0x12); | ||
411 | |||
412 | item = __radix_tree_lookup(&tree, 0, &node, NULL); | ||
413 | assert(item == (void *)0x12); | ||
414 | assert(node->exceptional > 0); | ||
415 | |||
416 | radix_tree_split(&tree, 0, new_order); | ||
417 | radix_tree_for_each_slot(slot, &tree, &iter, 0) { | ||
418 | radix_tree_iter_replace(&tree, &iter, slot, | ||
419 | item_create(iter.index, new_order)); | ||
420 | } | ||
421 | |||
422 | item = __radix_tree_lookup(&tree, 0, &node, NULL); | ||
423 | assert(item != (void *)0x12); | ||
424 | assert(node->exceptional == 0); | ||
425 | |||
426 | item_kill_tree(&tree); | ||
427 | |||
428 | __radix_tree_insert(&tree, 0, old_order, (void *)0x12); | ||
429 | |||
430 | item = __radix_tree_lookup(&tree, 0, &node, NULL); | ||
431 | assert(item == (void *)0x12); | ||
432 | assert(node->exceptional > 0); | ||
433 | |||
434 | radix_tree_split(&tree, 0, new_order); | ||
435 | radix_tree_for_each_slot(slot, &tree, &iter, 0) { | ||
436 | radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); | ||
437 | } | ||
438 | |||
439 | item = __radix_tree_lookup(&tree, 0, &node, NULL); | ||
440 | assert(item == (void *)0x16); | ||
441 | assert(node->exceptional > 0); | ||
442 | |||
443 | item_kill_tree(&tree); | ||
444 | } | ||
445 | |||
446 | static void multiorder_split(void) | ||
447 | { | ||
448 | int i, j; | ||
449 | |||
450 | for (i = 9; i < 19; i++) | ||
451 | for (j = 0; j < i; j++) | ||
452 | __multiorder_split(i, j); | ||
453 | } | ||
454 | |||
392 | void multiorder_checks(void) | 455 | void multiorder_checks(void) |
393 | { | 456 | { |
394 | int i; | 457 | int i; |
@@ -407,4 +470,5 @@ void multiorder_checks(void) | |||
407 | multiorder_iteration(); | 470 | multiorder_iteration(); |
408 | multiorder_tagged_iteration(); | 471 | multiorder_tagged_iteration(); |
409 | multiorder_join(); | 472 | multiorder_join(); |
473 | multiorder_split(); | ||
410 | } | 474 | } |